o
    Zhm                     @   sh  d dl mZ d dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ dgZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"d d! Z#d"d# Z$d$d% Z%d&d' Z&d(d) Z'd*d+ Z(d,d- Z)d.d/ Z*d0d1 Z+d2d3 Z,d4ej-jfd5d6Z.eG d7d8 d8Z/d9ed:efd;dZ0dS )<    )	dataclass)partial)AnyCallableOptionalN)	out_dtype)quantized_decomposed_lib)_WrapperModule)"_get_aten_graph_module_for_pattern,_replace_literals_with_existing_placeholders'_replace_literals_with_new_placeholders"remove_tensor_overload_for_qdq_ops)GraphModule)replace_pattern reference_representation_rewritec                 C   sf   t jj| ||||t j}t jj|||||	t j}t jjj|||
}t jj|||||t j}|S N)	torchopsquantized_decomposeddequantize_per_tensorint8atenlineardefaultquantize_per_tensor)x_i8x_scalex_zero_pointx_quant_minx_quant_max	weight_i8weight_scaleweight_zero_pointweight_quant_minweight_quant_max	bias_fp32	out_scaleout_zero_pointout_quant_minout_quant_maxx_fp32weight_fp32out_fp32out_i8 r.   `/var/www/auris/lib/python3.10/site-packages/torch/ao/quantization/pt2e/representation/rewrite.py_qdq_quantized_linear   s    r0   c                 C   s   t jj| ||} t jj|||	}| t j}|t j}tt jjjjt j	|| || d }|| }tt jjj
jt j	|
|}|| }tt jjjjt j	||| | | }t jj|||t j}|S r   )r   r   r   clamptoint16r   r   r   int32divTensormulr   )r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   x_i16
weight_i16acc_i32
bias_scalebias_i32r-   r.   r.   r/   _reference_quantized_linear<   s2   
	

	r=   c
                 C   s   t jj| |||t j\}
}t jj| |
|||t j}t jj||
|||t j} t jj|||||t j}t jjj	| ||	}|S r   )
r   r   r   choose_qparamsr   r   r   r   r   r   )r*   r   r   x_epsr    r!   r"   r#   r$   r%   r   r   r   r+   r,   r.   r.   r/   _qdq_dynamic_quantized_linearq   s&   r@   c
                 C   s   t jj| |||t j\}
}| |
 } t | } | jt jd}|| }t |||}|jt jd}t jj	|||}|t j
}|t j
}tt jj	jjt j|| || d }|
| }tt jj	jjt j|	|}|| }||
|  }|S NZdtype)r   r   r   r>   r   roundr2   r4   r1   r   r3   r   r   r   r5   r6   )r*   r   r   r?   r    r!   r"   r#   r$   r%   r   r   x_i32r   r8   r9   r:   r;   r<   r,   r.   r.   r/   #_reference_dynamic_quantized_linear   s0   

rE   c                 C   s   ddg}ddg}ddg}d}ddg}d}t jj| ||||t j}t jj|||||	t j}t jjj|||
||||||	}t jj|||||t j}|S N   r   F)	r   r   r   r   r   r   convolutionr   r   )r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   stridepaddingdilation
transposedoutput_paddinggroupsr*   r+   r,   r-   r.   r.   r/   _qdq_quantized_conv2d   s@   
rO   c                 C   s  ddg}ddg}ddg}d}ddg}d}t jj| ||} t jj|||	}| t j}|t j}tt jjjjt j	|| || d ||||||}|| }tt jjj
jt j	|
|}|d}|d}|| }tt jjjjt j	||| | | }t jj|||t j}|S )NrG   r   F)r   r   r   r1   r2   r3   r   rH   r   r4   r5   r6   Z	unsqueezer7   r   )r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   rI   rJ   rK   rL   rM   rN   r8   r9   r:   r;   r<   r-   r.   r.   r/   _reference_quantized_conv2d   sN   




	rQ   c
                 C   sh   t jj| ||||	t j}
t jj|||||	t j}|
| }t jj|}t jj|||||	t j}|S r   )r   r   r   r   r   r   Zrelur   r   r   r   y_i8y_scaley_zero_pointr&   r'   	quant_min	quant_maxr*   Zy_fp32r,   r-   r.   r.   r/   _qdq_quantized_add_reluF  s   rX   c
                 C   s   |  tj}
| tj}ttjjjjtj|
| || }
ttjjjjtj|| || }|
| | }tjj|||	 tj	}|S )z
    See comments for `_reference_quantized_add` for more information on
    how to derive the formula for out_i8 based on x_i8 and y_i8
    )
r2   r   r4   r   r   r   r7   r6   r1   r   r   r   r   rS   rT   rU   r&   r'   rV   rW   rD   Zy_i32out_i32r-   r.   r.   r/   _reference_quantized_add_relu`  s"   

r[   c
                 C   sZ   t jj| ||||	t j}
t jj|||||	t j}|
| }t jj|||||	t j}|S r   )r   r   r   r   r   r   rR   r.   r.   r/   _qdq_quantized_add  s   r\   c
                 C   s   |  tj}
| tj}t|| |
|   tj}
t|| ||   tj}|
| | }d}d}	tjj|||	 tj}|S )a  
        # How to Derive the formula for out_i8 based on x_i8 and y_i8
        # (since quantized add takes x_i8, y_i8 and their quantization parameters, and produce an out_i8)

        # out_i8 is quantized output, we can write down the formula for it first:
    out_i8 = out_f32 / out_scale + out_zero_point           (1)

        # then out_fp32 is computed from x_f32 + y_f32, and the x_fp32 and y_fp32 are the dequantized x_i8 and y_i8
        out_f32 = x_f32 + y_f32           (2)
        x_fp32 = (x_i8 - x_zero_point) * x_scale         (3)
        y_fp32 = (y_i8 - y_zero_point) * y_scale         (4)

        # applying the above fomula to the out_i8 equation we can get the following:
        out_i8 = out_fp32 / out_scale + out_zero_point             # (1)
           = (x_f32 + y_f32) / out_scale + out_zero_point      # applying (2) to substitute out_fp32 with x_fp32 + y_fp32
           = ((x_i8 - x_zero_point) * x_scale + (y_i8 - y_zero_point) * y_scale) / out_scale + out_zero_point  # apply (3) and (4)
       )r2   r   r4   rC   r   r   r1   r   rY   r.   r.   r/   _reference_quantized_add  s   r_   c	                 C   sj   d}	d}
d}d}d}t jj| ||||t j}t jjj||	|
|||\}}t jj|||||t j}|S rF   )	r   r   r   r   r   r   max_pool2d_with_indicesr   r   )r   r   r   r   r   r&   r'   r(   r)   kernel_sizerI   rJ   rK   	ceil_moder*   r,   _r-   r.   r.   r/   _qdq_quantized_max_pool2d  s   
rd   c	                 C   s~   d}	d}
d}d}d}t | ||} | t j}t jjj|| |	|
|||\}}|||  | }t |||}|t j}|S rF   )	r   r1   r2   r4   r   r   r`   r   r   )r   r   r   r   r   r&   r'   r(   r)   ra   rI   rJ   rK   rb   rD   rZ   rc   r,   r-   r.   r.   r/   _reference_quantized_max_pool2d  s   
re   c                 C      t jj| ||||t j}|S r   )r   r   r   r   r   r*   scale
zero_pointrV   rW   xr.   r.   r/   _quantize_per_tensor_int8     rk   c                 C   sH   | | }t |}|jt jd}|| }t |||}|jt jd}|S rA   )r   rC   r2   r4   r1   r   rg   r.   r.   r/   #_reference_quantize_per_tensor_int8  s   
rm   c                 C   rf   r   )r   r   r   r   r   )r   rh   ri   rV   rW   r*   r.   r.   r/   _dequantize_per_tensor_int8  rl   rn   c                 C   s0   t jj| ||} | t j| | jt jdS rA   )r   r   r   r1   r2   float32)r   rh   ri   rV   rW   r.   r.   r/   %_reference_dequantize_per_tensor_int8  s   rp   c              	   C       t jj| |||||t j}|S r   )r   r   r   Zquantize_per_channelr   )r*   scaleszero_pointsch_axisrV   rW   r-   r.   r.   r/   _quantize_per_channel_int8*  s   ru   c                 C   sP   t | |d} t jjt | | t j| ||}t ||d}|t jS NrP   )	r   	transposer   r   r1   rC   r2   r4   r   )r*   rr   rs   rt   rV   rW   rZ   r.   r.   r/   $_reference_quantize_per_channel_int83  s   rx   c              	   C   rq   r   )r   r   r   Zdequantize_per_channelr   )r   rr   rs   rt   rV   rW   r,   r.   r.   r/   _dequantize_per_channel_int8>  s   ry   c                 C   sR   t jj| ||} t | |d} | t j}|| t j| }t ||d}|S rv   )r   r   r   r1   rw   r2   r4   float)r   rr   rs   rt   rV   rW   rD   r,   r.   r.   r/   &_reference_dequantize_per_channel_int8H  s   r{   gmc                 C   s   t | dgdddddS )NrP            )rG   r]   r^   )exclude_literalsliteral_to_ph_idx)r   )r|   r.   r.   r/   '_replace_ph_qdq_per_channel_replacementV  s   r   c                   @   sf   e Zd ZU dZeedf ed< eed< eed< dZe	ee
ge
f  ed< dZe	ee
ge
f  ed< dS )	_RewriteInfozData needed for rewrite, this includes example inputs, pattern and replacement functions
    and post transformation functions for the exported pattern and replacement GraphModule
    .example_inputspatternreplacementNpattern_post_transreplacement_post_trans)__name__
__module____qualname____doc__tupler   __annotations__r   r   r   r   r   r.   r.   r.   r/   r   \  s   
 r   modelreturnc                 C   sF  t jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdt jdt jdt jdt jdt jdt jdgt jdt jdgt jdf}t jdt jdddt t j	j
t jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdt jdf
}t jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdt jdt jdt jdt jdt jdt jdgt jdt jdgt jdf}t jdddt jdt jdt jdt jdt jdt jdddt jdt jdt jdt jdt jdt jdt jdt jdt jdt jdgt jdt jdgt jdf
}t jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdt jdt jdt jdt jdt jdgt jdt jdgt jdf	}t jdd	d	d	t jdt jdt jdt jdt jdt jdgt jdt jdgt jdf}t jdddt jdt jdt jdt jdt jdt jdgt jdt jdgt jdf}t jdd	d	d	t jdt jd	t jdt jd	t jddddf}t jdddt jdt jd	t jdt jd	t jddddf}	t|ttttttdddd
t t j	j
d	idttdddd
t t j	j
d	idt|ttttttt|ttttttdgdttdgdt|ttttt|ttttt|ttttttt|ttttt|ttttt|tt tt!t"t"t|	tt#tt$t"t"g
}
t%|  |
D ]@}|j&}|j'}|j(}|j)}|j*}t+||}t%| t+||}t%| |r
||}|r||}|,  |,  t-| || q| S )Nr]   r^   )   r   rB   rG   )r   r   i)rG   r}   r}   r}   r}   r   )r   rP   )r   ).r   randintr   Zrandnrz   ZzerosintZtensorZfinforo   Zepsr   r	   r@   rE   r   r   r0   r=   r   rO   rQ   rX   r[   r\   r_   rd   re   rk   rm   rn   rp   ru   rx   r   ry   r{   r   r   r   r   r   r   r
   Z	recompiler   )r   Z _QUANTIZED_LINEAR_EXAMPLE_INPUTSZ(_DYNAMIC_QUANTIZED_LINEAR_EXAMPLE_INPUTSZ _QUANTIZED_CONV2d_EXAMPLE_INPUTSZ)_QUANTIZED_ADD_OR_ADD_RELU_EXAMPLE_INPUTSZ$_QUANTIZED_MAX_POOL2D_EXAMPLE_INPUTSZ(_QUANTIZE_PER_TENSOR_INT8_EXAMPLE_INPUTSZ*_DEQUANTIZE_PER_TENSOR_INT8_EXAMPLE_INPUTSZ)_QUANTIZE_PER_CHANNEL_INT8_EXAMPLE_INPUTSZ+_DEQUANTIZE_PER_CHANNEL_INT8_EXAMPLE_INPUTSZ_REWRITE_INFO_LISTZrewrite_infor   r   r   r   r   r.   r.   r/   r   k  sd  		

G

)1dataclassesr   	functoolsr   typingr   r   r   r   Z!torch._higher_order_ops.out_dtyper   Z$torch.ao.quantization.fx._decomposedr   Z'torch.ao.quantization.pt2e.export_utilsr	   Z torch.ao.quantization.pt2e.utilsr
   r   r   r   Ztorch.fxr   Ztorch.fx.subgraph_rewriterr   __all__r0   r=   r@   rE   rO   rQ   rX   r[   r\   r_   rd   re   rk   rm   rn   rp   ru   rx   ry   r{   Zfxr   r   r   r.   r.   r.   r/   <module>   sH   #5!/3R%)	
