import math
from typing import Optional

import torch
from torch._refs import _unsqueeze_multiple
from torch.ao.quantization.utils import determine_qparams, validate_qmin_qmax
from torch.library import impl, Library

quantized_decomposed_lib = Library("quantized_decomposed", "DEF")

_INTEGER_DTYPES = [torch.uint8, torch.int8, torch.uint16, torch.int16, torch.int32]
_FLOAT_DTYPES = [torch.float8_e5m2, torch.float8_e4m3fn]

_DTYPE_TO_QVALUE_BOUNDS = {
    k: (torch.iinfo(k).min, torch.iinfo(k).max) for k in _INTEGER_DTYPES
}
_DTYPE_TO_QVALUE_BOUNDS.update(
    {k: (int(torch.finfo(k).min), int(torch.finfo(k).max)) for k in _FLOAT_DTYPES}
)


def _quant_min_max_bounds_check(quant_min, quant_max, dtype):
    # Validate that the requested [quant_min, quant_max] range fits inside the
    # representable range of the target dtype.
    if dtype not in _DTYPE_TO_QVALUE_BOUNDS:
        raise ValueError(f"Unsupported dtype: {dtype}")
    quant_min_lower_bound, quant_max_upper_bound = _DTYPE_TO_QVALUE_BOUNDS[dtype]

    assert quant_min >= quant_min_lower_bound, (
        "quant_min out of bound for dtype, "
        f"quant_min_lower_bound: {quant_min_lower_bound} quant_min: {quant_min}"
    )
    assert quant_max <= quant_max_upper_bound, (
        "quant_max out of bound for dtype, "
        f"quant_max_upper_bound: {quant_max_upper_bound} quant_max: {quant_max}"
    )
r   zxquantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tensorZCompositeExplicitAutogradinputscale
zero_pointr   r   r   returnc                 C   sp   | j tjtjfv r| tj} | j tjksJ d| j  t||| d| }tt| | | |||S )a  Affine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scale (float): quantization parameter for affine quantization
       zero_point (int): quantization parameter for affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    <Expecting input to have dtype torch.float32, but got dtype:       ?)	r   r	   float16bfloat16tofloat32r   clampround)r   r   r   r   r   r   Z	inv_scaler   r   r   r   1   s   
ZMetac                 C   sH   | j tjtjfv r| tj} | j tjksJ d| j  tj| |dS )Nr   r   )r   r	   r    r!   r"   r#   
empty_liker   r   r   r   r   r   r   r   r   quantize_per_tensor_metaV   s   	
r)   zquantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensorc                 C   sV   |  dksJ d|   |  dksJ d|   t| | | |||S zAffine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values
    Same as `quantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
       >Expecting zero_point tensor to be one element, but received : 9Expecting scale tensor to be one element, but received : numelr   itemr(   r   r   r   quantize_per_tensor_tensorm   s   r1   c                 C   s   | j tjtjfv r| tj} | dksJ d|  | dks-J d|  | j tjks;J d| j  tj| |dS )Nr+   r,   r-   r   r&   )r   r	   r    r!   r"   r#   r/   r'   r(   r   r   r   quantize_per_tensor_tensor_meta   s   	
r2   zquantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensor2c                 C   s^   |  dksJ d|   |  dksJ d|   t| | | | | |S r*   r.   r(   r   r   r   quantize_per_tensor_tensor2   s   r3   c                 C   s   t | |||||S N)r2   r(   r   r   r    quantize_per_tensor_tensor2_meta   s   	r5   zdequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_tensor	out_dtyper8   c                C   sV   | j |ksJ d| d| j  |du rtj}|tv r$| || | S td| )a  Affine dequantization for the Tensor using the same quantization parameters to map
quantized_decomposed_lib.define(
    "dequantize_per_tensor(Tensor input, float scale, int zero_point, "
    "int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor"
)


@impl(quantized_decomposed_lib, "dequantize_per_tensor", "CompositeExplicitAutograd")
def dequantize_per_tensor(
    input: torch.Tensor, scale: float, zero_point: int,
    quant_min: int, quant_max: int, dtype: torch.dtype,
    *, out_dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per tensor quantized Tensor if combined with
       quantization parameters in the argument of this function (scale/zero_point)

       scale (float): quantization parameter for affine quantization

       zero_point (int): quantization parameter for affine quantization

       quant_min (int): minimum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): dtype for input Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
    """
    assert input.dtype == dtype, (
        f"Expecting input to have dtype: {dtype}, but got {input.dtype}"
    )
    if out_dtype is None:
        out_dtype = torch.float32
    if dtype in _DTYPE_TO_QVALUE_BOUNDS:
        return (input.to(out_dtype) - zero_point) * scale
    else:
        raise ValueError(f"Unsupported dtype in dequantize_per_tensor: {dtype}")


@impl(quantized_decomposed_lib, "dequantize_per_tensor", "Meta")
def dequantize_per_tensor_meta(
    input, scale, zero_point, quant_min, quant_max, dtype, *, out_dtype=None
):
    if out_dtype is None:
        out_dtype = torch.float32
    return torch.empty_like(input, dtype=out_dtype)
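# Illustrative round trip (values arbitrary): dequantization recovers the input
# up to one rounding step of the scale, for values inside the clamp range:
#
#   x = torch.randn(4, 4)
#   xq = torch.ops.quantized_decomposed.quantize_per_tensor(
#       x, 0.1, 0, -128, 127, torch.int8
#   )
#   x_hat = torch.ops.quantized_decomposed.dequantize_per_tensor(
#       xq, 0.1, 0, -128, 127, torch.int8
#   )
#   # (x - x_hat).abs().max() <= 0.05 wherever x / 0.1 stays within [-128, 127]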
quantized_decomposed_lib.define(
    "dequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, "
    "int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor"
)


@impl(quantized_decomposed_lib, "dequantize_per_tensor.tensor", "CompositeExplicitAutograd")
def dequantize_per_tensor_tensor(
    input: torch.Tensor, scale: torch.Tensor, zero_point: torch.Tensor,
    quant_min: int, quant_max: int, dtype: torch.dtype,
    *, out_dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values
    Same as `dequantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
    """
    assert zero_point.numel() == 1, (
        f"Expecting zero_point tensor to be one element, but received : {zero_point.numel()}"
    )
    assert scale.numel() == 1, (
        f"Expecting scale tensor to be one element, but received : {scale.numel()}"
    )
    return dequantize_per_tensor(
        input, scale.item(), zero_point.item(), quant_min, quant_max, dtype,
        out_dtype=out_dtype,
    )


@impl(quantized_decomposed_lib, "dequantize_per_tensor.tensor", "Meta")
def dequantize_per_tensor_tensor_meta(
    input, scale, zero_point, quant_min, quant_max, dtype, *, out_dtype=None
):
    if out_dtype is None:
        out_dtype = torch.float32
    assert zero_point.numel() == 1, (
        f"Expecting zero_point tensor to be one element, but received : {zero_point.numel()}"
    )
    assert scale.numel() == 1, (
        f"Expecting scale tensor to be one element, but received : {scale.numel()}"
    )
    assert input.dtype == dtype, f"Expecting input to have dtype: {dtype}"
    if dtype in _DTYPE_TO_QVALUE_BOUNDS:
        return torch.empty_like(input, dtype=out_dtype)
    else:
        raise ValueError(f"Unsupported dtype in dequantize_per_tensor: {dtype}")


quantized_decomposed_lib.define(
    "dequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, "
    "Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor"
)


@impl(quantized_decomposed_lib, "dequantize_per_tensor.tensor2", "CompositeExplicitAutograd")
def dequantize_per_tensor_tensor2(
    input, scale, zero_point, quant_min, quant_max, dtype, *, out_dtype=None
):
    """Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values
    Same as `dequantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
    """
    assert zero_point.numel() == 1, (
        f"Expecting zero_point tensor to be one element, but received : {zero_point.numel()}"
    )
    assert scale.numel() == 1, (
        f"Expecting scale tensor to be one element, but received : {scale.numel()}"
    )
    return dequantize_per_tensor(
        input, scale.item(), zero_point.item(), quant_min.item(), quant_max.item(),
        dtype, out_dtype=out_dtype,
    )


@impl(quantized_decomposed_lib, "dequantize_per_tensor.tensor2", "Meta")
def dequantize_per_tensor_tensor2_meta(
    input, scale, zero_point, quant_min, quant_max, dtype, *, out_dtype=None
):
    return dequantize_per_tensor_tensor_meta(
        input, scale, zero_point, quant_min, quant_max, dtype, out_dtype=out_dtype
    )


quantized_decomposed_lib.define(
    "choose_qparams.tensor(Tensor input, int quant_min, int quant_max, "
    "float eps, ScalarType dtype) -> (Tensor, Tensor)"
)


@impl(quantized_decomposed_lib, "choose_qparams.tensor", "CompositeExplicitAutograd")
def choose_qparams_tensor(
    input: torch.Tensor, qmin: int, qmax: int, eps: float, dtype: torch.dtype,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Given an input Tensor, derive the per tensor affine quantization parameter
    (scale and zero_point) for target quantized Tensor from the Tensor

    Args:
       input (torch.Tensor): floating point input Tensor
       quant_min (int): minimum quantized value for target quantized Tensor
       quant_max (int): maximum quantized value for target quantized Tensor
       dtype (torch.dtype): dtype for target quantized Tensor

    Returns:
       scale (float): quantization parameter for the target quantized Tensor
       zero_point (int): quantization parameter for the target quantized Tensor
    """
    assert input.dtype in [torch.float32, torch.float16, torch.bfloat16], (
        f"Expecting input to have dtype torch.float32/16/b16, but got dtype: {input.dtype}"
    )
    assert dtype in _DTYPE_TO_QVALUE_BOUNDS, (
        f"Expecting target dtype to be one of {_DTYPE_TO_QVALUE_BOUNDS.keys()}, but got: {dtype}"
    )
    validate_qmin_qmax(qmin, qmax)

    min_val, max_val = torch.aminmax(input)

    return determine_qparams(
        min_val, max_val, qmin, qmax, dtype, torch.Tensor([eps]),
        has_customized_qrange=False,
    )
rR   z|choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams_symmetric.tensorc              
   C   s   | j tjtjtjfv sJ d| j  |tv s#J dt  d| t|| t| \}}t	|||||t
|gdtjdS )rG   rH   rI   rJ   F)rK   Zqscheme)r   r	   r#   r    r!   r   rL   r   rM   r   rN   Zper_tensor_symmetricrO   r   r   r   choose_qparams_symmetric_tensor  s,   



rS   c                 C   sj   | j tjtjtjfv sJ d| j  ||k s!J d| d| tjdtj| jdtjdtj| jdfS )NrH   zKExpecting quant_min to be smaller than quant_max but received min:         z max: r+   r   device)	r   r	   r#   r    r!   emptydoublerU   int64r   r   r   rF   r   r   r   r   choose_qparams_tensor_meta  s"   


rZ   c                 C   s(   t jdt j| jdt jdt j| jdfS )Nr+   rT   )r	   rV   rW   rU   rX   rY   r   r   r   $choose_qparams_symmetric_tensor_meta  s   
r[   c                 C   s6   t t|  }d||< ||d< | t|}||fS )Nr   )listrangedimpermutetuple)xaxisZnew_axis_listyr   r   r   _permute_to_axis_zero  s
   rd   zquantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_channelscaleszero_pointsrb   c                 C   s   | j tjtjfv r| tj} | j tjksJ d| j  ||  k s,J d|   t||| t| |\} }dg|   }|j	d |d< |
|}|
|}tt| d|  | ||}	|	t|}
|
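# Illustrative usage sketch: deriving per-tensor qparams from data, then
# feeding them to the tensor overload (eps mirrors torch.finfo(torch.float32).eps):
#
#   x = torch.randn(4, 4)
#   scale, zero_point = torch.ops.quantized_decomposed.choose_qparams.tensor(
#       x, -128, 127, torch.finfo(torch.float32).eps, torch.int8
#   )
#   xq = torch.ops.quantized_decomposed.quantize_per_tensor.tensor(
#       x, scale, zero_point, -128, 127, torch.int8
#   )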
|S )at  Affine per channel quantization for the Tensor using the same quantization
    parameters for each channel/axis to map from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel
       zero_point (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
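# Illustrative usage sketch: per-channel quantization of a linear weight along
# axis 0, one (scale, zero_point) pair per output channel:
#
#   w = torch.randn(8, 16)
#   scales = w.abs().amax(dim=1) / 127.0
#   zero_points = torch.zeros(8, dtype=torch.int64)
#   wq = torch.ops.quantized_decomposed.quantize_per_channel(
#       w, scales, zero_points, 0, -128, 127, torch.int8
#   )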
    r   Expecting axis to be < r+   r   r   )r   r	   r    r!   r"   r#   r^   r   rd   shapeviewr$   r%   r_   r`   )r   rf   rg   rb   r   r   r   permute_axis_list	new_shaperesoutr   r   r   re   ,  s"   



c                 C   sr   | j tjtjfv r| tj} | j tjksJ d| j  ||  k s,J d|   t||| tj| |dS )Nr   rh   r&   )	r   r	   r    r!   r"   r#   r^   r   r'   )r   rf   rg   rb   r   r   r   r   r   r   quantize_per_channel_meta\  s   

ro   zdequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_channelc                C   s   | j |ksJ d| d| j  |du rtj}||  k s&J d|   t||| t| |\} }dg|   }	|jd |	d< ||	}|durT| ||	 | }
n| | }
|
|}
|
	t
|}|S )a  Affine per channel dequantization for the Tensor using the same quantization
    parameters for each channel/axis to map from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per channel quantized Tensor if combined with
       quantization parameter in the argument of this function (scales/zero_points/axis)

       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel

       zero_points (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel

       quant_min (int): minimum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): requested dtype for output Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
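# Illustrative usage sketch, continuing the per-channel example above:
#
#   w_hat = torch.ops.quantized_decomposed.dequantize_per_channel(
#       wq, scales, zero_points, 0, -128, 127, torch.int8
#   )
#   # w_hat matches w up to half a scale step per channel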
    Expecting input to have dtype , but got dtype: Nrh   r+   r   )r   r	   r#   r^   r   rd   ri   rj   r"   r_   r`   )r   rf   rg   rb   r   r   r   r8   rk   rl   rm   rn   r   r   r   rp   z  s"   )

c                C   sf   | j |ksJ d| d| j  |d u rtj}||  k s&J d|   t||| tj| |dS )Nrq   rr   rh   r&   )r   r	   r#   r^   r   r'   )r   rf   rg   rb   r   r   r   r8   r   r   r   dequantize_per_channel_meta  s   rs   zLchoose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)choose_qparams_per_tokenc                 C   sx   |   jddd}|jtjkr| }|tjkr#d}d|d  d }ntd| |jdd		|}t
|}||fS )
  Choose quantization parameters for per token quantization. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32/float16 Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor

    Returns:
        scales and zero_points, both float32 Tensors
    Tr^   Zkeepdim      r+   z/unsupported dtype in choose_qparams_per_token: gh㈵>r
   )absamaxr   r	   r    floatint8	Exceptionr$   div
zeros_like)r   r   rf   Zn_bitsr   rg   r   r   r   rt     s   

c                 C   @   t | jd d dg }tj|tj| jdtj|tj| jdfS Nrv   r+   rT   r\   ri   r	   rV   rW   rU   rX   r   r   sizer   r   r   choose_qparams_per_token_meta     	
r   z]_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor))_choose_qparams_per_token_asymmetric_implZCompositeImplicitAutogradc                 C   s   d\}}t j| ddd}t j| ddd}t |t |}t |t |}t t jj}|| t	||  }	|	j
|d}	||	 }
||	 }||
 }|| }t || dk||
 || }t 
||| }|	t j|t jfS )ru   )i   rv   Trw   rz   r   )r	   Zaminr|   r
   r   r   r   r#   rF   r}   r$   wherer%   r"   Zfloat64rX   )r   r   rD   rE   rP   rQ   Zmin_val_negZmax_val_posrF   r   Zdescaled_minZdescaled_maxZzero_point_from_min_errorZzero_point_from_max_errorr   r   r   r   r     s&   
zWchoose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)#choose_qparams_per_token_asymmetricc                 C   s
   t | |S r4   )r   r   r   r   r   r   r   E     
	c                 C   r   r   r   r   r   r   r   (choose_qparams_per_token_asymmetric_metaQ  r   r   c                 C   sf   t t|  d d }|| ksJ d| d|  || ks1J d| d|  d S )Nrv   znum_tokens: z	 scales: z zero_points: )mathprodr\   r   r/   )r   rf   rg   Z
num_tokensr   r   r   !_per_token_quant_qparam_dim_check`  s   r   z}quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tokenc                 C   sB   t ||| t| || | d| | |||} | S )a  Per token quantization for the Tensor using the quantization parameters to map
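# Illustrative usage sketch: per-token asymmetric qparams for a
# (batch, seq, hidden) activation, one (scale, zero_point) pair per token:
#
#   act = torch.randn(2, 4, 16)
#   s, zp = torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric(
#       act, torch.int8
#   )
#   aq = torch.ops.quantized_decomposed.quantize_per_token(
#       act, s, zp, -128, 127, torch.int8
#   )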
quantized_decomposed_lib.define(
    "quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, "
    "int quant_min, int quant_max, ScalarType dtype) -> Tensor"
)


@impl(quantized_decomposed_lib, "quantize_per_token", "CompositeExplicitAutograd")
def quantize_per_token(
    input: torch.Tensor, scales: torch.Tensor, zero_points: torch.Tensor,
    quant_min: int, quant_max: int, dtype: torch.dtype,
) -> torch.Tensor:
    """Per token quantization for the Tensor using the quantization parameters to map
    from floating point to quantized values. This means for an N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    """
    _quant_min_max_bounds_check(quant_min, quant_max, dtype)
    _per_token_quant_qparam_dim_check(input, scales, zero_points)
    input = (
        input.mul(1.0 / scales)
        .add(zero_points)
        .round()
        .clamp(quant_min, quant_max)
        .to(dtype)
    )
    return input


@impl(quantized_decomposed_lib, "quantize_per_token", "Meta")
def quantize_per_token_meta(input, scales, zero_points, quant_min, quant_max, dtype):
    _quant_min_max_bounds_check(quant_min, quant_max, dtype)
    return torch.empty_like(input, dtype=dtype)


quantized_decomposed_lib.define(
    "dequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, "
    "int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensor"
)


@impl(quantized_decomposed_lib, "dequantize_per_token", "CompositeExplicitAutograd")
def dequantize_per_token(
    input: torch.Tensor, scales: torch.Tensor, zero_points: torch.Tensor,
    quant_min: int, quant_max: int, dtype: torch.dtype,
    output_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """Per token dequantization for the Tensor using the quantization parameters to map
    from quantized values to floating point values. This means for an N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and dequantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): quantized Tensor (uint8, int8 etc.)
       scales (float64 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int64 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    """
    input = input - zero_points
    input = input.to(output_dtype) * scales
    return input


@impl(quantized_decomposed_lib, "dequantize_per_token", "Meta")
def dequantize_per_token_meta(
    input, scales, zero_points, quant_min, quant_max, dtype,
    output_dtype=torch.float32,
):
    _quant_min_max_bounds_check(quant_min, quant_max, dtype)
    return torch.empty_like(input, dtype=output_dtype)
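# Illustrative usage sketch, inverting the per-token example above:
#
#   a_hat = torch.ops.quantized_decomposed.dequantize_per_token(
#       aq, s, zp, -128, 127, torch.int8, torch.float32
#   )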
quantized_decomposed_lib.define(
    "quantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, "
    "int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensor"
)


@impl(quantized_decomposed_lib, "quantize_per_channel_group", "CompositeExplicitAutograd")
def quantize_per_channel_group(
    input, scales, zero_points, quant_min, quant_max, dtype, group_size=128
):
    assert group_size > 1
    # needed for GPTQ single column quantize
    if group_size > input.shape[-1] and scales.shape[-1] == 1:
        group_size = input.shape[-1]

    assert input.shape[-1] % group_size == 0
    assert input.dim() == 2

    to_quant = input.reshape(-1, group_size)
    assert torch.isnan(to_quant).sum() == 0

    scales = scales.reshape(-1, 1)
    zero_points = zero_points.reshape(-1, 1)

    input_int8 = (
        to_quant.mul(1.0 / scales)
        .add(zero_points)
        .round()
        .clamp_(quant_min, quant_max)
        .to(dtype)
        .reshape_as(input)
    )

    return input_int8
@impl(quantized_decomposed_lib, "quantize_per_channel_group", "Meta")
def quantize_per_channel_group_meta(
    input, scales, zero_points, quant_min, quant_max, dtype, group_size=128
):
    """Groupwise quantization within each channel for a 2-d Tensor using the quantization parameters
    to map from floating point to quantized values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and quantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N / group_size),)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    """
    assert group_size > 1
    # needed for GPTQ single column quantize
    if group_size > input.shape[-1] and scales.shape[-1] == 1:
        group_size = input.shape[-1]

    assert input.shape[-1] % group_size == 0
    assert input.dim() == 2
    return torch.empty_like(input, dtype=dtype)
quantized_decomposed_lib.define(
    "dequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, "
    "int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensor"
)


@impl(quantized_decomposed_lib, "dequantize_per_channel_group", "CompositeExplicitAutograd")
def dequantize_per_channel_group(
    w_int8: torch.Tensor, scales: torch.Tensor, zero_points: Optional[torch.Tensor],
    quant_min: int, quant_max: int, dtype: torch.dtype,
    group_size: int = 128, output_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """Groupwise dequantization within each channel for a 2-d Tensor using the quantization parameters
    to map from quantized values to floating point values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and dequantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N / group_size),)

    Args:
       input (torch.Tensor): quantized Tensor (uint8/int8 etc.)
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    """
    assert group_size > 1
    # needed for GPTQ single column dequantize
    if group_size > w_int8.shape[-1] and scales.shape[-1] == 1:
        group_size = w_int8.shape[-1]
    assert w_int8.shape[-1] % group_size == 0
    assert w_int8.dim() == 2

    w_int8_grouped = w_int8.reshape(-1, group_size)
    scales = scales.reshape(-1, 1)
    if zero_points is not None:
        zp = zero_points.reshape(-1, 1)
    else:
        zp = torch.zeros([], dtype=torch.int32, device=scales.device)
    w_dq = w_int8_grouped.sub(zp).mul(scales).reshape_as(w_int8).to(output_dtype)
    return w_dq
quantized_decomposed_lib.define(
    "fake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, "
    "int axis, int quant_min, int quant_max) -> Tensor"
)


class FakeQuantPerChannel(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, scales, zero_points, axis, quant_min, quant_max):
        if scales.dtype != torch.float32:
            scales = scales.to(torch.float32)
        if zero_points.dtype != torch.int32:
            zero_points = zero_points.to(torch.int32)
        assert input.dtype == torch.float32, (
            f"Expecting input to have dtype torch.float32, but got dtype: {input.dtype}"
        )
        assert axis < input.dim(), f"Expecting axis to be < {input.dim()}"
        broadcast_dims = list(range(0, axis)) + list(range(axis + 1, input.ndim))
        unsqueeze_scales = _unsqueeze_multiple(scales, broadcast_dims)
        unsqueeze_zero_points = _unsqueeze_multiple(zero_points, broadcast_dims)
        temp = torch.round(input * (1.0 / unsqueeze_scales)) + unsqueeze_zero_points
        out = (
            torch.clamp(temp, quant_min, quant_max) - unsqueeze_zero_points
        ) * unsqueeze_scales
        # Remember which elements were inside the quantized range so the
        # backward pass can zero out gradients for clamped elements.
        mask = torch.logical_and((temp >= quant_min), (temp <= quant_max))

        ctx.save_for_backward(mask)
        return out

    @staticmethod
    def backward(ctx, gy):
        (mask,) = ctx.saved_tensors
        return gy * mask, None, None, None, None, None


@impl(quantized_decomposed_lib, "fake_quant_per_channel", "Autograd")
def fake_quant_per_channel(
    input: torch.Tensor, scales: torch.Tensor, zero_points: torch.Tensor,
    axis: int, quant_min: int, quant_max: int,
) -> torch.Tensor:
    return FakeQuantPerChannel.apply(
        input, scales, zero_points, axis, quant_min, quant_max
    )


@impl(quantized_decomposed_lib, "fake_quant_per_channel", "Meta")
def fake_quant_per_channel_meta(
    input: torch.Tensor, scales: torch.Tensor, zero_points: torch.Tensor,
    axis: int, quant_min: int, quant_max: int,
) -> torch.Tensor:
    return torch.empty_like(input)


quantized_decomposed_lib.define(
    "convert_element_type.no_fuse(Tensor input, ScalarType dtype) -> Tensor"
)


@impl(quantized_decomposed_lib, "convert_element_type.no_fuse", "CompositeExplicitAutograd")
def convert_element_type(input: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    return torch.ops.prims.convert_element_type.default(input, dtype)


@impl(quantized_decomposed_lib, "convert_element_type.no_fuse", "Meta")
def convert_element_type_meta(input: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    return torch.empty_like(input, dtype=dtype)