
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Callable, Optional, Union

import torch
from torch import Tensor
from torch.ao.quantization import ObserverOrFakeQuantize
from torch.ao.quantization.qconfig import _ObserverOrFakeQuantizeConstructor
from torch.fx import Node


__all__ = [
    "Quantizer",
    "QuantizationSpecBase",
    "QuantizationSpec",
    "FixedQParamsQuantizationSpec",
    "EdgeOrNode",
    "SharedQuantizationSpec",
    "DerivedQuantizationSpec",
    "QuantizationAnnotation",
]


class QuantizationSpecBase(ABC):
    """Base class for different types of quantization specs that allows users to
    specify how to quantize a Tensor (input/output of a Node) in the model
    """


@dataclass(eq=True, frozen=True)
class QuantizationSpec(QuantizationSpecBase):
    """Quantization spec for common operators that allows users to specify how to
    quantize a Tensor; this includes dtype, quant_min, quant_max, etc.
    """

    dtype: torch.dtype
    # observer or fake_quantize constructor, e.g. MinMaxObserver or MinMaxObserver.with_args(eps=eps)
    observer_or_fake_quant_ctr: _ObserverOrFakeQuantizeConstructor
    quant_min: Optional[int] = None
    quant_max: Optional[int] = None
    qscheme: Optional[torch.qscheme] = None
    ch_axis: Optional[int] = None
    is_dynamic: bool = False

    def __post_init__(self):
        # quant_min must not exceed quant_max
        if (
            self.quant_min is not None
            and self.quant_max is not None
            and self.quant_min > self.quant_max
        ):
            raise ValueError(
                f"quant_min {self.quant_min} must be <= quant_max {self.quant_max}."
            )

        # ch_axis cannot be validated against the number of channels here; just make sure it is not negative
        if self.ch_axis is not None and self.ch_axis < 0:
            raise ValueError("Ch_axis is < 0.")


@dataclass(eq=True, frozen=True)
class FixedQParamsQuantizationSpec(QuantizationSpecBase):
    dtype: torch.dtype
    scale: float
    zero_point: int
    quant_min: Optional[int] = None
    quant_max: Optional[int] = None
    qscheme: Optional[torch.qscheme] = None
    is_dynamic: bool = False


# Other points of quantization in the graph are referred to either by an input
# edge (the connection between an input node and the node consuming it, i.e. a
# tuple[Node, Node]) or by an output value, which is just the fx Node itself.
EdgeOrNode = Union[tuple[Node, Node], Node]
EdgeOrNode.__module__ = "torch.ao.quantization.quantizer.quantizer"
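
# Illustrative sketch (not part of the original module): a typical way to build
# a QuantizationSpec for an 8-bit, per-tensor affine activation. The choice of
# MinMaxObserver (from torch.ao.quantization.observer) and the eps value are
# assumptions made only for this example.
#
#     from torch.ao.quantization.observer import MinMaxObserver
#
#     act_qspec = QuantizationSpec(
#         dtype=torch.int8,
#         quant_min=-128,
#         quant_max=127,
#         qscheme=torch.per_tensor_affine,
#         is_dynamic=False,
#         observer_or_fake_quant_ctr=MinMaxObserver.with_args(eps=2**-12),
#     )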


@dataclass(eq=True, frozen=True)
class SharedQuantizationSpec(QuantizationSpecBase):
    """
    Quantization spec for the Tensors whose quantization parameters are shared with other Tensors
    """

    # the edge or node whose observer / fake quant instance should be reused
    edge_or_node: EdgeOrNode


@dataclass(eq=True, frozen=True)
class DerivedQuantizationSpec(QuantizationSpecBase):
    """Quantization spec for the Tensors whose quantization parameters are derived from other Tensors"""

    derived_from: list[EdgeOrNode]
    derive_qparams_fn: Callable[[list[ObserverOrFakeQuantize]], tuple[Tensor, Tensor]]
    dtype: torch.dtype
    quant_min: Optional[int] = None
    quant_max: Optional[int] = None
    qscheme: Optional[torch.qscheme] = None
    ch_axis: Optional[int] = None
    is_dynamic: bool = False
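
# Illustrative sketch (not part of the original module): sharing and deriving
# quantization parameters. The node names and the bias-scale convention below
# are hypothetical placeholders used only for this example.
#
#     # Reuse the observer of the (input_node, add_node) edge for another tensor:
#     shared_qspec = SharedQuantizationSpec((input_node, add_node))
#
#     # Derive bias qparams from the activation and weight observers, using the
#     # common "bias_scale = act_scale * weight_scale" convention:
#     def derive_bias_qparams(obs_or_fqs):
#         act_scale, _ = obs_or_fqs[0].calculate_qparams()
#         weight_scale, _ = obs_or_fqs[1].calculate_qparams()
#         scale = act_scale * weight_scale
#         return scale, torch.zeros_like(scale, dtype=torch.int32)
#
#     bias_qspec = DerivedQuantizationSpec(
#         derived_from=[(input_node, conv_node), (weight_node, conv_node)],
#         derive_qparams_fn=derive_bias_qparams,
#         dtype=torch.int32,
#         quant_min=-(2**31),
#         quant_max=2**31 - 1,
#         qscheme=torch.per_tensor_symmetric,
#     )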


@dataclass
class QuantizationAnnotation:
    """How the input arguments or output should be quantized,
    expressed as QuantizationSpec, this corresponds to how a Tensor in the
    operator Graph is observed (PTQ) or fake quantized (QAT)
    """

    # a map from torch.fx.Node to a type of QuantizationSpecBase
    input_qspec_map: dict[Node, Optional[QuantizationSpecBase]] = field(
        default_factory=dict
    )

    # how the output of this node is quantized, expressed as QuantizationSpec
    output_qspec: Optional[QuantizationSpecBase] = None

    # a node and an edge observing the same Tensor may share observers
    # implicitly; this flag lets users turn that behavior off for the output
    allow_implicit_sharing: bool = True

    # whether the node is annotated or not
    _annotated: bool = False
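
# Illustrative sketch (not part of the original module): attaching an
# annotation to an FX node. The pt2e prepare pass conventionally reads the
# annotation from node.meta["quantization_annotation"]; treat that key and the
# node / qspec names below as assumptions of this example.
#
#     conv_node.meta["quantization_annotation"] = QuantizationAnnotation(
#         input_qspec_map={
#             input_act_node: act_qspec,
#             weight_node: weight_qspec,
#         },
#         output_qspec=act_qspec,
#         _annotated=True,
#     )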


class Quantizer(ABC):
    def transform_for_annotation(
        self, model: torch.fx.GraphModule
    ) -> torch.fx.GraphModule:
        """Allows for user defined transforms to run before annotating the graph.
        This allows the quantizer to quantize parts of the model that are otherwise not quantizable.
        For example, the quantizer can
        a) decompose a compound operator like scaled dot product attention
        into bmm and softmax, if it knows how to quantize bmm/softmax but not sdpa,
        or b) transform scalars to tensors to allow quantizing scalars.

        Note: this is an optional method
        """
        return model

    # annotate nodes in the graph with observer or fake quant constructors
    # to convey the desired way of quantization
    @abstractmethod
    def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
        pass

    # validate that the annotated graph is supported by the backend
    @abstractmethod
    def validate(self, model: torch.fx.GraphModule) -> None:
        pass

    def prepare_obs_or_fq_callback(
        self,
        model: torch.fx.GraphModule,
        edge_or_node_to_obs_or_fq: dict[EdgeOrNode, ObserverOrFakeQuantize],
    ) -> None:
        """A callback that will be called after the observers or fake quants are created
        for each sharing group, but before they are inserted into the graph. The
        callback can be used to make final quantization adjustments, such as enforcing
        specific scale and zero point on model input or output.

        Args:
          * `model`: the graph module being prepared.
          * `edge_or_node_to_obs_or_fq`: a dictionary mapping each annotated edge and
            node to the corresponding observer or fake quant object. Note that multiple
            edges and/or nodes can map to the same observer / fake quant instance if
            they were annotated with SharedQuantizationSpec. This dictionary can be
            modified by the callback.
        """