
    [Thw                        S SK r S SKrS SKrS SKrS SKrS SKJs  Js  Jr	  S SK
Jr  S SKJr  S SKJr  S SKJrJrJrJrJrJr  S SKJrJrJrJrJrJrJrJr  S SKJ r J!r!  S SK"J#r#  SS	K$J%r%J&r&  / S
Qr'\r(\RR                  \RT                  RR                  \RV                  \RT                  RV                  0\RT                  RR                  \R                  RR                  \RT                  RV                  \R                  RV                  0S.r,S r-   S!S jr.S"S jr/S r0S r1S#S jr2    S$S jr3S r4S r5    S%S jr6S r7S r8S&S jr9S\Rt                  SS4S jr;S&S jr<S#S jr=      S'S jr>     S(S jr? S#S jr@S)S  jrAg)*    N)_FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set)DeQuantStubQuantWrapper)type_before_parametrizations   )get_qparam_dict)has_no_children_ignoring_parametrizations)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      [         $ )z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICT     V/var/www/auris/envauris/lib/python3.13/site-packages/torch/ao/quantization/quantize.pyr   r   B   s    &&r(   c                    UR                  [        U 5      U5      nUR                  X55      n[        U SU5      n[        R                  R
                  R                  R                  XP5        [        XP5      nX`l        U R                  5        H]  u  pxU(       a  US-   U-   OUn	Ub8  XtR                  S/ 5      ;   a  M0  [        U5      UR                  S/ 5      ;   a  MQ  [        XXi5        M_     g)a  This is a helper function for `propagate_qconfig_`

Args:
    module: input module
    qconfig_dict: dictionary that maps from name of submodule to quantization
                 configuration
    qconfig_parent: quantization config of parent module, we will fallback to
                   this config when there is no specified config for current
                   module
    prefix: corresponding prefix of the current module, used as key in
            qconfig_dict
    prepare_custom_config_dict: dictionary for custom handling of modules
                                see docs for :func:`~torch.ao.quantization.prepare_fx`

Return:
    None, module is modified inplace with qconfig attached
qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr+   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r)   r7   r7   G   s    2 "%%$V,nN "%%f=NVY?N	HH!!77O >~ V.N,,./5t+4%-223NPRSSE{)--.JBOP &%> /r(   c                 ,    Uc  0 nUc  0 n[        XUS9  g)ac  Propagate qconfig through the module hierarchy and assign `qconfig`
attribute on each leaf module

Args:
    module: input module
    qconfig_dict: dictionary that maps from name or type of submodule to
        quantization configuration, qconfig applies to all submodules of a
        given module unless qconfig for the submodules are specified (when
        the submodule already has qconfig attribute)
    prepare_custom_config_dict: dictionary for custom handling of modules
        see docs for :func:`~torch.ao.quantization.prepare_fx`

Return:
    None, module is modified inplace with qconfig attached
N)r<   )r7   )r8   r9   r<   s      r)   r   r   x   s)      !)%'"9Sr(   c                 $    U R                  U5      $ )z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs      r)   _observer_forward_hookrI      s    ''//r(   c                 *    U R                  US   5      $ )z2Forward pre hook that calls observer on the outputr   rD   )rF   rG   s     r)   _observer_forward_pre_hookrK      s    ''a11r(   Fc                     [        U S5      (       d   S5       eU(       a  U R                  [        SS9  g U R                  [        SS9  g )NrE   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrK   register_forward_hookrI   )r8   pre_hooks     r)   &_register_activation_post_process_hookrR      sV    )  QPQ  (()CT(R$$%;T$Jr(   c                   ^^^ Uc
  [        5       nUc  0 nTcI  [        U 5      n[        U5      S::  d
   SU 35       e[        U5      S:  a  [        [	        U5      5      OSmSS jmS mSUUU4S jjnU R                  5        GH  u  px[        U5      [        R                  4;   a  M&  [        [        U5      [        R                  [        R                  45      (       aN  T" U5      (       a?  [        US5      (       d   S	[        U5       S
35       eT" UR                  T5      Ul        M  M  [!        U["        5      (       a  T" U5      (       a
  U" U5        M  M  Ub*  [        U5      U;   a  T" U5      (       a  U" U5        GM  GM  [%        U5      (       a  ['        U5      n	U" X5        GM-  T" U5      (       aj  [        U5      U;   a[  U[        U5         n
U
R)                  U5      n[+        XU5        [        U
[-        [/        5       5      5      (       d  U" U5        GM  GM  [1        UUUTU5        GM     [3        U 5      (       a@  [!        U [4        R                  R6                  5      (       d  [        U 5      U;   a  U" U 5        [        U S5      (       aC  [!        U [4        R                  R6                  5      (       d  [        U 5      U;   a	  U" U 5        gggg)aG  Add observer for the leaf child of the module.

This function insert observer module to all leaf child module that
has a valid qconfig attribute.

Args:
    module: input module with qconfig attributes for all the leaf modules that we want to quantize
    qconfig_propagation_list: a list of quantizable modules that will have observers added to them
        if they are leaf nodes
    device: parent device, if any
    non_leaf_module_list: list of non-leaf modules we want to add observer

Return:
    None, module is modified inplace with added observer modules and forward_hooks
Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 b    Uc  U R                  5       OU" 5       nUb  UR                  U5        U$ N)
activationto)r+   devicespecial_act_post_processrV   s       r)   get_activation_post_process3_add_observer_.<locals>.get_activation_post_process   s=     (/  )+ 	
 MM&!r(   c                 D    [        U S5      =(       a    U R                  S L$ )Nr+   rN   r+   )ms    r)   needs_observation)_add_observer_.<locals>.needs_observation   s    q)$>$)>>r(   c                    > T" U 5      (       aX  [        U [        5      (       dB  U R                  ST" U R                  TU5      5        [	        U [        U R                  5      S9  ggg)z]Adds an activation post process module and register
a pre or post hook that calls the module
rE   rQ   N)
isinstancer   
add_moduler+   rR   r   )r^   rY   rX   rZ   r_   s     r)   insert_activation_post_process6_add_observer_.<locals>.insert_activation_post_process   sa    
 Q
1k(B(BLL)+IIv'? 35aii@ )Cr(   rE   zfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrU   )r   _get_unique_devices_lennextiterr5   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrN   r+   rE   rc   r   r   r   
from_floatsetattrtupler   _add_observer_r   r1   
Sequential)r8   qconfig_propagation_listnon_leaf_module_listrX   custom_module_class_mappingdevicesre   r?   r@   rY   observed_classobserved_childrZ   r_   s      `        @@r)   ru   ru      s   ,  '#G#I "*&(# ~&v.LA	j_`g_hi	j(+Gq(8d7m$d? & ,,.'.2::,>(/#2E2Es1W
 
 !''4  z&'CE'J&KKxyz  1LMM61-	 ( |,, ''.u5 ( !,,U37KK ''.u5 (*511'DU'K$*5Ke$$,U37RR8,U3N ,66u=NF.1 neO4E.FGG.~> H ($+S /h 	2&996588#6#677(04LL&v. 	+,,6588#6#677(04LL&v. M 8 	-r(   c                 0   U R                  5        Vs1 s H*  oR                  R                  S:w  d  M  UR                  iM,     snU R                  5        Vs1 s H*  oR                  R                  S:w  d  M  UR                  iM,     sn-  $ s  snf s  snf )Nmeta)
parametersrX   r6   buffers)r8   ps     r)   rh   rh   .  sy    $//1M1XX]]f5LHAHH1M ..*Q*Qhhmmv.E*Q  M Qs   BBB9Bc                     [        U 5      (       a-  [        U S5      (       a  U R                  (       a  [        U 5      $ U R	                  5        H  u  p[        U5      U R                  U'   M     U $ )aO  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
Note that this function will modify the children of module inplace and it
can return a new module which wraps the input module as well.

Args:
    module: input module with qconfig attributes for all the leaf modules
    that we want to quantize

Return:
    Either the inplace modified module with submodules wrapped in
    `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
    wraps the input module, the latter case only happens when the input
    module is a leaf module and we want to quantize it.
r+   )r   rN   r+   r   r5   r   _modules)r8   r?   r@   s      r)   r   r   4  s[      	2&99FI&&NNF##,,. 1% 8 /Mr(   c                 |   [         R                  R                  S5        Uc
  [        5       nUR	                  S0 5      nU(       d  [
        R                  " U 5      n UnUc
  [        5       n[        U SS9  [        S U R                  5        5       5      (       d  [        R                  " S5        [        U UUUS9  U $ )a/  Prepares a copy of the model for quantization calibration or quantization-aware training.

Quantization configuration should be assigned preemptively
to individual submodules in `.qconfig` attribute.

The model will be attached with observer or fake quant modules, and qconfig
will be propagated.

Args:
    `model`: input model to be modified in-place
    `inplace`: carry out model transformations in-place, the original module is mutated
    `allow_list`: list of quantizable modules
    `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
    `prepare_custom_config_dict`: customization configuration dictionary for prepare function

.. code-block:: python

   # Example of prepare_custom_config_dict:
   prepare_custom_config_dict = {
       # user will manually define the corresponding observed
       # module class which has a from_float class method that converts
       # float custom module to observed custom module
       "float_to_observed_custom_module_class": {
           CustomModule: ObservedCustomModule
       }
    }

z!quantization_api.quantize.prepareNr#   r9   c              3   `   #    U  H$  n[        US 5      =(       a    UR                  v   M&     g7f)r+   Nr]   ).0r^   s     r)   	<genexpr>prepare.<locals>.<genexpr>  s#     LOqwq)$22Os   ,.zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)ry   )r1   _C_log_api_usage_oncer   r/   copydeepcopyr   r   anymoduleswarningswarnru   )modelinplace
allow_listobserver_non_leaf_module_listr<   ry   rw   s          r)   r   r   O  s    F 
HH  !DE!)%C%E""<"@"@/# e$  *#G#I u40 LEMMOLLLK	
  %$?	 Lr(   c                    ^  [        T S5      (       a&  [        T R                  5      (       a  [        T S5        SU 4S jjnU" SS9  U" SS9  g )NrE   Fc                   > U (       a  TR                   OTR                  nU (       a  [        O[        n[	        5       nUR                  5        H  u  pEXRL d  M  UR                  U5        M     U H  nUR                  U5        M     g rU   )_forward_pre_hooks_forward_hooksrK   rI   setitemsaddpop)rQ   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr8   s         r)   remove_hooks5_remove_activation_post_process.<locals>.remove_hooks  sp    086,,f>S>S*2&8N 	  #u"*.."2I'$((3 #3 .ILL# .r(   Trb   F)rN   r   rE   delattr)r8   r   s   ` r)   _remove_activation_post_processr     sM     v0116Q&&7 7 	12
$ $% r(   c                     U R                  5        H  n[        U5        M     [        U S5      (       a  U ?[	        U 5        g)zzClean up the qconfig left in the module so that new qconfig can be
propagated.

Args:
    module: module to be cleaned up
r+   N)children_remove_qconfigrN   r+   r   )r8   r@   s     r)   r   r     s9     " # vy!!N#F+r(   c                     [         R                  R                  S5        Uc
  [        5       nU(       d  [        R
                  " U 5      n U R                  5         [        U SS9  U" U /UQ76   [        XSS9  U $ )aS  Quantize the input float model with post training static quantization.

First it will prepare the model for calibration, then it calls
`run_fn` which will run the calibration step, after that we will
convert the model to a quantized model.

Args:
    model: input float model
    run_fn: a calibration function for calibrating the prepared model
    run_args: positional arguments for `run_fn`
    inplace: carry out model transformations in-place, the original module is mutated
    mapping: correspondence between original module types and quantized counterparts

Return:
    Quantized model.
z"quantization_api.quantize.quantizeTr   )	r1   r   r   r   r   r   evalr   r!   )r   run_fnrun_argsmappingr   s        r)   r   r     sd    " 
HH  !EF:<e$	JJLE4 
58ED)Lr(   c                    [         R                  R                  S5        UGc  U[         R                  :X  a|  [        R
                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        0nGOU[         R                  :X  a|  [        R
                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        0nGO4U[         R                  :X  a+  [        R                  [         [        R"                  [         0nOU[         R$                  :X  a  [        R                  [&        0nO[)        SU S35      e[+        U[,        5      (       a  U[         R                  L a  [        nOcU[         R                  L a  [        nOIU[         R                  L a  [         nO/U[         R$                  L a  [&        nO[/        S[1        U5      5      e[3        [5        U[6        R8                  " U5      5      5      nUc
  [;        5       nU(       d  [<        R>                  " U 5      n U RA                  5         [C        X5        [E        XSS9  U $ )a*  Converts a float model to dynamic (i.e. weights-only) quantized model.

Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
by default is performed for layers with large weights size - i.e. Linear and RNN variants.

Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
If `qconfig` is provided, the `dtype` argument is ignored.

Args:
    model: input model
    qconfig_spec: Either:

        - A dictionary that maps from name or type of submodule to quantization
          configuration, qconfig applies to all submodules of a given
          module unless qconfig for the submodules are specified (when the
          submodule already has qconfig attribute). Entries in the dictionary
          need to be QConfig instances.

        - A set of types and/or submodule names to apply dynamic quantization to,
          in which case the `dtype` argument is used to specify the bit-width

    inplace: carry out model transformations in-place, the original module is mutated
    mapping: maps type of a submodule to a type of corresponding dynamically quantized version
        with which the submodule needs to be replaced

z*quantization_api.quantize.quantize_dynamicz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r1   r   r   qint8rl   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr	   	Embeddingquint4x2r
   
ValueErrorrc   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r!   )r   qconfig_specdtyper   r   default_qconfigs         r)   r   r     s   > 
HH  !MNEKK		20/4

3

3L emm#		20/4

3

3L ell"!B?L enn$!GL GwNkl  
L#	&	&EKK5Oemm#5Oell"?Oenn$DO@#e*  Ci.>.>.OPQ;=e$	JJLu+ED)Lr(   c                 6   [         R                  R                  S5        U R                  (       d   S5       eUc
  [	        5       nU(       d  [
        R                  " U 5      n [        U SS9  [        XSSS9  [        U [        UR                  5       5      SS9  U $ )	a  
Prepares a copy of the model for quantization calibration or
quantization-aware training and converts it to quantized version.

Quantization configuration should be assigned preemptively
to individual submodules in `.qconfig` attribute.

Args:
    model: input model to be modified in-place
    mapping: dictionary that maps float modules to quantized modules to be
             replaced.
    inplace: carry out model transformations in-place, the original module
             is mutated
z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r1   r   r   trainingr   r   r   r   r!   r   r   values)r   r   r   s      r)   r   r   4  s{     
HH  !HI>>NNN>13e$u40EDGEW^^5E1FPTULr(   c                     [         R                  R                  S5        U(       d  [        R                  " U 5      n U R                  5         [        U SS9  U" U /UQ76   [        U SS9  U $ )aC  Do quantization aware training and output a quantized model

Args:
    model: input model
    run_fn: a function for evaluating the prepared model, can be a
            function that simply runs the prepared model or a training
            loop
    run_args: positional arguments for `run_fn`

Return:
    Quantized model.
z&quantization_api.quantize.quantize_qatTr   )r1   r   r   r   r   trainr   r!   )r   r   r   r   s       r)   r    r    Q  sW     
HH  !IJe$	KKMt$
58E4 Lr(   c           	          [         R                  R                  S5        U(       d  [        R                  " U 5      n [        U USUUUS9  U(       a  [        U 5        U $ )ad  Converts submodules in input module to a different module according to `mapping`
by calling `from_float` method on the target module class. And remove qconfig at the
end if remove_qconfig is set to True.

Args:
    `module`: prepared and calibrated module
    `mapping`: a dictionary that maps from source module type to target
               module type, can be overwritten to allow swapping user defined
               Modules
    `inplace`: carry out model transformations in-place, the original module
               is mutated
    `convert_custom_config_dict`: custom configuration dictionary for convert function
    `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant

.. code-block:: python

   # Example of convert_custom_config_dict:
   convert_custom_config_dict = {
       # user will manually define the corresponding quantized
       # module class which has a from_observed class method that converts
       # observed custom module to quantized custom module
       "observed_to_quantized_custom_module_class": {
           ObservedCustomModule: QuantizedCustomModule
       }
   }

z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r1   r   r   r   r   _convertr   )r8   r   r   r   r   r   r   s          r)   r!   r!   h  sU    H 
HH  !DEv&!#=#= Mr(   c           
         Uc  U(       a
  [        5       O	[        5       nUc
  [        5       nUR                  S0 5      nU(       d  [        R
                  " U 5      n 0 nU R                  5        HE  u  p[        U	[        5      (       d  [        U	5      U;  a  [        U	USUUUS9  [        XXe5      Xx'   MG     UR                  5        H  u  pXR                  U
'   M     U $ )aC  Converts submodules in input module to a different module according to `mapping`
by calling `from_float` method on the target module class

Args:
    module: input module
    mapping: a dictionary that maps from source module type to target
             module type, can be overwritten to allow swapping user defined
             Modules
    inplace: carry out model transformations in-place, the original module
             is mutated
    is_reference: a flag to enable quantized reference module
    use_precomputed_fake_quant: a flag to enable use of precomputed fake quant

r$   Tr   )r   r   r   r/   r   r   r5   rc   r   r   r   r"   r   r   )r8   r   r   r   r   r   ry   reassignr?   modkeyvalues               r)   r   r     s    ,   ?@9; 	
 ")%C%E""<"@"@3R# v&H**,	 3--,S19TT*+E %5
 -& nn&
$ ' Mr(   c                    U n[        U S5      (       Ga)  U R                  Gb  Sn[        U 5      U;   a   U[        U 5         R                  U 5      nSnO[        U 5      U;   a  U[        U 5         n[        US5      (       ai  UR                  (       aX  U R                  c   eU R                  R                  5       nU" U R
                  5        [        U5      nUR                  X5      nOQ[        R                  " UR                  5      n	SU	R                  ;   a  UR                  XS9nOUR                  U 5      nSnU(       a  U R                  R                  5        H  n
UR                  U
5        M     U R                  R                  5        H  nU[        Ld  M  UR!                  U5        M!     [#        U 5      n[%        U5      S::  d3  [%        U5      S	:X  a  [&        R(                  " S
5      U;   d
   SU 35       e[%        U5      S:  a  [+        [-        U5      5      OSnU(       a  UR/                  U5        U$ )zSwaps the module if it has a quantized counterpart and it has an
`observer` attached.

Args:
    mod: input module
    mapping: a dictionary that maps from nn module to nnq module

Return:
    The corresponding quantized module of `mod`
r+   NFT_IS_REFERENCEr   r   r      r~   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rN   r+   r   from_observedr   weightr   rr   inspect	signaturer   r   r   rO   r   rI   rP   rh   ri   r1   rX   rj   rk   rW   )r   r   ry   r   new_modswappedqmodweight_post_processweight_qparamssigpre_hook_fnr   rz   rX   s                 r)   r"   r"     s    GsI3;;#:',0KK1,S1mC   G)#.'97<=Dt_--$2D2D{{...&)kk&8&8&:##CJJ/!01D!E//#>''8/3>>A"oo . G #ooc2GG"55<<>11+>  ? --446"8811': 7
 +3/Gw<1$G!ell6&:g&Ek`ah`ijk  -0L1,<T$w-($F

6"Nr(   c                     S n[        U S5      (       a  U R                  UU" U5      S-   '   U R                  5        H%  u  pEU(       a  U" U5      U-   OUn[        XQU5        M'     g)a  Traverse the modules and save all observers into dict.
This is mainly used for quantization accuracy debug
Args:
    mod: the top module we want to save all observers
    prefix: the prefix for the current module
    target_dict: the dictionary used to save all the observers
c                     U S:X  a  U $ U S-   $ )N r,   r'   )r;   s    r)   
get_prefix&_get_observer_dict.<locals>.get_prefix  s    2v76C<7r(   rE   N)rN   rE   r5   _get_observer_dict)r   target_dictr;   r   r?   r@   rA   s          r)   r   r     sn    8 s-.. '' 	v!::	
 ))+5;
6*T15}= ,r(   )Nr   N)NNr   )NNNN)FNNN)NF)NFTFNF)NFFNF)r   )Br   r   r   r   r1   torch.ao.nn.quantizedr2   rl   	quantizedro   torch.nntorch.ao.nn.intrinsicr   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   r   r   r	   r
   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r   r   r   torch.ao.quantization.stubsr   r   torch.nn.utils.parametrizer   utilsr   r   __all__is_activation_post_processr   quantizableMultiheadAttentionr&   r   r7   r   rI   rK   rR   ru   rh   r   r   r   r   r   r   r   r   r    r!   r   r"   r   r'   r(   r)   <module>r      s        # #  . F 	 	 	 B C M 9 
 	$$
r~~@@.
 	R\\..
))2<<+J+J2	 ' #.b20
2
K " $F/R: "&#AH!4, < EKKuWt:2 #$1l #$;~ KP9x>r(   