
    [ThL                        S SK r S SKrS SKrS SKJrJrJrJr  S SKrS SK	J
r
Jr  S SKJrJr  S SKJrJrJrJr  S SKJr  S SKJrJr  S SKJr  S S	KJr  S S
KJr  S SKJ r   S SK!J"r"J#r#J$r$J%r%J&r&J'r'J(r(  S SK)J*r*  S SK+J,r,J-r-J.r.  S SK/J0r0  SSK1J2r2  SSK3J4r4J5r5  SSK6J7r7J8r8  SSK9J:r:J;r;  SSK<J<r<  SSK=J>r>J?r?J@r@JArAJBrB  SSKCJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrKJLrL  / SQrM\R                  \R                  \R                  \R                  \R                  \R                  \R                  \R                  \R                  \R                  /
rX\R                  \R                  R                  R                  R                  \R                  \R                  R                  R                  R                  0r`S\R                  RT                  S\.S\b\c\R                  R                  4   S\b\c\f\c\g4   4   S\b\c\4   SS4S jrhS\R                  RT                  S\.S\b\c\R                  R                  4   S\b\c\f\c\g4   4   S\b\c\4   SS4S  jriS\.S!\-SS4S" jrjS#\R                  R                  S\k4S$ jrlS\,S\b\c\4   S\k4S% jrmS&\*S'\SS4S( jrnS)\S\.S!\-SS4S* jroS+\.S\b\c\f\c\g4   4   S\b\c\4   S\f\c\c4   4S, jrpS\.S!\-SS4S- jrqS\.S\b\c\R                  R                  4   S\\R                  R                     4S. jrrS\.S\b\c\R                  R                  4   S\R                  RT                  S/\kS'\\   SS4S0 jrs  S?S\.S\b\c\R                  R                  4   S1\t\c   S\b\c\4   S'\S2\kS/\kSS4S3 jjruS\.S4\.S!\-SS4S5 jrvS\.S!\-S\b\c\R                  R                  4   S6\b\\b\g\g4   4   S7\t\.   SS4S8 jrw        S@S\*S/\kS9\\7\b\c\4   S4   S:\kS;\kS<\\\b\c\4   S4   S'\\\b\c\4   S4   S2\kS=\kS\*4S> jjrxg)A    N)AnyCallableOptionalUnion)
CUSTOM_KEYNUMERIC_DEBUG_HANDLE_KEY)BackendConfigget_native_backend_config)get_fused_module_classesget_pattern_to_dtype_configsget_qat_module_classes-get_root_module_to_quantized_reference_module)_is_activation_post_process)qconfig_equals
QConfigAny)QConfigMapping)	QuantType)_remove_qconfig)DeQuantStub)_parent_name"activation_is_statically_quantizedget_qparam_dictget_swapped_custom_module_classis_per_channelto_underlying_dtypeweight_is_quantized)GraphModule)ArgumentGraphNode)type_before_parametrizations   )quantized_decomposed_lib)convert_eq_obsupdate_obs_for_equalization)ConvertCustomConfigPrepareCustomConfig)_is_observed_module_is_observed_standalone_module)lower_to_fbgemm))_compare_prepare_convert_qconfig_mappings_generate_node_name_to_qconfig&_is_qconfig_supported_by_dtype_configs_update_qconfig_for_fusion_update_qconfig_for_qat)	_get_module_is_custom_module_lstm_is_custom_module_mhaassert_and_get_unique_devicecollect_producer_nodescreate_getattr_from_valueget_custom_module_class_keys graph_module_from_producer_nodesnode_arg_is_weight)convertconvert_custom_moduleconvert_standalone_moduleconvert_weighted_modulemodelnodemodulesnode_name_to_scopenode_name_to_qconfigreturnc           	      .  ^ U R                   nUc   e[        UR                  [        5      (       d   e[	        XT5      u  pgX!R                     n[        US5      (       a  UR                  X5        g[        U4S j[        UR                  5      [        UR                  R                  5       5      -    5       5      n	U	(       d  [        U5      (       dJ  UR                  U5         UR                  UR                  S   5        UR                  U5        SSS5        gUR                   n
Sn[        US5      (       a  UR"                  nS nU
[$        ;   Ga  U(       Gd  SnSnUR'                  5       u  nn[)        UR*                  5      (       a  [-        UR.                  5      n[0        R2                  R4                  R6                  R8                  n[0        R2                  R4                  R:                  R8                  nUR<                  nUR>                  n[A        U
5      nUUUUUUS	.nO[0        R2                  R4                  RB                  R8                  n[0        R2                  R4                  RD                  R8                  n[G        U5      n[-        U5      nUR<                  nUR>                  n[A        U
5      nUUUUUS
.nUR                  U5         UR                  S   nU/nURI                  5        H]  u  nnUS;   a@  [        U[F        [,        45      (       d%  [K        XXg-   U-   U5      nURM                  U5        ML  URM                  U5        M_     URO                  X[Q        U5      0 5      nU/USS -   nURS                  U[Q        U5      U" UU5      5      nUR                  U5        [T        URV                  ;   at  [X        URV                  [T           ;   aY  [T        URV                  ;  a  0 URV                  [T        '   URV                  [T           [X           URV                  [T           [X        '   UR                  U5        SSS5        gU(       Ga  Sn[0        R2                  R4                  RB                  RZ                  n[A        U
5      nU[0        R\                  [0        R^                  4;   d   S5       eUR<                  nUR>                  n[a        US[0        Rb                  5      n[a        US[0        Rd                  " [0        Rf                  5      Rh                  5      n UUU US.n[j        U   n!UR                  U5         UR                  S   nUR                  S   /n"URI                  5        H  u  nn#U"RM                  U#5        M     URO                  SU![Q        U"5      0 5      n$URO                  S[l        Rn                  U$S40 5      n%URO                  S[l        Rn                  U$S40 5      n&US   nUS   nUS   n
U%U&UUU
S
.nSSS5        UR                  U5         UR                  S   nU/nURI                  5        H2  u  nnUS;   a  UnURM                  U5        M!  URM                  U5        M4     URO                  X[Q        U5      0 5      nU/USS -   n[0        R2                  R4                  RD                  RZ                  nURS                  U[Q        U5      U" UU5      5      nUR                  U5        [X        URV                  ;   a$  URV                  [X           URV                  [X        '   UR                  U5        SSS5        gU
[0        Rp                  :X  a  [0        R2                  R4                  Rr                  Rt                  n'UR                  U5         UR                  S   nURO                  SU'U[0        Rp                  40 5      n(URO                  SU'U([0        RF                  40 5      n)UR                  U)5        UR                  U5        SSS5        gg! , (       d  f       g= f! , (       d  f       g= f! , (       d  f       GNK= f! , (       d  f       g= f! , (       d  f       g= f)aU  Replace activation_post_process module call node with quantize and
dequantize node working with decomposed Tensor

Before:
... -> observer_0(x) -> ...
After:
... -> torch.ops.quantized_decomposed.quantize_per_tensor(x, ...) ->
torch.ops.quantized_decomposed.dequantize_per_tensor() -> ...

or quantize_per_channel and dequantize_per_channel
Nr9   c              3   <   >#    U  H  n[        UT5      v   M     g 7fN_has_none_qconfig.0nrA   s     X/var/www/auris/envauris/lib/python3.13/site-packages/torch/ao/quantization/fx/convert.py	<genexpr>M_replace_observer_with_quantize_dequantize_node_decomposed.<locals>.<genexpr>y   #      :A 	!122:   r   F
is_dynamicc                     0 nSUR                   ;   a1  UR                   S   R                  nU[        R                  :w  a  SU0nU$ )Nval	out_dtype)metadtypetorchfloat32)dequantize_op
input_nodedequantize_op_kwargsdq_out_dtypes       rK   add_dequantize_op_kwargs\_replace_observer_with_quantize_dequantize_node_decomposed.<locals>.add_dequantize_op_kwargs   sE    !JOO#%??5177Lu}},(3\'B$##    call_function)_scale__zero_point__axis__quant_min__quant_max__dtype_)r`   ra   rc   rd   re   r`   ra   r"   zVonly uint8 and int8 are supported in reference flow for dynamic quantization right nowqschemeeps)rc   rd   _eps_re   rc   rd   re   );graph
isinstancetargetstr_get_module_path_and_prefixhasattrr9   alllistargsuserskeys_is_conversion_supportedinserting_beforereplace_all_uses_with
erase_noderU   rP   SUPPORTED_QDTYPEScalculate_qparamsr   rg   intch_axisrV   opsquantized_decomposedquantize_per_channeldefaultdequantize_per_channel	quant_min	quant_maxr   quantize_per_tensordequantize_per_tensorfloatitemsr5   appendcreate_nodetupler_   r   rT   r   tensoruint8int8getattrper_tensor_affinefinforW   rh   _QSCHEME_TO_CHOOSE_QPARAMS_OPoperatorgetitemfloat16convert_element_typeno_fuse)*r=   r>   r?   r@   rA   rj   module_pathprefixactivation_post_processskip_replacementrU   rP   r\   	node_typequantize_opscale
zero_pointr|   rX   r   r   dtype_qparamsrY   quantize_op_inputskeyvalue_or_nodeqparam_nodequantized_node	dq_inputsdequantized_noderg   rh   choose_qparams_opchoose_qparams_op_inputsvaluechoose_qparams_node
scale_nodezero_point_nodedtype_convert_opconvert_fp16_nodeconvert_fp32_nodes*       `                                     rK   :_replace_observer_with_quantize_dequantize_node_decomposedr   [   s   $ KKEdkk3''''5"6K &kk2&	22''4  dii4

(9#::  78OPP ##D)&&tyy|4T" * 	 $))EJ&55,77
$ !!: $	*.3EEGz199::199:G))88MMUUK		..EEMM  099I/99I(/F  *!((!G  ))88LLTTK!II::PPXXM%LEZJ/99I/99I(/F  *((!G ##D)1J",&-mmo"] 55"=5#,?? #<k&:S&@-#K '--k: '--m<) '6, #...@(A2N ((+=ab+AAI$22i (
C  &&'78 dii',		*0EE%5%:%::8:$))*5NRiiO*O, %%j12JK T"_ *)` 
 $	ii44HHOO
 %U+%++uzz22 	
-	
2 ,55	+55	19e>U>UV-uekk%--6P6T6TU
 %$	
 :'B##D)1J(,		!~$%mmo
U )//6 . #("3"3!2E:R4SUW# **!1!14G3KRJ $//!1!14G3KRO  .I.II&E% /(( G) *: ##D)1J",&-mmo"] 55 #0K&--k: '--m< '6 #...@(A2N ((+=ab+AAI "II::PPWWM$22i (
C  &&'78'4994BF)),C %%&>? T"K *)L 
%--	 9999NNVV##D)1J % 1 1!1J3NPR! !& 1 1!14Eu{{3SUW! &&'89T" *) 
 k *) 	@ *)^ *): *)R *)s@   '0aE6aCa#8D)a5=A:b
a
a #
a25
b
bc                 	  ^ Uc   e[        UR                  [        5      (       d   eU R                  n[	        XT5      u  pgX!R                     n[        U4S j[        UR                  5      [        UR                  R                  5       5      -    5       5      n	U	(       d  [        U5      (       dJ  UR                  U5         UR                  UR                  S   5        UR                  U5        SSS5        gUR                  n
Sn[        US5      (       a  UR                   nU
["        R$                  ["        R&                  ["        R(                  ["        R*                  ["        R,                  4;   Gab  U(       GdZ  SnSnUR/                  5       u  p[1        UR2                  5      (       a-  [5        UR6                  5      nUUUU
S.n["        R8                  nO+[;        U5      n[5        U5      nXU
S.n["        R<                  nUR                  U5         UR                  S   nU/nUR?                  5        HB  u  nnUS	;   a%  [A        XXg-   U-   U5      nURC                  U5        M1  URC                  U5        MD     URE                  X[G        U5      0 5      nURI                  S
U4S9nUR                  U5        UR                  U5        SSS5        gU(       a  Sn["        RJ                  n["        RL                  RN                  RP                  S;   nU
US.nUR                  U5         UR                  S   nU/nUR?                  5        H  u  nnURC                  U5        M     URE                  X[G        U5      0 5      nURI                  S
U4S9nUR                  U5        UR                  U5        SSS5        gU
["        RR                  :X  a  SnSnSU
0nUR                  U5         UR                  S   nU/nUR?                  5        H  u  nnURC                  U5        M     URE                  X[G        U5      0 5      nURI                  S
U4S9nUR                  U5        UR                  U5        SSS5        gg! , (       d  f       g= f! , (       d  f       g= f! , (       d  f       g= f! , (       d  f       g= f)zReplace activation_post_process module call node with quantize and
dequantize node

Before:
... -> observer_0(x) -> ...
After:
... -> torch.quantize_per_tensor(x, ...) -> x.dequantize() -> ...
Nc              3   <   >#    U  H  n[        UT5      v   M     g 7frE   rF   rH   s     rK   rL   B_replace_observer_with_quantize_dequantize_node.<locals>.<genexpr>  rN   rO   r   FrP   r_   )r`   ra   rb   re   )r`   ra   re   rf   
dequantize)rr   )fbgemmx86)re   _reduce_range_call_methodtore   )*rk   rl   rm   rj   rn   rp   rq   rr   rs   rt   ru   rv   rw   rx   rU   ro   rP   rV   quint8qint8qint32float8_e5m2float8_e4m3fnrz   r   rg   r{   r|   r   r   r   r   r5   r   r   r   r   quantize_per_tensor_dynamicbackends	quantizedenginer   )r=   r>   r?   r@   rA   rj   r   r   r   r   rU   rP   r   r   r   r   r|   r   rY   r   r   r   r   r   r   reduce_ranger   s       `                      rK   /_replace_observer_with_quantize_dequantize_noder   g  s    dkk3''''KKE5"6K &kk2  dii4

(9#::  78OPP ##D)&&tyy|4T" * 	 $))EJ&55,77
   $	*.3EEG199::199:G  *! 	G  44K%LEZJ"'PUVG33K ##D)1J",&-mmo"] 55 #<k&:S&@-#K '--k: '--m< '6 #...@(A2N  %00^DU0V&&'78T"- *). 
 $	77 ~~//66:KK#|D##D)1J",%mmo
U"))%0 . #...@(A2N  %00^DU0V&&'78T" *) 
%--	!	e$##D)1J",%mmo
U #))%0 .
 #...@(A2N  %00^DU0V&&'78T" *)	 
 u *) 	T *)B *)  *)s3   0RB8R2BR.5BR?
R
R+.
R<?
Srj   c                     U R                   S   n[        U[        5      (       d
   SU 35       eU R                  U5        UR	                  U 5        [        X!5        g )Nr   z@Expecting the for call custom module node to be a Node, but got )rr   rk   r    rw   rx   _insert_dequantize_node)r>   rj   call_custom_module_nodes      rK   6_replace_observer_or_dequant_stub_with_dequantize_noder     si     #iil  d	IJaIbcd  	67	T3;r^   r   c                     U R                   nSn[        U S5      (       a  U R                  nU[        ;   =(       a    U(       + =(       d    U=(       d    U[        R
                  :H  $ )NFrP   )rU   ro   rP   ry   rV   r   )r   rU   rP   s      rK   ru   ru     s[    #))EJ&55,77
 
#	#	8Z 	"	"EMM!r^   c                 |    [        U [        5      =(       a&    U R                  U;   =(       a    XR                     SL $ )zTCheck if a node has a qconfig of None, i.e. user requested to not quantize
the node
N)rk   r    name)r>   rA   s     rK   rG   rG     s9     	4 	4II--	4 +t3r^   observedbackend_configc                    U R                   R                   He  nUR                  S:w  a  M  UR                   H@  nU(       d  M  [	        X#5      (       d  M  [        U5      nUc  M.  [        X5      nU" 5         MB     Mg     g)zExtract the subgraph that produces the weight for dynamic quant
or weight only quant node and run the subgraph to observe the weight.
Note that the observers of dynamic quant or weight only quant ops are
run during the convert step.
r_   N)rj   nodesoprr   r8   r4   r7   )r   r   r>   node_argweight_observer_nodesweight_observer_modules         rK   _run_weight_observersr   #  so     $$77o%		Hx.t>>(>x(H%(0)I*& '( " %r^   argc                    [        U [        5      (       aA  U R                  S:X  a1  U R                  S:X  a!  U R                  S   nUR                  X5        g[        U [        [        45      (       a  U  H  n[        XAU5        M     g[        U [        5      (       a$  U R                  5        H  n[        XAU5        M     g[        R                  " S[        U 5       35        g)zwIf the arg is a dequantize Node, or a list/tuple/dict of dequantize Node,
we'll recursively remove the dequantize Node
r   r   r   z6Unsupported node type in recursive remove dequantize: N)rk   r    r   rl   rr   replace_input_withrq   r   "_maybe_recursive_remove_dequantizedictvalueswarningswarntype)r   r>   rj   quantize_nodearg_elements        rK   r   r   9  s     #t=!8SZZ<=W 	3	C$	'	'K.{%H 	C		::<K.{%H ( 	DT#YKP	
r^   obs_nodec                    U R                   S   n[        U[        5      (       d
   SU 35       eUR                  U;   a  X#R                     SL OSnU(       a  [	        U R
                  5      nU(       a  US   OSnSnU HK  nUR                  S:X  d  M  UR                  [        R                  R                  R                  :X  d  MI  Un  O   U(       a  UnSn	OUnSn	U(       a#  UR                  U;   a  XR                     u  pX4$ Sn
X4$ )aQ  Given and observer node, get the `Scope` or the fully qualified name for
the submodule containing the observed node, also return a prefix of "_input"
when the observed node is an input of a F.linear op, and not the output of another
quantized op.
TODO: this logic is hacky, we should think about how to remove it or make it more
general
r   z.Expecting observed node to be a Node, but got Nr_   _input )rr   rk   r    r   rq   rs   r   rl   rV   nn
functionallinear)r   r@   rA   observed_nodeis_input_observer_onlyrs   first_linear_use_or_first_uselinear_noderJ   r   r   _s               rK   rn   rn   N  s*    MM!$M
 t  H	7GH 
 !55 	//0D8 
  X^^$49at%Att&188uxx7J7J7Q7Q+Q  ,7) )6% 	&)..2DD+,N,NO  r^   c                     UR                  U 5         UR                  SU 45      n[        U R                  5       H  nX2Ld  M	  UR	                  X5        M     SSS5        g! , (       d  f       g= f)z-Inserts dequantize node for `node` in `graph`r   N)inserting_afterr   r   rs   r   )r>   rj   dequantize_node	user_nodes       rK   r   r     sW    			t	$++L4'Bdjj)I/,,TC * 
%	$	$s   .A"A""
A0c                     U R                   R                  5        HA  nUR                  S:X  d  M  U[        UR                  5         n[        U5      (       d  M?  Us  $    g)zP
If the node is observed, return the observer
instance. Otherwise, return None.
call_moduleN)rs   rt   r   rm   rl   r   )r>   r?   maybe_obs_node	maybe_obss       rK   _maybe_get_observer_for_noder     sR     **//+-N$9$9 :;I*955  	 ,
 r^   is_referencec                    U(       a/  [         R                  R                  R                  R                  nO.[         R                  R                  R                  R
                  nU[        U R                  5         nUR                  S   R                  n[        U R                  5      n[        [        U5      5       H  n	X;   d  M
  X   n
U
R                  S:X  d  M   U
R                  S:X  d  M2  U
R                  S   nU R                  X5        [        U
R                   5      S:X  d  Mm  UR"                  R%                  U
5        M     UR                  S   R&                  n[        U5      S:  a&  US   S:X  d   S5       e [)        XR"                  5        U" XdS9n[+        U R                  5      u  p[-        X   X5        X[        U R                  5      '   g)a|  Converts a observed standalone module to a quantized standalone module by calling
the fx convert api, currently using the same `is_reference` flag as parent, but we may
changing this behavior in the future (e.g. separating quantization and lowering for
standalone module as well)

Args:
  - node: The call_module node of the observed standalone module
  - modules: named_module of original model
  - model: original model
  - is_reference: a flag from parent provided by user to decide if we want to
    produce a reference model or a fbgemm/qnnpack model
  - backend_config: backend configuration of the target backend of quantization
_observed_graph_module_attrsr   r   r   zCurrently only quantized)r   N)rV   aoquantizationquantize_fxconvert_to_reference_fx
convert_fxrm   rl   rT   &standalone_module_input_quantized_idxsrq   rr   rangelenr   r   rs   rj   rx   'standalone_module_output_quantized_idxsr   r   setattr)r>   r?   r=   r   r   
convert_fnobserved_standalone_modulesm_input_quantized_idxsrr   idxr   r   sm_output_quantized_idxsquantized_standalone_moduleparent_namer   s                   rK   r;   r;     s   * XX**66NN
XX**66AA
 /6c$++6F.G8==&,,  		?DSY))Cvv&3::+E #'';syy>Q&KK**3/    :>>& --  #$q('*a/K1KK/( 	 kk2 #-"# %T[[1KG $D ;Cr^   observed_node_namesis_decomposedc                 d   U[        U R                  5         nUR                  nSn	[        U5      n
[	        Xz5      (       aA  UR
                  n	UR                  5       n[        U R                  5      u  p[        X   X5        U R                  U;   nUb  [        X5      (       d  U(       d  g[        U5      nUR                  [        U5      / 5      n[        X5      (       d  g[        U5      nU(       d  gSnUn[	        U[         R"                  R$                  R&                  R(                  5      (       a  UnUS   nSU0n[	        U[         R$                  R*                  5      (       ap  UR-                  5       nUR-                  5       nU" UR.                  5        U" UR0                  5        [3        U5      n[3        U5      nUR5                  UUS.5        GOX[	        U[         R$                  R6                  [         R$                  R8                  45      (       a  UR:                   H~  n[=        UU5      (       d  M  UR?                  S5      (       d  M.  [A        UU5      nUR-                  5       n	U	RB                  [         RD                  :X  a  U	" U5        [3        U	5      UU'   M     OU	SL nU(       a3  UR-                  5       n	[G        U5      nU(       a  U	RI                  U5        U(       + nU(       a  U(       a  U(       d  U	" UR,                  5        UR5                  [3        U	5      5        [K        U5      nUR                  [M        U5      S5      nUc   S[M        U5       35       eURO                  UU5      nUb  UUS'   g[        U R                  5      u  p[        X   UU5        g)a  Convert a weighted module to reference quantized module in the model
If the QConfig of a QAT module is not set, the module will still be converted to
a float module.

Args:
  - node: The call_module node of the observed standalone module
  - modules: named_module of original model
  - observed_node_names: names for the set of observed fx node, we can skip
    this conversion if the node is not observed
Nr   r  )	weight_ih	weight_hhweightz3No reference quantized module class configured for )(rm   rl   qconfigr   rk   weight_fake_quantto_floatr   r  r   rG   r   getr   r-   r   rV   r   r   	intrinsic_FusedModuleRNNCellBaser  r  r  r   updateLSTMGRU_flat_weights_namesro   
startswithr   rU   r   r3   r   r   r!   
from_float) r>   r?   r
  rA   r   r  r   original_moduler  weight_post_processqat_module_classesr	  r   is_observedpattern_to_dtype_configsdtype_configsis_weight_quantizedfused_modulefloat_modulewq_or_wq_dictweight_post_process_ihweight_post_process_hhweight_qparams_ihweight_qparams_hhwnr  is_ptqdeviceis_qat)root_module_to_quantized_reference_moduleref_qmodule_clsref_qmodules                                    rK   r<   r<     s<   & c$++./O)11G/?/66 .??)224(5$d<))22K 	T88  <NK,00o1FKM1'II .g6 L"L/588;;#8#8#E#EFF##A %m4M, 4 455!(!1!(!1|556|556+,BC+,BC..	
 
L588==%((,,"?	@	@ 22B|R((R]]8-D-D r2&-nn&6#&,,;'/$34G$Hb! 3 %,").."21,?F#&&v.( ,6 3 34_-@AB 	6nE . @CC$\2DO 	#j	<=YZf=g<hij#!,,\=IK%Q(5$dK8r^   	prev_nodec                    [        U[        5      (       d
   SU 35       eUR                  S:X  a[  UR                  S:X  aJ  U R	                  XR
                  S   5        [        UR                  5      S:X  a  UR                  U5        gggg)z
Given a custom module `node`, if the previous node is a dequantize, reroute the custom as follows:

Before: quantize - dequantize - custom_module
After: quantize - custom_module
             \ - dequantize
zDExpecting the argument for custom module node to be a Node, but got r   r   r   N)	rk   r    r   rl   r   rr   r   rs   rx   )r>   r2  rj   s      rK   ,_remove_previous_dequantize_in_custom_moduler4  o  s     4  Z	Mi[YZ  ||}$)9)9\)I	>>!+<=y1$Y' % *J$r^   custom_module_class_mapping(statically_quantized_custom_module_nodesc                    U[        U R                  5         nUR                  n[        U5      (       Ga  UR	                  U 5        [        X5      (       a  [        U R                  5      S:X  a>  [        U R                  S   [        5      (       a  [        U R                  S   5      S:X  d   eU R                  u  nu  p[        U[        5      (       d   e[        U[        5      (       d   e[        U	[        5      (       d   e[        XU5        [        XU5        [        X	U5        O[        X5      (       a  [        U R                  5      S:X  d   eU R                  u  pn[        U
[        5      (       d   e[        U[        5      (       d   e[        U[        5      (       d   e[        X
U5        [        XU5        [        XU5        OHU R                  S   n[        U[        5      (       d   e[        XU5        [        X5      nUc   eXl        [        XSU5      nUR!                  U5      n[#        U R                  5      u  nn[%        UU   UU5        g)a  Converts an observed custom module to a quantized custom module based on
`custom_module_class_mapping`
For static quantization, we'll also remove the previous `dequantize` node and
attach the observer node for output to the module, the observer for the node
will be converted to a dequantize node instead of quantize-dequantize pairs
later in the graph. In the end we would have a quantized custom module that
has the same interface as a default quantized module in nn.quantized namespace,
i.e. quantized input and quantized output.

Args:
  - node: The call_module node of the observed standalone module
  - graph: The graph containing the node
  - modules: named_module of original model
  - custom_module_class_mapping: mapping from observed custom module class to
    quantized custom module class, used to swap custom modules
  - statically_quantized_custom_module_nodes: we'll add the custom module node
    if we find it is statically quantized, this will be used later when converting
    observers to quant/dequant node pairs, if the observed node is a statically
    quantized custom module nodes, we'll convert the observer to a dequantize node,
    this is to keep the interface the same as the default quantized module.
    TODO: maybe we want to redesign this part to align with reference model design
    as well, but there has been some discussions around the interface, so we can do
    it later.
   r"      r   N)rm   rl   r  r   addr1   r   rr   rk   r   r    r4  r2   r   r   r   from_observedr   r  )r>   rj   r?   r5  r6  observed_custom_moduler  inputshidden0hidden1queryr   r   r   r   quantized_custom_module_classquantized_custom_moduler	  r   s                      rK   r:   r:     s   > %S%56$,,G)'22044T:!$00 DII!#tyy|U33		!%*+ ,099(V'gfd++++gt,,,,gt,,,,8uM8N8N"411 tyy>Q&&& $		EeT****c4((((eT****8eL8EJ8eL ))A,Cc4((((8EJ&B4&Q#*666=T: %DW%! <II %T[[1KGK $(?@r^   convert_custom_configis_standalone_module_remove_qconfig_flagqconfig_mappingkeep_original_weightsc	           
      l   Uc
  [        5       n[        U[        5      (       a0  [        R                  " S[
        SS9  [         R                  " U5      n[        U[        5      (       a9  [        R                  " S[
        SS9  U(       a  [        R                  " U5      OSn[        R                  " U5      nUb  [        U[        5      (       d   e[        U[        5      (       a0  [        R                  " S[
        SS9  [        R                  " U5      nUc
  [        5       n[        U 5      (       d   S5       eU R                  S   n	U	R                  n
U	R                  nU	R                   nU	R"                  n[        U R%                  S	S
95      nU(       a  U	R&                  n[        R                  " U5      nU	R(                  (       a  [+        XV5        [-        X5        [/        X5        [1        U UU R2                  XZ5      nUR5                  5        HF  u  nnUU;   d   SU S35       eUU   c  M  [7        UUU   5      (       a  M5   SU SU SUU    35       e   Un[9        UR:                  5      nUR:                  nU	R<                  b  [?        X5      n[A        XU5        [C        X5        SnURD                  nURF                  n[I        U5      n[K        URM                  5       5      n[O        U5      n[Q        U5      n[S        5       n[U        U R2                  RV                  5       GH  nURX                  S:X  a'  Un US-  nU U;   a  [[        UU R2                  5        M9  M;  URX                  S:X  a  []        U5      S:X  a  M\  Un!UR^                  S   n"[        U"[T        [J        45      (       a%  U H  n#[a        U"U#   U!U R2                  5        M     M  [        U"[b        [        45      (       a!  SU;   a  [a        U"U!U R2                  5        M  M  [        R                  " S[e        U"5       35        GM  URX                  S:X  d  GM!  [g        UU5      n$U$c   e[i        U$5      (       aY  UR^                  S   n%U%U;   a  [k        UU R2                  5        GMp  U(       a  [m        U UUU
U5        GM  [o        U UUU
U5        GM  [        U$[p        5      (       a  [k        UU R2                  5        GM  [s        U$5      (       a  [u        UXX5        GM  [w        U$5      [S        U5      Ry                  U5      Ry                  U5      ;   a8  [w        U$5      U;   a  [w        U$S   5      U;  a  GMC  [{        UUUUUUU5        GMW  [w        U$5      U;   d  GMi  [}        UU R2                  UUU5        GM     U R2                  R                  5         [        X R2                  5      n U(       d  [        XX5      n U(       a  [        U 5        U R                  5         U R                  R                  SS5        U $ )a+  
We will convert an observed model (a module with observer calls) to a reference
quantized model, the rule is simple:
1. for each observer module call in the graph, we'll convert it to calls to
   quantize and dequantize functions based on the observer instance
2. for weighted operations like linear/conv, we need to convert them to reference
   quantized module, this requires us to know whether the dtype configured for the
   weight is supported in the backend, this is done in prepare step and the result
   is stored in observed_node_names, we can decide whether we need to swap the
   module based on this set

Args:
   * `is_standalone_module`: when this flag is True, it means we are quantizing
   a submodule that is not inlined in parent module, and will be quantized
   separately as one unit.

   * `is_decomposed`: a boolean flag to indicate whether we want to use the
    quantize operator for decomposed quantized tensor
    (torch.ops.quantized_decomposed.quantize_per_tensor) or default/standalone
    quantized tensor (torch.quantize_per_tensor)

Returns:
     a quantized standalone module, whether input/output is quantized is
     specified by prepare_custom_config, with
     input_quantized_idxs, output_quantized_idxs, please
     see docs for :func:`~torch.ao.quantization.prepare_fx` for details
NzPassing a convert_custom_config_dict to convert is deprecated and will not be supported in a future version. Please pass in a ConvertCustomConfig instead.r8  )
stacklevelzPassing a QConfig dictionary to convert is deprecated and will not be supported in a future version. Please pass in a QConfigMapping instead.zPassing a backend_config_dict to prepare is deprecated and will not be supported in a future version. Please pass in a BackendConfig instead.z-incoming model must be produced by prepare_fxr   F)remove_duplicatezExpected key z  in convert node_name_to_qconfigzExpected k zD to have the same value in prepare and convert QConfigMappings, but z was updated to r   placeholderr"   outputz1Unsupported node type for output_quantized_idxs: r   )Er&   rk   r   r   r   FutureWarning	from_dictr   copydeepcopyr	   r
   r(   rT   r@   prepare_custom_configr
  rA   named_modulesrF  r.  r/   r.   r+   r,   rj   r   r   r6   observed_to_quantized_mapping!equalization_node_name_to_qconfigr%   r$   r   input_quantized_indexesoutput_quantized_indexesr   r   rt   r   r   setrq   r   r   r   r   rr   r   r    r   r0   r   r   r   r   r   r)   r;   r!   unionr<   r:   eliminate_dead_coder   r*   r   delete_all_unused_submodulespop)&r=   r   rC  rD  rE  rF  r   r  rG  observed_graph_module_attrsr@   rQ  r
  rA   r?   prepare_qconfig_mappingmodules_copyconvert_node_name_to_qconfigkvcustom_module_classesr5  weight_eq_obs_dictplaceholder_node_seen_cntinput_quantized_idxsoutput_quantized_idxsr/  root_module_classesr  fused_module_classesr6  r>   cur_placeholder_node_idxreturn_noderL  r  modr   s&                                         rK   r9   r9     s   L $ 3 5'..Q		
 !4 = =>S T/4((L		
 :IN$$_5d 	 mmO4O"j.&Q&QQQ.$''K		
 '00@24u%%V'VV%"'**-K"L 	$66  	$99  %@$S$S2M2b2b 5&&&>?G 2M2]2]}}W-&--#OD"5:12I['E<o(
$ )..0DAq11Cqc!ABC1+A.:%a)Ea)HII !! %#-.J1.M-NPI 1  <8;; #8"U"U"DDP 9Hu'9: %0 !"&;&S&S'<'U'U 	6nE .   I N N PQ/?3NC:=%,U[[&&'77m#'@$%*%'+??
 (ekk: @ WW ()Q. KYYq\F&4-000C6s[%++ 1 FT4L11 --6v{EKKX . GV~V WW%dG,C?"?*3// $		! $LLJekk %R! #.0 H! #.0 C--F%++ 044)',
 .c2c:M6N6T6T"7e()* 159MM4SV<DWW''("!  .c26KK%KK/<w (H 
KK##%{{+E );
 	&&(	JJNN148Lr^   )FF)FNFTNNFF)yrO  r   r   typingr   r   r   r   rV   torch.ao.quantizationr   r   $torch.ao.quantization.backend_configr	   r
   *torch.ao.quantization.backend_config.utilsr   r   r   r   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   %torch.ao.quantization.qconfig_mappingr    torch.ao.quantization.quant_typer   torch.ao.quantization.quantizer   torch.ao.quantization.stubsr   torch.ao.quantization.utilsr   r   r   r   r   r   r   torch.fxr   torch.fx.graphr   r   r    torch.nn.utils.parametrizer!   _decomposedr#   	_equalizer$   r%   custom_configr&   r'   graph_moduler(   r)   r*   qconfig_mapping_utilsr+   r,   r-   r.   r/   utilsr0   r1   r2   r3   r4   r5   r6   r7   r8   __all__r   r   r   r   r   uint16int16int32r   r   ry   r   r}   r~   choose_qparamsr   per_tensor_symmetricchoose_qparams_symmetricr   fxr   rm   r   Moduler   r   r   r   r   boolru   rG   r   r   rn   r   r   r;   rW  r<   r4  r:   r9    r^   rK   <module>r     s      1 1  F  G D @ 6 : 3   ! 0 0 C 2 B C M , 
 
 
 
LL	KK	LL	KK	JJ	LL	KK	KK		  
UYY;;JJQQ			 > > W W ^ ^! F#88F#
F# #uxx&'F# S%T	"223	F#
 sJ/F# 
F#XJ#88J#
J# #uxx&'J# S%T	"223	J#
 sJ/J# 
J#j	<
	<	<		<ehhoo $ 


*.sJ*?
	
)K ) )SW ),
C 
t 
E 
d 
*66S%T	"2236 sJ/6 38_	6rD$ Du D D
c588??23ehhoo=<
=<#uxx&'=< 88=< 	=<
 ]+=< 
=<L  M9
M9#uxx&'M9 SM9 sJ/	M9
 "M9 M9 M9 
M9`(
(((-(	(*SA
SASA #uxx&'SA "&idDj1A&A!B	SA
 /2$iSA 
SAp NR!&!%CGAE"'PPP !!4d38nd!JKP 	P
 P >4S>4?@P -c3h=>P P  P Pr^   