
    [ThD                     (   S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
JrJrJr  S SKrS SKJr  S SKJr  S SKJs  Jr  S SKJs  Jr  S SKJrJr  S SKJr  S SKJ r J!r!  / S	Qr"S
\#S\$\#\#4   4S jr%S\\&   S\RN                  S\(\#\4   4S jr)S\RN                  S\(\#\4   S\R$                  RT                  4S jr+S)S\R$                  RT                  S\R$                  RT                  4S jjr,S\RT                  S\RT                  4S jr-S\RT                  S\.\RN                     S\.\RN                     S\.\RN                     4S jr/\R`                  \Rb                  \Rd                  \Rf                  \Rh                  \Rj                  \Rl                  \Rn                  \Rp                  \Rr                  \Rn                  \Rt                  \Rv                  /r<\Rz                  \R|                  /r?\R`                  \R                  \Rb                  \R                  \Rd                  S 0rBS\.\RN                     S\(\#\RT                  4   4S jrCS\.\RN                     S\(\#\RT                  4   S\(\RT                  \RT                  4   4S jrD " S S 5      rES*S! jrFS"\ES\G4S# jrH " S$ S%5      rIS\R                  4S\R$                  RT                  S&\\(\#\4      S'\&\R                     S\R$                  RT                  4S( jjrKg)+    N)defaultdict)Iterable)Enum)AnycastOptional)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 N    U R                  SS5      Gt pU(       a  US   U4$ SU4$ )zd
Splits a qualname into parent path and last atom.
For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
.   r    )rsplit)r   parentnames      Z/var/www/auris/envauris/lib/python3.13/site-packages/torch/fx/experimental/optimization.py_parent_namer$   %   s1    
 MM#q)MV6!9,,B,,    patternnodemodulesc                    [        UR                  5      S:X  a  gUR                  S   U4n[        X5       H  u  pE[        U[        R
                  5      (       d    gUR                  S:w  a    g[        UR                  [        5      (       d    gUR                  U;  a    g[        X%R                     5      ULd  M    g   g)Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r&   r'   r(   nodesexpected_typecurrent_nodes         r#   r   r   /   s     499~"&))A,!5E'*7':#,00??m+,--s33g-++,-]B (; r%   
new_modulec                     [        U R                  [        5      (       d   e[        U R                  5      u  p4X!U R                  '   [	        X   XB5        g N)r.   r   r2   r$   setattr)r'   r(   r7   parent_namer"   s        r#   r   r   C   sE     dkk3''''$T[[1K%DKKG $3r%   modelc                    [         R                  [         R                  4[         R                  [         R                  4[         R
                  [         R                  4[         R                  [         R                  4/nU(       d  [        R                  " U 5      n U(       a)  [        U [        R                  R                  5      (       d  [        R                  " U 5      nOU n[        UR!                  5       5      n[        R                  " UR"                  5      nU GH%  nUR$                   GH  n['        XxU5      (       d  M  [)        UR*                  S   R,                  5      S:  a  M?  XXR*                  S   R.                     n	XXR.                     n
U
R0                  (       d  M{  US   [         R                  [         R                  [         R
                  4;   a  [3        X5      nO[5        X5      n[7        UR*                  S   X[5        UR9                  UR*                  S   5        UR;                  U5        GM     GM(     [        R                  " XF5      $ )z
Fuses convolution/BN and linear/BN layers for inference purposes.
Will deepcopy your model by default, but can modify the model inplace as well.
r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr.   torchr/   GraphModulesymbolic_tracedictnamed_modulesgraphr4   r   r+   r,   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r<   inplaceno_tracepatternsfx_modelr(   	new_graphr&   r'   first_layerbnfused_layers               r#   r   r   L   s    
BNN#	BNN#	BNN#	BNN#	H e$:eUXX-A-ABB$$U+8))+,Ghnn-IOOD%gW==tyy|))*Q.%iil&9&9:[[)--1:"))RYY		!BB"3K"DK"5k"FK#DIIaL'G**499Q<8$$T* $ " >>(..r%   c                     [         R                  " U 5      n " S S[        R                   R                  5      nU" U5      R	                  5       $ )z-
Removes all dropout layers from the module.
c                   P   ^  \ rS rSrS\S\\S4   S\\\	4   S\	4U 4S jjr
SrU =r$ )	&remove_dropout.<locals>.DropoutRemoverz   r   r,   .kwargsr   c                    > [        U R                  U   [        R                  5      (       a  [	        U5      S:X  d   eUS   $ [
        TU ]  XU5      $ )Nr   r   )r.   
submodulesr>   Dropoutr+   superr*   )selfr   r,   r^   	__class__s       r#   r*   2remove_dropout.<locals>.DropoutRemover.call_module{   sJ     $//&12::>>4yA~%~Aww*6@@r%    )__name__
__module____qualname____firstlineno__r
   tupler	   rK   r2   r   r*   __static_attributes____classcell__)rd   s   @r#   DropoutRemoverr\   z   sE    	A 	A(-hm(<	AFJ3PS8n	A	A 	Ar%   rn   )r/   rJ   rH   Transformer	transform)r<   rU   rn   s      r#   r   r   t   sB       'HA-- A (#--//r%   orig_moduler4   inputsoutputsc                 l  ^	 [         R                  " 5       n0 m	U H#  nUR                  UR                  5      nUT	U'   M%     U H  nUR	                  UU	4S j5      nUT	U'   M      UR                  U Vs/ s H  nT	U   PM
     sn5        UR                  5         [         R                  " X5      $ s  snf )zy
Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
c                    > TU    $ r9   rf   )xenvs    r#   <lambda>"extract_subgraph.<locals>.<lambda>   s	    s1vr%   )r/   Graphplaceholderr"   	node_copyoutputlintrI   )
rq   r4   rr   rs   rV   inputnew_noder'   r}   rw   s
            @r#   r   r      s     
I"$C((4E
  &&t-=>D	  8fc&k89NN>>+11 9s   5B1c                 .    [         R                  " U 5      $ r9   )	th_mkldnnMkldnnBatchNorm)a_s     r#   rx   rx      s    !:!:1!=r%   c                    0 nU  H  nUR                   S:X  d  M  [        UR                  [        5      (       d   eXR                     n[	        U5      [
        ;   d  MY  [
        [	        U5         " U[        R                  5      n[        U[        R                  5      (       d   e[        R                  " U5      X%'   [        X1U5        M     U$ )z
For each node, if it's a module that can be preconverted into MKLDNN,
then we do so and create a mapping to allow us to convert from the MKLDNN
version of the module to the original.
r*   )r1   r.   r   r2   r3   
mkldnn_maprH   floatr>   ModulerF   rG   r   )r4   r(   old_modulesr'   
cur_moduler7   s         r#   r   r      s     /1K77m#dkk3//// -JJ:-'Z(89*ekkR
!*bii8888*.--
*C'#D:>  r%   r   c                     U  HY  nUR                   S:X  d  M  [        UR                  [        5      (       d   eXR                     nXB;   d  MK  [	        X1X$   5        M[     g)zU
Maps each module that's been changed with `modules_to_mkldnn` back to its
original.
r*   N)r1   r.   r   r2   r   )r4   r(   r   r'   r   s        r#   r   r      sP     77m#dkk3//// -J(#D;3JK r%   c                   6    \ rS rSrS\R
                  4S jrSrg)r      fx_graphc                 :    Xl         / U l        / U l        / U l        g r9   )r   r4   start_nodes	end_nodes)rc   r   s     r#   __init__MklSubgraph.__init__   s     $&
*,(*r%   )r   r   r4   r   N)rg   rh   ri   rj   r/   rz   r   rl   rf   r%   r#   r   r      s    + +r%   r   c                 H   ^ ^^^^ SmSmS[         S[        4U UUUU4S jjnU$ )a?  
This generates a heuristic that can be passed into `optimize_for_inference` that
determines whether a subgraph should be run in MKL by running it with the example_inputs.

Example usage:
    heuristic = gen_mkl_autotuner(example_inputs, iters=10)
    fast_model = optimization.optimize_for_inference(model, heuristic)
NrM   r   c                   >^^ U R                   nT
cF  U R                  R                  m
U R                  R                  m[	        T
5      R                  T	5        U Vs/ s H#  n[        R                  " UR                  5      PM%     snm[        [        [        R                     U R                   Vs/ s H  o"R                  S   PM     sn5      n[        T
U R                   X5      mUU4S jnU" UU4S j5      n[#        TR$                  R                   ['        TR)                  5       5      T5        U" UU4S j5      nXV:  $ s  snf s  snf )Nr   c                    > [        T5       H
  nU " 5         M     [        R                  " 5       n[        T5       H
  nU " 5         M     [        R                  " 5       U-
  $ r9   )rangetime)fr   beginiterswarmups      r#   	benchmark?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark   sE    6] #IIKE5\ "99;&&r%   c                     > T" T V s/ s H  o R                  5       PM     sn 6  V s/ s H  o R                  5       PM     sn $ s  sn f s  sn f r9   )	to_mkldnnto_dense)isample_inputs	submodules    r#   rx   >gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s?    &/1WA++-1W&X&X

&X1Ws
   AAc                     > T" T 6 $ r9   rf   )r   r   s   r#   rx   r     s
    	=(Ar%   )r   r   owning_moduler   r   	propagaterH   randnshaper   listr/   r0   r   r,   r   r4   r   rM   rK   rL   )rM   input_nodesr'   output_argsr   mkl_timeno_mkl_timer   r   example_inputsrU   r   r   r   s          @@r#   use_mkl_heuristic,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s   ''~~33H..44Kh)).9=HI[TTZZ0[I4=EOO*TOD99Q<O*TU$Xu{{KU		' 
 	OO!!4	(?(?(A#BK	
   AB%%- J*Ts   *E3E
)r   bool)r   r   r   r   rU   r   s   ``` @@r#   r   r      s/     HK& & & &> r%   rM   c                 2    [        U R                  5      S:  $ )z
This is a heuristic that can be passed into `optimize_for_inference` that
determines whether a subgraph should be run in MKL by checking if there
are more than 2 nodes in it
   )r+   r4   )rM   s    r#   r   r     s     u{{ar%   c                   L    \ rS rSrS rS\4S jrS\S\4S jrS\S\4S	 jrS
r	g)r   i  c                 0    S /U-  U l         S/U-  U l        g )Nr   r!   size)rc   ns     r#   r   UnionFind.__init__  s    ,06A: !sQw	r%   vc                 >    XR                   U'   SU R                  U'   g )Nr   r   )rc   r   s     r#   make_setUnionFind.make_set   s    A		!r%   r   c                     U R                   U   nX:X  a  U$ Uc   eU R                  U5      U R                   U'   [        [        U R                   U   5      $ r9   )r!   findr   int)rc   r   pars      r#   r   UnionFind.find$  sM    kk!n8H3ACQ((r%   r   bc                     U R                  U5      U R                  U5      p!X:X  a  U$ U R                  U   U R                  U   :  a  X!p!XR                  U'   U R                  U==   U R                  U   -  ss'   g r9   )r   r   r!   )rc   r   r   s      r#   joinUnionFind.join,  se    yy|TYYq\16H99Q<$))A,&qA		!		!$r%   r   N)
rg   rh   ri   rj   r   r   r   r   r   rl   rf   r%   r#   r   r     s9    '# )c )c )%c %c %r%   r   pass_configtracerc                   ^^ SSS[         0S.nUc  0 nUR                  U5        US   (       a  [        U 5      n US   (       a  [        U 5      n US   SL a  U $ [	        US   [
        5      (       d  [        S	5      eSUS   ;  a  [        S
5      eUS   S   nU" 5       nUR                  [        R                  " U 5      5      m[        R                  " UR                  T5        [        U R                  5       5      n " S S[        5      n[        TR                   5       GH  nUR"                  n	UR$                  S:X  a  XhR&                     n
[)        U
5      [*        ;   ay  UR,                  n	[/        U
R1                  5       S5      nUbP  UR2                  [4        R6                  :X  d   S5       eUR8                  [4        R8                  " S5      :X  d   S5       eOQUR$                  S:X  aA  UR&                  [*        ;   a  UR,                  n	O UR&                  [:        ;   a  UR<                  n	XR"                  :w  d  GM  XR<                  :X  a$  [?        S UR@                   5       5      (       d  GMQ  TRC                  U5         [        RD                  " UR@                  U4S j5      nSSS5        [G        [H        [        RJ                  RL                     W5      Ul         TRO                  U5         TRQ                  SSU45      nURS                  U5        U4Ul         SSS5        GM
     [U        [        TR                   5      U5      nUTl+        TR                    H  nUR$                  S:X  d  M  UR&                  S:X  d  M'  UR@                  S   n[        URX                  5      nU HI  nUR$                  S:X  d  M  UR&                  S:X  d  M'  URS                  U5        TR[                  U5        MK     []        URX                  5      S:X  d  M  TR[                  U5        M     []        TR                   5      n[_        U5      mU4S jn[a        TR                   5       GHC  u  nnUR$                  S:X  a*  UR&                  S:X  a  UUl1        TRe                  U5        MA  UR$                  S:X  aD  UR&                  S:X  a4  U" UR@                  S   5      c   eU" UR@                  S   5      Ul3        M  URh                   Vs/ s H7  n[	        U[        Rj                  5      (       d  M$  U" U5      c  M/  U" U5      PM9     nn[]        U5      S:X  a  M  [?        S U 5       5      (       a   e[m        U5      nUS   Ul7        USS  H  nTRq                  US   U5        M     GMF     [s        U4S j5      nTR                    H  n[u        US5      (       a7  UTRw                  URn                  5         R                   Ry                  U5        [u        US5      (       a7  UTRw                  URb                  5         Rz                  Ry                  U5        [u        US5      (       d  M  UTRw                  URf                  5         R|                  Ry                  U5        M     UR                  5        Hy  nU" U5      (       a  M  URz                  UR|                  -    H4  nUR@                  S   nURS                  U5        TR[                  U5        M6     [        UR                   Xn5        M{     SnTR                    H*  nUR&                  S:X  d  UR&                  S:X  d  M%  US-  nM,     [        R                  " [        5      R                  SU5        TR                  5         [        R                  " U T5      nU$ ! , (       d  f       GN"= f! , (       d  f       GM  = fs  snf ) a  
Performs a set of optimization passes to optimize a model for the
purposes of inference. Specifically, the passes that are run are:
1. Conv/BN fusion
2. Dropout removal
3. MKL layout optimizations

The third optimization takes a function `use_mkl_heuristic` that's used
to determine whether a subgraph should be explicitly run in MKL layout.

Note: As FX does not currently handle aliasing, this pass currently
assumes nothing aliases. If that isn't true, use at your own risk.
T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                        \ rS rSrSrSrSrSrg)*optimize_for_inference.<locals>.MklSupportib  r   r      rf   N)rg   rh   ri   rj   NOYESUNKNOWNrl   rf   r%   r#   
MklSupportr   b  s    r%   r   r*   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   >   #    U  H  oR                   S :H  v   M     g7f)r   N)r   ).0args     r#   	<genexpr>)optimize_for_inference.<locals>.<genexpr>  s     Iy::3ys   c                 *   > TR                  SU 45      $ )Nr   )call_method)r   r   s    r#   rx   (optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)Pr%   r   r   r   r   c                    > [        U S5      (       a  TR                  U R                  5      $ [        U S5      (       a  TR                  U R                  5      $ g )Ncolorstart_color)hasattrr   r   r   )r   ufs    r#   	get_color)optimize_for_inference.<locals>.get_color  sF    1g77177##1m$$771==))r%   c              3   (   #    U  H  oS L v   M
     g 7fr9   rf   )r   r   s     r#   r   r     s     9j9js   r   c                     > [        T 5      $ r9   )r   )r   s   r#   rx   r     s
    H@Ur%   r   r   	end_colorzmkldnn conversions: %s)Fr   updater   r   r.   rK   RuntimeErrortracerF   rG   r/   rI   rootrL   r   r   r4   r   r1   r   r3   mkldnn_supportedr   next
parametersdtyperH   r   devicemkldnn_supported_unknownr   anyr,   inserting_beforemap_argr   rk   r'   r	   inserting_aftercreate_noderP   r   r   rN   rQ   r+   r   	enumerater   r   r   all_input_nodesr0   sortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerrg   infor~   )r<   r   r   default_pass_configr   
cur_tracerr(   r   r'   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderN   user	num_nodesr   cur_idxr   
cur_colorsother_colormkldnn_graphsrM   prvmkldnn_conversionsresultr   r   s                                @@r#   r   r   6  s   & #."?
 {+>*U+,u%34=)*BCTJJHII-.FGGQRR+,DEkRJe 45HNN:??H-$()<)<)>$?GT  X^^$$--77m# -JJ#33",..#'
(=(=(?#F #/(..%++=CBC=+22ell7  ;:;  WW'{{..",.. 88","4"4mm+"4"44ItyyIII**40 jjIIP 1
 U277#3#34kBDI))$/"..}j4'R**73 $w 0/? %J $D$8'BK&H 77m#z(Ayy|H$E77m+{0J..x8''-  4::!###D)  HNN#I	9	B$ #8>>277m#{(B&DKK WW%$++*CTYYq\*666&tyy|4DN ---Aa)  Q< 	!-   :!#9j99999
+J#ADJ)!"~
1{3  .) 3. -88U,VM4!!"''$**-.44;;DA4''"''$"2"234@@GGM4%%"''$..12<<CCDI  %%' ''))EOO;iil**3/##D) < %++w< ( ;;+%
)B!#  h$$%=?QRMMO^^E8,FMG 10 0/ds*   &_3._#_*/_*:_*
_	
_'	)FF)
   r   )LrF   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   r   rH   torch.fxr/   torch.nnr>   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder	   r
   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r2   rk   r$   r3   r0   rK   r   r   r   r   r   r   r   rA   rE   rB   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rf   r%   r#   <module>r2     s       # $  & &      & & * 0 H - -sCx -d^#%7759#s(^(4
''4 cN48=4%/ %/588?? %/P0")) 0		 0&22=2 M2 "'']	2. IIIINNGGLLLL	JJ	OO	MMFFLL & %LL(,,7 IIy%%IIy%%NN=
T"''] T#ryy.5I &L=L#ryy.!L bii*+L"+ ++\ +  $  % %: -1 iip88??p$sCx.)p Op XX__	pr%   