
    [ThD                        S SK r S SKrS SKrS SKJr  S SKJr  S SKrS SKJ	s  J
r  S SKJs  J
r  S SKJrJrJr  S SKJrJr  S SKJrJr  \ R2                  " \5      rS\R                  R8                  S\\R                  R8                     4S	 jrS
\R                  R8                  SS4S jrS\R                  R@                  SS4S jr!S\R                  R@                  S\S\"\#\R                  R8                  4   S\$\%\R                  R8                     \"\#\R                  R8                  4   4   4S jr&S\R                  R@                  S\%\R                  R8                     S\"\#\R                  R8                  4   S\RN                  RP                  S\#S\R                  R8                  4S jr)S\R                  R@                  S\S\R                  R8                  S\"\#\R                  R8                  4   S\$\R                  R8                  S4   SS4S jr*S\R                  R@                  S\"\#\RN                  RP                  4   S\"\#\4   S\R                  R@                  4S jr+S\R                  R@                  S\SS4S jr,S\S\"\#\RN                  RP                  4   S\R                  R@                  4S jr-g)     N)defaultdict)Optional)ConstantArgumentExportedProgramModuleCallSignature)legalize_graphNodeList)erase_nodesfuse_as_graphmodulenodereturnc                 p   [        U R                  R                  5       5      n[        5       nU H  nUR                  S:X  a  M  UR                  S:X  a  UR
                  [        R                  :X  d   SU  SU 35       eUR                  [        UR                  R                  5       5      5        M     U$ )Noutputcall_functionz"Expected getitem node as user for z, instead got )	listuserskeyssetoptargetoperatorgetitemupdate)r   
node_usersgetitem_usersusers       J/var/www/auris/envauris/lib/python3.13/site-packages/torch/export/_swap.py_get_getitem_usersr      s    djjoo'(JEM77h GG&4;;(:J:J+J	K/v^D6J	KJT$**//"345      curr_module_nodec                 ,   [         R                  SU 5        [        U R                  R	                  5       5      n[        U5      S:X  d   S[        U5       35       eUS   nUR                  S:X  a  UR                  [        R                  :X  d   e[        U5      n[        U5      S:w  a  [         R                  SUU5        g[        [        U5      5      nUR                  S:X  a  UR                  [        R                  :X  d  [         R                  SUU5        g[        UR                   S   5       H  u  pVXbR                  ;  a&  [         R                  S	U UR                   S   5          gUR                  S:X  a3  UR                  ["        R$                  :X  a  UR                   S   U:X  a  M}  [         R                  S
U UR                   S   5          g   ['        5       n[        U5      nU H5  n	UR)                  [        U	R                  R	                  5       5      5        M7     [        U5      S:w  a  [         R                  SUU5        g[        [        U5      5      n
U
R                  S:X  d  [         R                  SUU
5        gU 4U
l        g)a  
We want to try to remove extraneous pytree flatten/unflatten calls between modules
calls. Instead of having the following:
graph():
    ...
    %foo : [num_users=1] = call_module[target=foo](args = (%getitem_1, %getitem_2), kwargs = {})
    %tree_flatten_spec : [num_users=1] = call_function[target=torch.fx._pytree.tree_flatten_spec](args = (%foo, %_spec_1), kwargs = {})
    %getitem_4 : [num_users=1] = call_function[target=operator.getitem](args = (%tree_flatten_spec, 0), kwargs = {})
    %tree_unflatten_1 : [num_users=2] = call_function[target=torch.utils._pytree.tree_unflatten](args = ([%getitem_4], %_spec_2), kwargs = {})
    %getitem_5 : [num_users=1] = call_function[target=operator.getitem](args = (%tree_unflatten_1, 0), kwargs = {})
    %getitem_7 : [num_users=0] = call_function[target=operator.getitem](args = (%tree_unflatten_1, 1), kwargs = {})
    %getitem_6 : [num_users=1] = call_function[target=operator.getitem](args = (%getitem_5, 0), kwargs = {})
    %bar : [num_users=1] = call_module[target=bar](args = (%getitem_6,), kwargs = {})
    ...

We could do the following, if we know that all the outputs of `foo` feed into `bar`:
graph():
    ...
    %foo : [num_users=1] = call_module[target=foo](args = (%getitem_1, %getitem_2), kwargs = {})
    %bar : [num_users=1] = call_module[target=bar](args = (%getitem_6,), kwargs = {})
    ...

Currently this optimization only works for the case where all of the outputs
of `foo` go directly into `bar`, and `bar` has no other inputs.
z+Trying to remove pytrees for module call %s   z4Expected only one user for module node, instead got r   r   zaMore than one user found for flatten node, %s: %s. Unable to fuse it with another unflatten call.NzTFlatten node %s's user is not a pytree.tree_unflatten. Instead it is: %s. Passing...zModule %s's outputs are not all directly used as inputs to the subsequent module. Unable to fuse the connecting flatten/unflatten. The inputs to the subsequent module are: %s. zModule %s's outputs are not all directly used in the same order as outputted. Unable to fuse the connecting flatten/unflatten. The inputs to the subsequent module are: %s. zaMore than one user found for unflatten node, %s: %s. Unable to fuse it with another flatten call.call_modulezLUnflatten node %s's user is not a call_module. Instead it is: %s. Passing...)logdebugr   r   r   lenr   r   	fx_pytreetree_flatten_specr   nextiterpytreetree_unflatten	enumerateargsr   r   r   r   )r    curr_module_usersflatten_nodeflatten_getitem_usersunflatten_nodeiargunflatten_getitem_getitem_usersunflatten_getitem_usersunflatten_getitem_usernext_module_nodes              r   _try_remove_connecting_pytreesr9   $   s|   6 II;=MN-3388:;!#X	=dCT>U=VWX#$Q'L?*9#>#>>	? /|<
 !Q&		=!		
 	$456N_,!!V%:%::		,		
 	N//23(((IIS !##A&  FFo%

h...q II. !##A& 1 46 '*e#0@"9'..'--2245	
 #:
 *+q0		;+		
 	D!@AB=0		,		
 	 ./r   gmc                     U R                   R                   H   nUR                  S:X  d  M  [        U5        M"     U R                   R	                  5         g)a  
Remove extraneous pytree flatten/unflatten calls.

We try a couple of optimizations here:
    1. Remove pytree flatten/unflatten calls between modules
    2. TODO: Remove module's in_spec + initial unflatten call
    3. TODO: Remove module's out_spec + final flatten call
r#   N)graphnodesr   r9   eliminate_dead_code)r:   r   s     r   _remove_extraneous_pytreesr?      s<     77m#*40  HH  "r   	signaturenode_name_mapc           	         / nUR                    Hx  n[        U[        5      (       a   UR                  c  UR	                  S 5        M8  UR
                  U;  a  UR	                  S 5        M[  UR	                  X$R
                     5        Mz     SSKJn  U" XUR                  5      nUR                  R                  S:X  d   eUR                  R                  S   nUR                  b   eU R                  R                  [        R                  US45      n[!        UR                  5       V	s/ s H.  n	U R                  R                  [        R                  X45      PM0     n
n	UR                  R                  S   nUR                  c   eU R                  R                  [        R                  US45      nUR                   Vs0 s H.  nXR                  R                  [        R                  X45      _M0     nnX4$ s  sn	f s  snf )Nr"   )_generate_unflatten   r   )inputs
isinstancer   valueappendname	unflattenrC   in_specnum_childrenchildren_specscontextr<   r   r   r   range)r:   r@   rA   tree_unflatten_argsinput_rC   r2   	args_spec	args_noder3   
args_nodeskwargs_speckwargs_nodekkwargs_nodess                  r   _construct_inputsrY      s   
 :<""f.//FLL4H  &&t,[[-&&t,&&}[['AB # /()BSBSTN))Q...!!003I$$$&&x'7'7.!9LMI y--..A 	x//)@.  
 ##2215K***(((()9)9NA;NOK $$$A 	
88!!("2"2[4DEE$   ##s   +5G>5HrT   rX   module_to_swaprI   c                     SSK JnJn  U" X0XFR                  5        U R                  R                  U[        U5      U5      nU$ )Nr"   )_assign_attr	_AttrKind)rJ   r\   r]   MODULEr<   r#   tuple)r:   rT   rX   rZ   rI   r\   r]   module_nodes           r   _insert_call_modulera      s:     3T+;+;<((&&tU:->MKr   r`   orig_outputs.c                     SSK Jn  U" XUR                  5      n[        U5       HO  u  px[        R
                  R                  U5      U   R                  n	UR                  U	SS9  XUR                  '   MQ     g )Nr"   )_generate_flatten_specT)propagate_meta)
rJ   rd   out_specr-   torchfxProxyr   replace_all_uses_withrI   )
r:   r@   r`   rA   rb   rd   r0   r3   orig_output	proxy_outs
             r   _deconstruct_outputsrm      sh     2)"9;M;MNL#L1HHNN<0388	)))D)I*3k&&' 2r   modules_to_swapmodule_call_graphc                    [         R                  S5        [         R                  U R                  5        [        U 5        [	        [
        5      nU R                  R                   Vs0 s H  oDR                  U_M     nnU R                  R                   H[  nUR                  R                  S5      =n(       d  M'  UR                  5        H   u  pxXq;   d  M  X7   R                  U5          MY     M]     UR                  5        H  u  p U	R                  SS5      n[        X
SU 35      u  pn[         R                  S5        [         R                  UR                  5        X)   n[        XU5      u  nn[!        U UUX   U	5      n[#        XUX^5        [%        X
5        [         R                  S5        [         R                  U R                  5        M     [        U 5        [         R                  S5        [         R                  U R                  5        ['        U 5        [         R                  S	5        [         R                  U R                  5        U R)                  5         U $ s  snf )
NzStarting graph:nn_module_stack._fused_zFused subgraph nodes:zSwapped graph:z#Before removing extraneous pytrees:z"After removing extraneous pytrees:)r$   r%   r<   r   r   r   r=   rI   metagetvaluesrH   itemsreplacer   rY   ra   rm   r
   r?   	recompile)r:   rn   ro   
partitionsr   rA   rq   pathrs   rI   r=   submod_namesub_gmorig_inputsrb   r@   rT   rX   r`   s                      r   _swap_module_helperr      s   
 II IIbhh2&1$&7J %'HHNN/$2D		4N  /
 "iimm,=>>?>*113*$++D1 4  "'')(	T ll3,,?}--
)\ 			)*		&,,):)@	#4RM#R 
L)
L/*?
 	RKUB		"#		"((} *@ 2II34IIbhhr"II23IIbhhLLNIo/s   'I&c                    SSK JnJn  [        R                  R
                  R                  5       U R
                  l        U R
                  R                   Vs/ s H  oDR                  S:X  d  M  UPM     nn/ nUR                  nUc  / nUR                  R                  S:X  d   eUR                  R                  S   nUR                  R                  S   n	UR                  [        :X  d   eU	R                  [         :X  d   e[#        UR                  5       H  n
UR%                  SU
 35        M     UR'                  U	R(                  5        U HT  nU R
                  R+                  US   5         UR%                  U R
                  R-                  U5      5        SSS5        MV     U R
                  R+                  US   5         U" U [        U5      5      n[/        U5       H)  u  pSUl	        [0        R2                  Ul        X4Ul        M+     SSS5        [9        S	 U R
                  R                   5       5      nU R
                  R+                  U5         U" XR6                  S   UR:                  5      nU4Ul        SSS5        U R=                  5         gs  snf ! , (       d  f       GMa  = f! , (       d  f       N= f! , (       d  f       NH= f)
aH  
Given the unlifted module from calling ep.module(), we want to remove the
pytree processing from the graph module's PyTreeCodeGen and instead make it
nodes inside of the graph. This allows us to do some optimizations, like
remove these pytree calls if it is unnecessary, and makes the PyTree part
more obvious to graph passes.
r   )_generate_flattenrC   placeholderNrD   r"   arg_r   c              3   H   #    U  H  oR                   S :X  d  M  Uv   M     g7f)r   N)r   ).0r   s     r   	<genexpr>._fix_input_output_signature.<locals>.<genexpr>  s     N''X:Mtts   "	")torch.export.unflattenr   rC   rg   rh   r<   CodeGen_codegenr=   r   forward_arg_namesrK   rL   rM   typer_   dictrO   rH   extendrN   inserting_beforer   r-   r   r   r   r.   r)   rf   rz   )r:   r@   r   rC   r   old_placeholdersnew_placeholdersr   arg_spec
kwarg_specr3   r4   	flat_nodeold_placeholderoutput_nodeunflats                   r   _fix_input_output_signaturer   ]  sB    N ..0BHH)+T77m;ST!33   --222$$33A6&&55a8
}}%%%$&&&x,,-A$$tA3Z0 .  !3!34 XX&&'7':;##BHH$8$8$=> <; !
 
	"	"#3A#6	7%b%0@*AB	"+,<"=A!0O%-%5%5O"$->O  #> 
8 NNNK		"	";	/$R)9)9!)<i>P>PQ"9 
0 LLNC U  <; 
8	7 
0	/s1   J/J2+J"AJ5)K"
J2	5
K
Kepc                    U R                    Vs0 s H,  o"R                  (       d  M  UR                  UR                  _M.     nnU R                  5       nSUl        UR
                  R                  5         [        U[        R                  R                  5      (       d   e[        X@R                   S   R                  5        U R                   Ul         [        R                  " [        U5      R                  U5      Ul        [        R                  " [        U5      R                   U5      Ul        [        U[        R                  R                  5      (       d   e[#        XAU5      nU$ s  snf )a&  
Unlifts the given ExportedProgram into a fx.GraphModule, and then swaps
previously traced modules with new eager modules specified. Returns a
fx.GraphModule with a custom forward function.

Args:
    ep (ExportedProgram): Exported program to modify
    modules_to_swap (Dict[str, torch.nn.Module]): Mapping from module fqn to
        eager module to swap with. The specified module fqn should have also
        been specified in the `preserve_module_call_signature` argument to
        torch.export so that we know how to restore the calling convention
        to this argument.
    run_with_interpreter: Whether or not to run the graph using
        fx.Interpreter. Setting to true will help result in better error
        messages and easier debugging, but it has found to result in a QPS
        drop.
Fr   )ro   r@   fqnmodulevalidate_inputsr<   r>   rF   rg   rh   GraphModuler   types
MethodTyper   trainevalr   )r   rn   entryro   r:   s        r   _swap_modulesr     s   * 130D0D0Du"		5??"0D   
BBHH  "b%((..////$8$8$;$E$EF//BR3BHtBx}}b1BGb%((..////	R2C	DBI#s
   E*E*).loggingr   r   collectionsr   typingr   rg   torch.fx._pytreerh   _pytreer'   torch.utils._pytreeutilsr+   torch.export.exported_programr   r   r   torch.fx.passes.tools_commonr   r	   !torch.fx.passes.utils.fuser_utilsr
   r   	getLogger__name__r$   Noder   r   r9   r   r?   r   strr_   r   rY   nnModulera   rm   r   r   r    r   r   <module>r      s      #   $ $ $ $ 
 B N !UXX]] s588==/A t0UXX]] t0t t0n#588#7#7 #D #"'$'$"'$ UXX]]*+'$ 4S%((--%7 889	'$TUXX]]# sEHHMM)* HHOO	
  XX]]44"4 4 UXX]]*+	4
 s*+4 
4&cc#uxx./c C!445c XX	cL11)<1	1h%%*.sEHHOO/C*D%
XX%r   