
    [Th3                    B   % S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
  S SKJrJr  S SKJrJrJrJr  S SKrS SKrS SKrS SKJr  S SKJs  Jr  S SKJr  S SKJr  S SK J!r!J"r"  S SK#J$r$J%r%  S S	K&J'r'J(r(J)r)J*r*  S S
K+J,r,  S SK-J.r.  S SK/J0r0  SSK1J2r2  SSK3J4r4  SSK5J6r6J7r7J8r8  SSK9J:r:  SSK;J<r<  SSK=J>r>J?r?  SSK@JArAJBrB  \(       a  S SKCrC\2R                  rE\F\GS'   \R                  " \I5      rJ\R                  \GS'   \R                  R                  rM\R                  R                  rN\ " S S5      5       rO\ " S S5      5       rP\ " S S5      5       rQS\R                  S\F4S jrSS \R                  S\F4S! jrUS \R                  S\F4S" jrVS\R                  S\W4S# jrX " S$ S%5      rY\Y" 5       rZ SnS&\R                  S'\\\R                     S(\\\R                     S)\\]   S\R                  4
S* jjr^S\R                  S\F4S+ jr_S\R                  S\F4S, jr`S\R                  S\F4S- jraS\R                  S\F4S. jrbS\R                  S\F4S/ jrcS\R                  S\F4S0 jrdS\R                  S\F4S1 jreS\R                  S\F4S2 jrfS3\R                  S\g\\\R                     \\\R                     4   4S4 jrhS5\\\R                     S6\]4S7 jriS3\R                  S5\\\R                     S8\\\R                     S9\WS\g\R                  \R                  4   4
S: jrjS3\R                  S\g\R                  \R                  4   4S; jrk\W" S<5      rlS=\WS\W4S> jrmS\R                  S\W4S? jrnS@\R                  4SA jro\R                  " S5      SB 5       rqSC\r\R                  \W4   S\\\g\R                  \W4      4SD jrsSE\R                  S\R                  4SF jrtSG\R.                  R                  SH\R.                  R                  SI\R.                  R                  SJ\R.                  R                  SK\R                  SL\WSM\R.                  R                  SN\R.                  R                  4SO jrvS3\R                  SG\R                  SH\R                  SP\WS\g\R                  \R                  4   4
SQ jrwS3\R                  S\R                  4SR jrx SnS&\R                  SS\PST\QSU\\.\R                        4SV jjrySW rzS\O4SX jr{S@\R                  4SY jr|S&\R                  SZ\\\}   S[\\\}   S\\}SS\PS]\\\R                     S\g\}\\\W   \\\W   4   4S^ jr~S S_KJr  S`\GR                  Sa\WS\GR                  4Sb jrSc r SoS&\R                  SS\PS\\\R                     4Sd jjr SpS3\R                  S\g\R                  \R                  4   4Se jjr     SqSf\R.                  R                  Sg\]Sh\]Si\FSj\\\]\\\]   4      Sk\FSl\\]   SS4Sm jjrg)r    Ndefaultdict)	dataclassreplace)CallableOptionalTYPE_CHECKINGUnion)(create_structured_trace_for_min_cut_info)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolshint_intis_symbol_binding_fx_node)graph_drawer)
OrderedSet)CheckpointPolicy   )config)GraphInfoProvider)dp_knapsackgreedy_knapsackilp_knapsack)KnapsackEvaluator)get_aot_graph_name)get_cuda_generator_meta_valis_with_effects)fx_graph_cseget_aten_targetAOT_PARTITIONER_DEBUGlogc                      \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \\   \S'   \\   \S'   S\R                  4S	 jr
S\R                  4S
 jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSrg)OpTypes<   z8Class for keeping track of different operator categoriesfusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsnodec                 2    [        U5      U R                  ;   $ N)r#   r)   selfr.   s     U/var/www/auris/envauris/lib/python3.13/site-packages/torch/_functorch/partitioners.py
is_fusibleOpTypes.is_fusibleF   s    t$(8(888    c                 2    [        U5      U R                  ;   $ r0   )r#   r*   r1   s     r3   is_compute_intensiveOpTypes.is_compute_intensiveI   s    t$(B(BBBr6   c                 2    [        U5      U R                  ;   $ r0   )r#   r+   r1   s     r3   	is_randomOpTypes.is_randomL   s    t$77r6   c                 2    [        U5      U R                  ;   $ r0   )r#   r,   r1   s     r3   is_viewOpTypes.is_viewO   s    t$55r6   c                 2    [        U5      U R                  ;   $ r0   )r#   r-   r1   s     r3   is_recomputableOpTypes.is_recomputableR   s    t$(=(===r6    N)__name__
__module____qualname____firstlineno____doc__r   r   __annotations__fxNoder4   r8   r;   r>   rA   __static_attributes__rC   r6   r3   r'   r'   <   s    BH%%%h//8$$"" **9rww 9C C8bgg 86BGG 6>BGG >r6   r'   c                      \ rS rSr% \\R                     \S'   \\R                     \S'   \\R                     \S'   \\R                     \S'   \	\R                  \
4   \S'   \R                  S\\R                     4S j5       rS	\R                  S\4S
 jrS	\R                  S\4S jrS	\R                  S\4S jrS	\R                  S\
4S jrSrg)NodeInfoV   inputs_required_fw_nodesrequired_bw_nodesunclaimed_nodesfw_orderreturnc                 B   ^  [        S T R                   5       U 4S jS9$ )Nc              3   $   #    U  H  ov   M     g 7fr0   rC   .0ns     r3   	<genexpr>-NodeInfo.required_fw_nodes.<locals>.<genexpr>c   s     0/1Q/s   c                 "   > TR                   U    $ r0   )rT   )rZ   r2   s    r3   <lambda>,NodeInfo.required_fw_nodes.<locals>.<lambda>c   s    a@Pr6   key)sortedrQ   r2   s   `r3   required_fw_nodesNodeInfo.required_fw_nodes`   s!    0//06P
 	
r6   rZ   c                     XR                   ;   $ r0   )rQ   r2   rZ   s     r3   is_required_fwNodeInfo.is_required_fwf   s    ++++r6   c                     XR                   ;   $ r0   )rR   rg   s     r3   is_required_bwNodeInfo.is_required_bwi   s    ****r6   c                     XR                   ;   $ r0   )rS   rg   s     r3   is_unclaimedNodeInfo.is_unclaimedl   s    ((((r6   c                 T    XR                   ;   d   SU S35       eU R                  U   $ )NNode z not in fw nodes!)rQ   rT   rg   s     r3   get_fw_orderNodeInfo.get_fw_ordero   s2    +++IuQC7H-II+}}Qr6   rC   N)rD   rE   rF   rG   listrJ   rK   rI   r   dictint	functoolscached_propertyrd   boolrh   rk   rn   rr   rL   rC   r6   r3   rN   rN   V   s     M"277++!"''**((277C<  
4= 
 

, ,D ,+ +D +)bgg )$ ) bgg  #  r6   rN   c                   H    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   Srg)	MinCutOptionst   ban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionrC   N)rD   rE   rF   rG   ry   rI   rL   rC   r6   r3   r{   r{   t   s      $$"&&!!r6   r{   r.   rU   c                 |    U R                   R                  SS 5      [        R                  [        R                  4;   $ )N	recompute)metagetr   MUST_RECOMPUTEPREFER_RECOMPUTEr.   s    r3   must_recomputer   }   s5    99==d+''))0  r6   fx_gc                 b    U R                   R                   H  n[        U5      (       d  M    g   g)NTF)graphnodesr   r   r.   s     r3   has_recomputable_opsr      s)    

  $ ! r6   c                    U R                   R                   Hf  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf    g   g)NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   r   s     r3   has_recomputable_rng_opsr      sV    

  4  V,,		11T[[5E5EE ! r6   c                     [        U R                  S   [        R                  [        R                  45      (       a  g[        U R                  S   [        R
                  5      (       d   eg)Nvalr      )
isinstancer   r   SymIntSymBoolSymFloatr   s    r3   sym_node_sizer      sK    $))E"U\\5==$ABBdii&7777r6   c                       \ rS rSrS rSrg)InvalidNodeBase   c                     g)NzInvalid NoderC   rc   s    r3   __repr__InvalidNodeBase.__repr__   s    r6   rC   N)rD   rE   rF   rG   r   rL   rC   r6   r3   r   r      s    r6   r   joint_graphrP   outputssubgraphc                   ^
 [         R                  " 5       n0 m
U H4  nUR                  UR                  5      nUR                  Ul        UT
U'   M6     U R
                   GH;  n[        U5      (       a  US:w  a  [        T
U'   M%  UT
;   a  M-  UR                  S:X  a  [        T
U'   MH  UR                  S:X  a  [        R                  " UR                  0 UR                  D6nU Vs/ s H7  n[        U[         R                  5      (       d  M$  [        T
U   [        5      PM9     nn[!        U5      (       a  [        T
U'   M  UR#                  UU
4S j5      T
U'   M  UR                  S:X  a  UR#                  UU
4S j5      T
U'   GM(  UR                  S:X  d  GM;  GM>     / n	U H  n[        U[         R                  5      (       aN  UT
;  a  [%        SU S	35      e[        T
U   [        5      (       a   SU S
35       eU	R'                  T
U   5        Mp  U	R'                  U5        M     UR)                  [+        U	5      5        UR-                  5         UR/                  5         U$ s  snf )au  
Given a graph, extracts out a subgraph that takes the specified nodes as
inputs and returns the specified outputs.

This includes specifying non-placeholder nodes as inputs.

The general strategy is to initialize all inputs with proxies as we
encounter them, and trace through the graph, only keeping values which take
in valid proxies. Then, all dead code is eliminated.
backwardplaceholdercall_functionc                    > TU    $ r0   rC   xenvs    r3   r^   4_extract_graph_with_inputs_outputs.<locals>.<lambda>   	    CFr6   get_attrc                    > TU    $ r0   rC   r   s    r3   r^   r      r   r6   outputrq   z couldn't be found in envz was invalid, but is output)rJ   Graphr   namer   r   _must_be_in_backwardInvalidNodeoppytreearg_tree_leavesargskwargsr   rK   r   any	node_copyRuntimeErrorappendr   tupleeliminate_dead_codelint)r   rP   r   r   	new_graphr.   new_nodeall_argsr   output_valuesr   s             @r3   "_extract_graph_with_inputs_outputsr      s(     
I
C ((3		D		  !!%%(j*@#CI3; WW%#CIWW'--tyyHDKKHH "!Aa) 4
3q6?3!  
 8}}'D	!++D2BCCIWW
"!++D2BCCIWW 5 "6 Ma!!|"U1#-F#GHH!A  6qc456    Q(  #  U=)*!!#NN9s   %#I$I$c                     U R                   S:H  =(       aF    S[        U R                  5      ;  =(       a'    [        U 5      (       + =(       a    [	        U 5      (       + $ Nr   tangents)r   strr   _is_bwd_seed_offset_is_fwd_seed_offsetr   s    r3   
_is_primalr      sK    =  	*c$++..	*#D))	* $D))	r6   c                 ^    U R                   S:H  =(       a    S[        U R                  5      ;   $ r   r   r   r   r   s    r3   _is_tangentr      s$    77m#F
c$++6F(FFr6   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   bwd_seedbwd_base_offsetr   r   s    r3   r   r      =    77m# c$++&&O*;s4;;?O*Or6   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   fwd_seedfwd_base_offsetr   r   s    r3   r   r      r   r6   c                     U R                   S:H  =(       a)    [        U R                  R                  S5      [        5      $ )Nr   r   )r   r   r   r   r   r   s    r3   _is_backward_stater     s*    77m#W
499==3G(WWr6   c                 @    U R                   R                  SS 5      S:H  $ )Npartitioner_tagis_backwardr   r   r   s    r3   _has_tag_is_backwardr   	  s    99==*D1]BBr6   c                 @    U R                   R                  SS 5      S:H  $ )Nr   must_be_in_backwardr   r   s    r3   _has_tag_must_be_in_backwardr     s    99==*D15JJJr6   c                 `    [        U 5      =(       d    [        U 5      =(       a    [        U 5      $ r0   )r   r   r!   r   s    r3   r   r     s&    '- T"<t'<r6   joint_modulec                |    [         R                  " S U R                  R                  SS9 5       6 nUS U nX!S  nX44$ )Nc              3   8   #    U  H  oR                   v   M     g 7fr0   )r   rY   r.   s     r3   r[   +_extract_fwd_bwd_outputs.<locals>.<genexpr>  s     	K J)) J   r   r   )r   r   r   
find_nodes)r   num_fwd_outputsr   fwd_outputsbwd_outputss        r3   _extract_fwd_bwd_outputsr     sS     $$	K 2 2 = = = J	KG *?+K*+K##r6   saved_valuesr   c                 \    U  H&  nUR                   U:X  d  M  U R                  U5          g    g r0   )r   remove)r   r   saved_values      r3   _remove_by_namer   "  s+    #t#, $r6   saved_sym_nodesr   c                z   [        XS9u  pEU R                  R                  SS9n/ [        [        U5      Qn/ [        [
        U5      Qn/ [        [        U5      Qn	/ [        [        U5      Qn
/ [        [        U5      Qn[        U R                  X!-   U-   U
-   US5      nUR                  SS9 Hp  nUR                  (       d,  [        XR                  5        [        X-R                  5        M@  [        U5      (       d  MR  [        XR                  5        U(       a  Mp   e   [        5       n/ n/ nU HJ  n[        U5      nU(       a$  UR                  U5        UR!                  U5        M9  UR!                  U5        ML     [#        U R                  5      n[$        R&                  " UX5       Hc  nSUR(                  ;  a  M  [+        UR(                  S   5      U-
  n[-        US S9 H  nUU;  a  M  UR!                  UU   5        M!     UU-  nMe     UR/                  5         UR1                  UU-   5        [        U R                  Xy-   XA-   U-   S5      n[        U R                  UU-   U-   U
-   U-   US5      n[2        R4                  R7                  U U5      n[2        R4                  R7                  X5      nUU4$ )	Nr   r   r   r   r   c                     U R                   $ r0   r   )ss    r3   r^   *_extract_fwd_bwd_modules.<locals>.<lambda>e  s    166r6   r`   forward)r   r   r   filterr   r   r   r   r   r   usersr   r   r   r   addr   r   	itertoolschainr   r   rb   clearextendrJ   _lazy_graph_module_make_graph_module)r   r   r   r   r   r   placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphr.   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr   	fwd_graph
fwd_module
bwd_modules                           r3   _extract_fwd_bwd_modulesr  )  s     8 K  %%00M0BL7fZ67M9vk<89NIv&9<HIIv&9<HIGf%7FG2&7:PP	I $$$6zzL))4OYY7%%L))4(((( 7 /9lM    *40f%#**40#**40   3<3E3EFO 7V		!"499U#34}D)9:A '#**?1+=> ; 	$ W" 25LLM 3."_4	I 3
	
	 !	!  		 
 		I &&99,	RJ&&99,RJz!!r6   c                   [        U 5      (       a
  [        XUS9$ [        [        [        U R
                  R                  5      5      n[        [        [        U R
                  R                  5      5      nX4-   n[        XS9u  pg[        U R
                  XVS5      n[        S UR                   5       5      n	/ n
/ nU R
                  R                   GH	  nUR                  U	;  a  M  [        U5      (       a  UR                  U5        M9  SUR                  ;  aH  UR                  S:X  a8  UR                   n[#        S U 5       5      (       d   eU
R%                  U5        M  UR                    Vs/ s H  oR                  U	;  d  M  UPM     nnSUR                  ;   a*  [#        S U 5       5      (       a  UR%                  U5        M  U
R                  U5        GM     [        [&        R)                  U
5      R+                  5       5      n
[        [&        R)                  U5      R+                  5       5      n[-        U U
UUS9$ s  snf )	a  
Partitions the :attr:`joint_module` in a manner that closely resembles the
behavior observed in the original ``.forward()`` and ``.backward()`` of the
callable, i.e., the resulting forward graph contains those operators that
are executed in the original ``.forward()`` callable passed to
:func:`aot_function`.

The default partitioner collects the operators that are between the forward
inputs and the forward outputs. This helps in finding the tensors which have
to be stashed for the backward pass. These stashed tensors become the output
of the generated forward graph. The remaining operators are then placed in
the backward graph.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.

Returns:
    Returns the generated forward and backward Fx graph modules.
r   r   c              3   \   #    U  H"  oR                   S :w  d  M  UR                  v   M$     g7fr   Nr   r   r   s     r3   r[   $default_partition.<locals>.<genexpr>  s#      $6d''X:M			6   ,,tensor_metar   c              3   Z   #    U  H!  oR                   [        R                  :H  v   M#     g 7fr0   )r   operatorgetitemrY   users     r3   r[   r    s     I54{{h&6&665s   )+c              3   8   #    U  H  n[        U5      v   M     g 7fr0   r   rX   s     r3   r[   r    s      2(71Ar   r   r   )r   #min_cut_rematerialization_partitionrt   r   r   r   r   r   r   r   r   r   r   r   r   r   r  allr  ru   fromkeyskeysr  )r   _joint_inputsr   r
  r  rP   r   r   forward_only_graphforward_node_namesr   r   r.   r  rZ   backward_usagess                   r3   default_partitionr1    s   4 L))2
 	
 
L,>,>,D,DEFM!&)<l>P>P>V>V"WX3F7 K <F $ $066$  LO""((99..t ""4($))+?0JJJEI5IIIII&  ::%a7I)I:   		)c 2(72 / /  &&7##D); )< l388:;L4==9>>@AO#''	 )s   0IIg    .Anumelc                     XR                   -  $ r0   )itemsize)r2  dtypes     r3   _tensor_nbytesr6    s    >>!!r6   c                   ^ S[         4S jmSU R                  ;   a  U R                  S   n[        U[        5      (       a  g[        U[        [
        45      (       a  [        U4S jU 5       5      $ [        U[        5      (       a#  [        U4S jUR                  5        5       5      $ [        U[        R                  5      (       a  T" U5      $ [        S[        U5       SU  35      eU R                  S	:X  d;  U R                  [        R                  R                   R"                  R$                  L a  g
[        SU  S35      e)NrU   c                     [        U [        R                  5      (       d  g[        [	        U R                  5       SS9U R                  5      $ )Nr      fallback)r   r   Tensorr6  r   r2  r5  r   s    r3   object_nbytes_size_of.<locals>.object_nbytes  s4    !U\\**hqwwy4@!''JJr6   r   r   c              3   4   >#    U  H  nT" U5      v   M     g 7fr0   rC   )rY   rZ   r>  s     r3   r[   _size_of.<locals>.<genexpr>  s     5A}Q''   c              3   8   >#    U  H  u  pT" U5      v   M     g 7fr0   rC   )rY   _rZ   r>  s      r3   r[   rA    s     @KDA}Q''Ks   zUnknown metadata type z	 on node r   r   rq   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)rv   r   r   r   rt   r   sumru   itemsr   r<  r   typer   r   opsaten_assert_scalardefault)r.   r   r>  s     @r3   _size_ofrL    s   KC K
 		iic<(( dE]++5555T""@CIIK@@@U\\** %%3DI;ivNOOww*uyy~~/L/L/T/T T

vde r6   r   c           	         SSK Jn  U" [        5      nU R                   H5  nUR                  S:X  d  M  X#R
                  R                  ==   S-  ss'   M7     [        R                  S[        UR                  5       S SS95        g )	Nr   r   r   r   z%sc                     U S   $ Nr   rC   r=  s    r3   r^   _count_ops.<locals>.<lambda>  s    QqTr6   Tra   reverse)collectionsr   rv   r   r   r   rD   r%   inforb   rF  )r   r   cntr.   s       r3   
_count_opsrV    s`    '%c*C77o%$$%*%  HHT6#))+>4HIr6   c                     / n [        [        R                  R                  5       H  n[	        [        R                  R                  U5      n[        U[        R                  R                  5      (       d  MR  UR                  5        HJ  n[	        X#5      n[        R                  R                  UR                  ;   d  M8  U R                  U5          M     M     U $ r0   )dirr   rH  rI  getattrr   _opsOpOverloadPacket	overloadsr   	pointwiser   r   )rH  	attr_nameopoverloadpacketoverloadop_overloads        r3   pointwise_opsrb    s    
C(	"599>>9=*EJJ,G,GHH(224H!"2=Kyy""k&6&66

+, 5 ) Jr6   	depth_mapc                     U  Vs0 s H=  n[        U[        R                  R                  R                  5      (       d  M8  X!U   _M?     nn[        UR                  5       S SS9$ s  snf )Nc                     U S   $ rO  rC   r=  s    r3   r^   sort_depths.<locals>.<lambda>&  s    AaDr6   TrQ  )r   r   rJ   r.   rK   rb   rF  )r   rc  arg
arg_depthss       r3   sort_depthsri  "  s^    '+'+z#uxx}}?Q?Q/Rs^t   *""$.$GGs   7A% 	A%gmc                   ^
^^ [         R                  " 5       m0 m
U R                  R                  SS9 H  nTR	                  UU
4S j5      T
U'   M     0 m[        U R                  R                  5       H
  u  p!UTU'   M     U
UU4S jn[        [        [        U R                  R                  5      5      nSn[        R                  nU H(  nUR                   H  nTU   U:  d  M  TU   nUnM     M*     Uc  U $ [        U R                  R                  5      TU   S  H  nU" U5        M     [        R                   R                  U T5      n	U	$ )a{  
This pass finds the first bwd node in the graph (by looking at users of
tangents) and then reorders the graph by walking from this node to all the
way to the end of the graph. At each op in this traveral, we insert this op
in a new graph and try to bring only the relevant subgraph from the other
non-bwd edges relevant for this op. This closely mimics the behavior of
autograd engine.

Why is this pass required in the first place?

This is an artifact of how partitioners work today. The starting point of
partitioner is a joint graph, which is fwd and then bwd graph. In the case
of checkpointing, we keep portions of fwd graph in their original place in
the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
graph has copies of recomputed fwd subgraphs followed by the original bwd
graph. If we run this naively, this leads to bad memory footprint, because
the fwd subgraphs are live for way longer duration than necessary. This pass
reorders the operations such that we prioritize the ops for the original bwd
graph while only realizing those ops from the fwd graph that are necessary
at any given point in the graph.
r   r   c                    > TU    $ r0   rC   r   s    r3   r^   5reordering_to_mimic_autograd_engine.<locals>.<lambda>E  s	    Ar6   c                 8  > U /n[        5       n[        U5      S:  aM  UR                  5       n X;   d  U T;   a  M,  UR                  U 5        XR                  -  n[        U5      S:  a  MM  [        UU4S jS9nU H  n TR                  U U4S j5      TU '   M     g )Nr   c                    > TU    $ r0   rC   )rZ   orders    r3   r^   Sreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>X  s	    %(r6   r`   c                    > TU    $ r0   rC   r   s    r3   r^   rq  Z  r   r6   )r   lenpopr  all_input_nodesrb   r   )r.   	cur_nodesinsertable_nodesr   r   rp  s      r3   insert_node_in_graphAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graphK  s    F	0:)nq ==?D'43;  & ---I )nq  ""28JK$D!++D2BCCI %r6   N)rJ   r   r   r   r   	enumerater   rt   r   r   mathinfr  r   GraphModule)rj  r.   idxrx  r  first_node_in_bwdminimum_ordertangentr%  new_gmr   r   rp  s             @@@r3   #reordering_to_mimic_autograd_enginer  )  s0   . 
I"$C ##}#5''.>?D	 6 Erxx~~.	d /D$ &bhhnn=>NHHM!MMDT{]* %d$(! " "  	 RXX^^$U+<%=%?@T" A XX!!"i0FMr6   	fw_module	bw_modulefw_nodebw_nodedevice	rng_countlast_fwd_inputlast_bwd_inputc                    UR                   nUc   eU R                  n	UR                  n
[        R                  R                  R
                  nU R                  R                  U5         U R                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        UR                  R                  U5         UR                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        [        UR                  5      nWUS'   U R                  R                  U5         U	R                  SUUR                  /UR                  Q7US9nSSS5        UR                  W5        U	R!                  U5        [        UR                  5      nWUS'   U
R#                  U5         U
R                  SUUR                  /UR                  Q7US9nUR                  U5        U
R!                  U5        SSS5        Xg4$ ! , (       d  f       GNx= f! , (       d  f       GN.= f! , (       d  f       N= f! , (       d  f       Xg4$ = f)a  
Note [CUDA Graph Safe RNG Functionalization]

CUDA Graph capture doesn't work with get_rng_state and set_rng_state because these functions operate on CPU values,
while CUDA Graph RNG capture uses on-device CUDA tensors. To solve this, we use graphsafe_set_state with a
CUDA Generator registered to the CUDA Graph before capture begins. graphsafe_set_state updates the generator's pointer
to reference a different GeneratorImpl, ensuring subsequent calls are correctly forwarded to the desired generator
(and its cuda-tensor RNG state during graph capture).

For each RNG operation's forward/backward pair:

- We create two generators initialized with identical values
- Each forward and backward call advances its respective generator equally
- This keeps generators synchronized so forward and backward operations use matching RNG values

When forward is called multiple times before backward (causing desynchronization):

- We save the forward RNG state
- We update the backward Generator's state before executing backward

Before each CUDA Graph replay, replay_prologue updates captured RNG pointers with current states, ensuring backward Generator
changes are reflected during replay.

This function modifies both forward and backward computation graphs by:

Creating RNG state placeholders for both passes
Updating the forward node to use graph-safe RNG state
Updating the backward node to use graph-safe RNG state

For more details: https://github.com/pytorch/pytorch/issues/113541
Nfwd_rng_state_r   bwd_rng_state_	rng_stater   r   r   )indexr   r   _prims	rng_primsgraphsafe_run_with_rng_stateinserting_afterr   r    r   ru   r   create_noder   r   replace_all_uses_with
erase_nodeinserting_before)r  r  r  r  r  r  r  r  
device_idxfw_graphbw_graphr  fwd_rng_statebwd_rng_state	fw_kwargsfunctional_fw_node
bwd_kwargs
rng_outputs                     r3   %apply_graphsafe_rng_functionalizationr  s  s   R J!!!HH#(<<#9#9#V#V  
	(	(	8!33nYK4PQ$?
$K5!& 
9 
	(	(	8!33nYK4PQ$?
$K5!&	 
9 W^^$I*Ik		(	(	1%11(..07<<0	 2 
 
2 !!"45  gnn%J+J{		"	"7	+))(..07<<0	 * 

 	%%j1G$ 
, ))M 
9	8 
9	8 
2	1 
,	+ ))s1   )9H9H%;+H7;AI
H"%
H47
I
Inum_sym_nodesc                   ^' [         R                  " 5       nS nS[        [        R                     4S jm'S[        [        R                     4S jnU" U 5      nU" U5      nU" U5      n	0 n
U R
                  R                   H  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [        R                  R                  UR                  R                  ;   d  Mf  X{R                     nXR                     nXR                     nXS.X'   M     [        R                  R                  R                   n[        R                  R                  R"                  nS nUR
                  R%                  SS	9 H  nS
UR                  ;   d  M  Un  O   Uc  ['        S5      e/ n[)        [+        UR
                  R%                  SS	95      5      n[)        [+        UR
                  R%                  SS	95      5      n[-        U'4S jU
R/                  5        5       5      nUR1                  [        R                  " S5      5        [3        U5      S:  n[        R4                  R6                  n[6        R8                  =(       a<    U(       + =(       a.    UR:                  (       + =(       d    UR<                  R>                  n[A        U
RC                  5       5       GH  u  nu  nnUS   nUS   nT'" U5      nUR
                  nUR
                  nU(       a*  Ub'  URD                  S:X  a  [G        UUUUUUUU5      u  nnMe  URI                  U5         URK                  SUUR                  /URL                  Q7URN                  S9nURK                  S[P        RR                  US40 S9nURK                  S[P        RR                  US40 S9n URU                  U 5        URW                  U5        URY                  U5        S S S 5        URI                  U5         S[)        U5       3n!UR[                  U!5      n"U" U5      U"R\                  S'   S S S 5        URI                  U5         URK                  SUW"UR                  /URL                  Q7URN                  S9n URU                  U 5        URW                  U5        S S S 5        GM     U(       a  [)        [_        UR
                  R%                  SS	95      5      n#U#RL                  S   n$[3        U$5      U-
  n%U$S U% [a        U5      -   U$U%S  -   n&UR
                  Rc                  U&5        UR
                  RW                  U#5        URe                  5         URe                  5         X4$ ! , (       d  f       GN= f! , (       d  f       GNV= f! , (       d  f       GM  = f)Nc                 &   0 nU R                   R                   Ht  nUR                  S:X  d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf  X!UR                  '   Mv     U$ )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )gmodrandom_nodesr.   s      r3   get_rng_ops*functionalize_rng_ops.<locals>.get_rng_ops  sh    JJ$$D?*DKK00II559I9II*.TYY' % r6   rU   c                 B   SU R                   ;  a  gU R                   S   n[        U[        5      (       d  U4nU HL  n[        U[        R                  5      (       d  M$  UR
                  R                  S:X  d  M@  UR
                  s  $    [        R
                  " S5      $ )zF
Check the example value of the node outputs to find the device type.
r   Ncudacpu)r   r   r   r   r<  r  rG  )r.   
candidates	candidates      r3   
get_device)functionalize_rng_ops.<locals>.get_device  s     		!YYu%
*e,,$J#I)U\\22##((F2$+++ $
 ||E""r6   r  c                     U b.  U R                   S:X  a  [        R                  R                  5       $ [        R                  " 5       $ )Nr  )rG  r   r  get_rng_state)r  s    r3   get_sample_rng_state3functionalize_rng_ops.<locals>.get_sample_rng_state  s5    &++"7::++--""$$r6   r   )fwdbwdr   r   r  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisc              3   :   >#    U  H  nT" US    5      v   M     g7f)r  NrC   )rY   	node_pairr  s     r3   r[   (functionalize_rng_ops.<locals>.<genexpr>/  s"      6W
9U#$$6Ws   r  r   r  r  r  r   r  r   rng_state_output_r   r   )3r  countr   r   r  r   r   r   r   r   r   r   r   r   r  r  run_and_save_rng_staterun_with_rng_stater   r   nextreversedr   valuesdiscardrs  	_inductorr   graphsafe_rng_functionalizationfallback_randomtest_configs*graphsafe_rng_func_ignores_fallback_randomrz  rF  rG  r  r  r  r   r   r"  r#  r  r  r   r   r   iterr   r   	recompile)(r   r  r  r  uidr  r  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_mapr.   	base_noder  r  run_and_save_rngr  bw_tangent_start_nodefw_rng_state_outputsr  r  devicesmulti_cuda_devices
ind_config'use_rng_graphsafe_rng_functionalizationr  r  r  r  r  r  stater  
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr   r  s(                                          @r3   functionalize_rng_opsr    sM   2 //
C	#HU\\2 #$%Xell%; % &l3"9-"9-!""((4  V,,		11T[[5E5EE+II6I&yy1G&yy1G:A2R$/ ) ||--DD//BB **m*<		!$(! = $o
 	
 (9??#=#=#=#OPQN(9??#=#=#=#OPQN 6N6U6U6W G OOELL'( W) ''J.. 	
""	
 *** R&&QQ , .7 &&(.)	)Iy E"E"G$???? 4"v%-R	.*NN **73%-%9%9#$!..87<<8">>	 &: &" !,,#$$,a0	 -  &11#$$*  2 
 --j9##G,$++E21 46 **+@A0c<
$,$8$8$D!0DV0L!&&u- B
 **73%11#&+W^^KgllK">>	 2 
 --j9##G, 43w.R d9??#=#=#=#JKL#((+
 _}<**+()*+,-. 	
 	w'"">2w 436 BA
 43s&   B,V	5V%AV-	
V	
V*	-
V=	c                    U R                   R                   H  n[        U5      (       d  M  UR                   HT  n[        U5      (       d  M  UR                  S   UR                  S   :  d  M7  [
        R                  UR                  S'   MV     UR                  R                  SS5      (       d  M  [        S UR                   5       5      (       a  M  [
        R                  UR                  S'   M     U $ )z
If there are two consecutive checkpointed blocks with no operator in
between, we would still want to stash the tensor at the boundary of
checkpointed blocks. The following pass makes the last output node
non-recomputable to allow for that.
ac_graph_idr   has_backward_hookFc              3   8   #    U  H  n[        U5      v   M     g 7fr0   )r   r$  s     r3   r[   )cleanup_recompute_tags.<locals>.<genexpr>  s      E1;t$$r   )	r   r   r   r  r   r   	MUST_SAVEr   r   )r   r.   r%  s      r3   cleanup_recompute_tagsr    s     ""(($

"4((		-0499]3KK-=-G-GDIIk* # yy}}0%88 E15E B B& *:)C)C		+&7 )8 r6   	node_infomin_cut_optionsdont_banc                 0  ^^^^%^&^'^(^)^*^+^,^-^.^/^0 Tc
  [        5       m[        5       m/[        (       aQ  [        S U R                   5       5      nU[        S T/R                   5       5      -
  n[
        R                  SU5        S m&S m'U&U'U/4S jm( SS KnU(UU/4S	 jm*U*UU/4S
 jnU(4S jm)S[        4U)U/4S jjn	UR                  5       m.[        5       m%U%UU.U/4S jn
U R                   GH{  nUR                  S:X  a  M  UTR                  ;   aj  UTR                  ;  a.  T.R                  UR                   S-   S["        R$                  S9  Md  T.R                  UR                   S-   S["        R$                  S9  ['        U5      (       a.  T.R                  UR                   S-   S["        R$                  S9  M  [)        U5      (       d  [+        U5      (       a  U
" U5        TR-                  U5      (       a  U" U5      (       a  U
" U5        SUR.                  ;  =(       a    SUR.                  ;  =(       dB    SUR.                  ;   =(       a,    [1        UR.                  S   [2        R4                  5      (       + n[7        U5      (       a  [        [9        U5      5      nOPU(       aA  [1        UR.                  R;                  S5      [<        5      (       a  SO["        R$                  nOU	" U5      nT.R                  UR                   S-   UR                   S-   US9  UR>                   H<  nT.R                  UR                   S-   UR                   S-   ["        R$                  S9  M>     GM~     S[@        [B        RD                     S[F        S[F        4U(U4S jjnTRH                  (       GaR  TRJ                   GHA  nUR>                   Vs/ s H,  nTR-                  U5      (       d  M  TRM                  U5      PM.     nnUR>                   Vs/ s H  nTR-                  U5      (       d  M  UPM     nn[O        U5      S:  d  M  U" U[Q        U5      5      n[S        UR>                  5       H  nTR-                  U5      (       d  M  TRM                  U5      U:  d  M2  T(" UU5      (       d  MB  UT%;   a  MJ  [
        R                  SUTRM                  U5      UUTRM                  U5      5        U
" U5        M     GMD     TRT                  (       Ga  [        5       nU R                   GHi  nTR-                  U5      (       d  M  TRM                  U5      U4/nTRM                  U5      n[O        U5      S:  d  MR  [V        RX                  " U5      u  nnUU;   a  M2  UR[                  U5        TRM                  U5      US-   :  aP  [O        U5      S:X  aA  [
        R                  SUUTRM                  U5      TRM                  U5      5        U
" U5        M  UR>                   H[  nTR-                  U5      (       d  M  T(" UU5      (       d  M+  UT%;  d  M3  [V        R\                  " UTRM                  U5      U45        M]     [O        U5      S:  a  GM  GMl      UR_                  T.SS5      u  nnUu  nm-[        5       nU.4S  jU 5        H"  u  m0nURm                  U-U04S! jU 5       5        M$     [        5       nU H*  u  n n!U S S" U!S S# :X  d   eU S S" n"UR[                  U"5        M,     [o        U 5      m+[q        U R                  5       V#Vs0 s H  u  n#oU#_M
     snn#m,[s        U+4S$ jU 5       U,4S% jS&9n$U$T%4$ ! [         a  n[        S5      UeS nAff = fs  snf s  snf ! [`         ai    [
        R                  S5        [
        R                  SRc                  URd                  Rf                  Ri                  T.5      5      5        [k        T.5        e f = fs  snn#f )'Nc              3      #    U  HS  nUR                   S :X  d  M  [        UR                  S5      (       d  M2  [        UR                  R                  5      v   MU     g7f)r   _overloadpacketN)r   r   r   r   r  r   s     r3   r[    solve_min_cut.<locals>.<genexpr>  sK      &
)ww/) -.5dkkCT.U -C++,,)s   AA&Ac              3   8   #    U  H  n[        U5      v   M     g 7fr0   )r   rY   is     r3   r[   r    s      4
5qCFF5r   z&Ops banned from re-materialization: %sc                 X   UR                   [        R                  R                  R                  :w  a  gUR
                  S   n[        R                  R                  R                  U5      u  nnU H6  nUR                  U   nXL a    g[        U[        5      (       d  M/  X;   d  M6    g   gNFr   T)r   r   rH  higher_orderauto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   rt   )ab
mutable_opmutable_arg_namesrD  r   rg  s          r3   !can_fuse_into_auto_functionalized8solve_min_cut.<locals>.can_fuse_into_auto_functionalized  s    88uyy--AAAVVAY
 ##66GG
S	
%D((4.Cx#t$$8 & r6   c                     UR                   [        R                  R                  R                  :w  a  gUR
                  S   nU H  nUR
                  S   U   nXL d  M    g   g)NFtensors_to_cloner   T)r   r   rH  r   triton_kernel_wrapper_functionalr   )r  r  r  r   rg  s        r3   .can_fuse_into_triton_kernel_wrapper_functionalEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functional  s[    88uyy--NNNHH%78%D((8$T*Cx & r6   c                   > [        U5      [        R                  :X  a  gT" X5      (       a  gT" X5      (       a  gU R                  [        R
                  L a?  U R                  S   R                  [        R                  R                  R                  L a  gTR                  U 5      =(       a    TR                  U5      $ )NTr   F)r#   rI  catr   r"  r#  r   r   rH  r  r  r4   )r  r  r  r  op_typess     r3   r4   !solve_min_cut.<locals>.is_fusible  s     1),Q229!??HH(((q	  yy%%FFG
 ""1%@(*=*=a*@@r6   r   zANeed networkx installed to perform smart recomputation heuristicsc                 n  > TR                  U 5      (       a  g[        U /5      n[        U5      S:  a  UR                  5       nUR                   HQ  nTR                  U5      (       d  T" X#5      (       d    gTR                  U5      (       d  M@  UR                  U5        MS     [        U5      S:  a  M  gr  )r>   r   rs  rt  r  rh   r  )r.   rv  curr%  r4   r  r  s       r3   is_materialized_backwards0solve_min_cut.<locals>.is_materialized_backwards  s    D!!v&	)nq --/C		 //55j>S>S##D))MM$'	 " )nq  r6   c                   > U R                   S:w  a  gU R                  [        R                  :X  a  gU R                  R                  SS 5      [        R                  :X  a  g[        R                  (       a  TR                  U 5      (       a  gU R                  [        R                  R                  [        R                  R                  4;   a  gTR                  (       a  TR!                  U 5      (       d  gO-TR#                  U 5      (       d  TR%                  U 5      (       a  gTR&                  (       a8  T" U 5      (       a+  [(        R+                  SU [-        U R.                  5      5        gU R0                  S:  a  U R0                  [        R2                  :  a  gTR4                  (       a/  [7        S U R8                   5       5      n[;        U 5      nUS-  U:  $ g)	Nr   Fr   Tzmaterialized backwards: %s %si  c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fr0   )r   rJ   rK   rL  r  s     r3   r[   Bsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>D  s(      %%.*Q2HYs   #;;r   )r   r   r"  r#  r   r   r   r  r   recompute_viewsr>   rI  lift_fresh_copyrK  
lift_freshr   rA   r;   r8   r   r%   debugr   r  dist_from_bwmax_dist_from_bwr   rE  r   rL  )r.   input_tensors_sizeoutput_sizer  r  r  s      r3   should_ban_recomputation/solve_min_cut.<locals>.should_ban_recomputation  s{   77o%;;(***99==d+/?/I/II!!h&6&6t&<&<;;4//779P9PQQ22++D11 2 !!$''8+H+H+N+N 77<U=
 =
 II5tU4::=NO t#(9(9F<S<S(S ++!$ %%)YY% " #4.K?%777r6   c                 r   >^  T R                   S:X  a  g[        UU 4S jT R                   5       5      (       + $ )Nr   Tc              3   6   >#    U  H  nT" TU5      v   M     g 7fr0   rC   )rY   r%  r4   r.   s     r3   r[   9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>O  s     E*$z$--*s   )r   r*  r  )r.   r4   s   `r3   is_materialized&solve_min_cut.<locals>.is_materializedK  s*    77m#E$**EEEEr6   rU   c           
        > [        U 5      n[        R                  (       a&  TR                  U 5      (       a  [        R
                  $ [        U R                  S   [        5      (       a2  [        U R                  S   [        R                  5      (       d  [        $ [        US[        [        U R                  S5      S5      -  -  5      nT" U 5      (       a  U$ US-  $ )Nr   g?d   r      )rL  r   r  r>   r{  r|  r   r   r   r   r   INT_INFrv   maxminr  )r.   mem_szr  r  s     r3   get_node_weight&solve_min_cut.<locals>.get_node_weightQ  s    $!!h&6&6t&<&< 88Odii&55dii.== Vsc#d.?.?*Eq&IIJK4  MA:r6   c                 X  > TR                  U 5      (       a  gU T;   a  g[        U 5      (       a  gSU R                  ;   a-  [        U R                  S   [        R
                  5      (       a  gTR                  U 5        TR                  SU R                  S-   [        R                  S9  g)NFr   source_incapacityT)r>   r   r   r   r   r   r  add_edger   r{  r|  )r.   banned_nodesr  nx_graphr  s    r3   ban_recomputation_if_allowed3solve_min_cut.<locals>.ban_recomputation_if_allowedm  s    D!!8 $DII*TYYu-=u~~"N"N
 	(DII$5Ir6   r   r&  sinkr'  _outr   r           start_nodes	max_rangec                   > / nU  H,  n[         R                  " UT
R                  U5      US45        M.     [        U5      S:  a  [         R                  " U5      u  pEnU(       d  T
R                  U5      $ UR
                   Hi  nT
R                  U5      (       d  M  T
R                  U5      U:  a  M2  T
R                  U5      UT	" XW5      4nX;  d  MS  [         R                  " X(5        Mk     [        U5      S:  a  M  U$ )zl
Finds the first unfusible node in the chain of nodes starting from
`start_nodes` and returns its position.
Tr   )heapqheappushrr   rs  heappopr  rh   )r1  r2  sorted_nodesrZ   rD  r.   node_is_fusibler%  r   r4   r  s            r3   find_first_unfusible+solve_min_cut.<locals>.find_first_unfusible  s    
 9;ANN<)*@*@*CQ)MN  ,!#',}}\'B$A_" --d33

++D11 --d3i? !..t4"4.6C
 .|9 #	 ,!# r6   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)r  ztoo long %s %s %s %sr%  z-Failed to compute min-cut on following graph:
c              3   0   >#    U  H  oTU   4v   M     g 7fr0   rC   )rY   rZ   r+  s     r3   r[   r  :  s     8i$is   c              3   :   >#    U  H  oT;   d  M
  TU4v   M     g 7fr0   rC   )rY   vnon_reachableus     r3   r[   r  ;  s     Ad=.@fq!fds   	c              3   .   >#    U  H
  nTU   v   M     g 7fr0   rC   rY   r.   name_to_nodes     r3   r[   r  G  s     2	d		s   c                    > TU    $ r0   rC   )r   node_idxs    r3   r^   solve_min_cut.<locals>.<lambda>G  s	    (1+r6   r`   ):r   get_default_op_listr$   r   r-   r%   rT  networkxImportErrorr   floatDiGraphr   rR   rP   r)  r   r{  r|  r   r   r   rh   r   r   r   r<  r   r   r   r   r  rt   rJ   rK   rv   r}   rd   rr   rs  r  r   r~   r4  r6  r  r5  minimum_cut	Exceptionjoin	readwriteedgelistgenerate_edgelistvisualize_min_cut_graphupdateget_name_to_noderz  rb   )1r   r  r  r  joint_module_opsops_ignorednxer  r"  r,  r.   is_non_tensor_nodeweightr%  r9  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderrD  r  	cut_value	partition	reachablecutsetnbrs	cut_nodesnode_innode_out	node_namer~  r   r*  r  r  r4   r  r  rE  rG  r?  r+  r  r@  s1    ```                                 @@@@@@@@@@@@r3   solve_min_cutrn    s    <"$H% &
#))&
 

 ' 4
$554
 *
 
 	9;G"A&0dF  2 zz|H(2L * !!77h9...9+++!!$))e"3Vdhh!O dii&0&488L$
 dii%/$((Kd2488(.
 ##D)).Ft.L.L(. "E}DII'EUtyy SDIIe4Dell)S%S 	 t=./F!$))--"6FFDHH  %T*F$))e+TYY-?&QJJDdii&0$))e2CdhhW e "L$rww- C C  4 ,,,"44I &OO+D++D1 -	&&t,+   "+!0I4L4LT4R   6{Q&:8S[&Q#!)//2D!0066%2248;NN&y$77</$O%%229=/ %2248 5T:! 3 5P 111'1|%++J++J77''
3Z@2G $00<Kg,"w/3'>C  **3/+2CCG)HH."!..s3!..z: 15IID!0066&sD11 4w1G1G1Mt0TU &) g,"" ,F!~~h&I	9  )I}*4,F8i84AdAA 9 ",I#s|x},,,CRL	i  $
 $K0L+4[5F5F+GH+Gic4c	+GHH2	28ML %%G
  O
	|
R  @A2<<00BB8LMN)	& IsC   ]4 ^$^	^'^^ `4
^>^

^A3`c                    SS K nSS KnUR                  R                  U 5      R	                  5       nUR                  U5      S   nUR                  5        He  nXR                  5          UR                  5          S   nUR                  [        U5      5        U[        S5      :X  d  MT  UR                  S5        Mg     [        R                  S5        UR                  S5        g )Nr   r(  r|  redz2Visualizing the failed graph to min_cut_failed.svgzmin_cut_failed.svg)rJ  pydotnx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   rL  	set_colorr%   rT  	write_svg)r+  rY  rq  
dot_format	dot_graphedger\  s          r3   rT  rT  L  s    %%h/99;J))*5a8I##%//+,T-A-A-CDZPs6{#U5\!NN5! & HHAB,-r6   c                  6   / [         R                  P[         R                  P[         R                  P[         R                  P[         R
                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                   P[         R"                  P[         R$                  P[         R&                  P[         R(                  P[         R*                  P[         R,                  P[         R.                  P[         R0                  P[         R2                  P[         R4                  P[         R6                  P[         R8                  P[         R:                  P[         R<                  P[         R>                  P[         R@                  P[         RB                  P[         RD                  P[         RF                  P[         RH                  P[         RJ                  P[         RL                  P[         RN                  P[         RP                  P[         RR                  P[         RT                  P[         RV                  P[         RX                  P[         RZ                  P[         R\                  P[         R^                  P[         R`                  P[         Rb                  P[         Rd                  P[         Rf                  P[         Rh                  P[         Rj                  P[         Rl                  P[         Rn                  P[         Rp                  P[         Rr                  P[         Rt                  P[         Rv                  P[         Rx                  P[         Rz                  P[         R|                  P[         R~                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  Pn [         R                  [         R                  [         R                  /nU[         R                  [         R                  [         R                  [        R                  [         R                  [         R                  [         R                  [         R                  /-  nUnU / [        R                  P[        R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P-  n U [         R                  [         R                  /-  n X-  n U [        5       -  n U [         R                  /-  n U [         Vs/ s H  n[        U5      PM     sn-  n [        U 5      n[        [         R                  [         R                  [         R                  /5      n[         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         GR                   [         GR                  [         GR                  [         GR                  [         GR                  /nXE-  nG[        U[        U5      U[        U5      U5      $ s  snf r0   )rI  r  subdivatan2mulr  r   pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltabsbitwise_notceilfloorfracnegreluroundsilutruncr%   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrt
reciprocalsigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardrE  mean_grad_sum_to_sizesum_to_sizeamaxtotype_asr"  r#  squeeze	unsqueezersub_to_copyaliasviewslicetprimsbroadcast_in_dimexpand
as_stridedpermuteselectconvert_element_typeclone	full_likevarstd_unsafe_viewreshapebroadcast_tensorsscalar_tensorones	new_zerosr  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota)_low_memory_max_pool2d_offsets_to_indicesr  gatherrb  
zeros_liker   r   r   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr'   )default_recomputable_opsrecomputable_view_opsr,   mr-   r+   r*   r)   s           r3   rI  rI  ]  sx   L0L0L0 	L0 	

	L0
 	L0 	L0 	L0 	L0 	L0 			L0 	L0 	L0 	L0 	L0 	L0  	!L0" 	#L0$ 	%L0& 	'L0( 	)L0* 	+L0, 	-L0. 	/L00 			1L02 	

3L04 			5L06 	7L08 			9L0: 	

;L0< 			=L0> 	

?L0@ 	AL0B 	

CL0D 	

EL0F 			GL0H 	IL0J 	KL0L 	

ML0N 	OL0P 			QL0R 	SL0T 			UL0V 			WL0X 	YL0Z 			[L0\ 			]L0^ 	_L0` 			aL0b 			cL0d 	

eL0f 			gL0h 	

iL0j 	kL0l 	mL0n 	oL0p 	qL0r 	sL0t 	

uL0v 	

wL0x 			yL0z 	{L0| 			}L0~ 	L0@ 	AL0B 			CL0D 	EL0F 	GL0H 			IL0J 	KL0L 	ML0N 	OL0P 	QL0R 	SL0T 			UL0V 	WL0Z "\\4>>4::F		

	 	 %H $!		$!""$! 	

$! 		$!
 	$! 			$! 			$! 	$! 	$! 	$! 	$! 	$! 			$! 	$! 	

$!  	!$!" 	#$!$ 	%$!& 			'$!( 	)$!* 	+$!, 	-$!. 			/$!0 	1$!2 	

3$!4 	5$!6 			7$!8 	9$!: 	

;$!< 	

=$!> 	?$!@ 	A$!B 	C$!D 	

E$!F 	77G$! $L T[[ 99(/!   N1!3A!6 NN!":;T00$..$//RSJ!!

0044%%))   #/K()8 ' !Os   'dc                 L    0 nU R                    H  nX!UR                  '   M     U$ r0   )r   r   )r   rE  r.   s      r3   rV  rV    s'    L"&TYY r6   memoryruntimes
max_memoryall_recomputable_banned_nodesc           	         [         R                  nUS:X  a  [        XU5      $ US:X  a  [        XU5      $ US:X  a  [	        XU5      $ US:X  aR  [
        R                  S5        [        R                  " U UUUS9n[	        UU[        US9R                  [        US95      $ [        U5      (       a  U" XX4U5      u  pS	X4$ [        S
U 35      e)Ngreedyilpdpdynamic_memory_budget_dpzdynamic_memory_budget_dp is an experimental solver. It does not guarantee performance improvements. Additionally, it is not guaranteed to be stable.)r   r   recorded_knapsack_input_memories recorded_knapsack_input_runtimes)graph_info_provider)knapsack_algomax_mem_budgetr0  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr   r   r   r%   warningr   inialize_from_graphr   get_knee_point_memory_budgetcallabler   )
r   r  r  r  r  r  SOLVERr  saved_node_idxrecomp_node_idxs
             r3   #_optimize_runtime_with_given_memoryr  	  s     33Fv<<	5Fj99	46Z88	-	-?	

 0CC#*G-3-5	
 $7**)) + 	
 		
 
&		*08U+
' ^55I&RSSr6   no_dispatchr   r;  c                    ^ [        U R                  5      nU4S jnU Vs/ s H
  oC" U5      PM     nnU R                  5        Vs/ s H
  oC" U5      PM     nnU R                  X%S9$ s  snf s  snf )Nc                    > [        U TS9$ )Nr:  )r   )dr;  s    r3   realize_symbol8_remove_symbols_without_guarding.<locals>.realize_symbol>  s    H--r6   )stride)rt   shaper  new_empty_strided)r   r;  r  r  r   r  s    `    r3    _remove_symbols_without_guardingr  ;  sk    ME. )..1^AE.)*4AnQF4u44 /4s   A(A-c                 `  ^ ^^	 [         R                  nS nUS:X  a  gUS:X  ac  [        5          SSKJn  [
        R                  " UT R                  T R                  45      u  mm	UR                  UU	U 4S j5      nUsS S S 5        $ US:X  as  SS	K
Jn  [
        R                  " UT R                  T R                  45      u  mm	U" S
S9 nT R                  " T0 T	D6  S S S 5        WR                  5       n[        US5      $ [        SU 35      e! , (       d  f       g = f! , (       d  f       NI= f)Nc                    [        U [        R                  5      (       aC  [        U R                  S   [        R
                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       aC  [        U R                  S   [        R                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  g[        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  gU $ )Nr   r9  r:        ?T)r   rJ   rK   r   r   r<  r  r   r   r   r   r=  s    r3   materialize_arg)estimate_runtime.<locals>.materialize_argI  s    a!!j&M&M3AFF5MDQQ277##
166%=%,,(O(OAFF5MD99277##
166%=%..(Q(Q277##
166%=%--(P(PHr6   testingr   profiler   )benchmarkerc                  (   > TR                   " T 0 TD6$ r0   )r   )r   r   r.   s   r3   r^   "estimate_runtime.<locals>.<lambda>]  s    4;;3O3Or6   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   *activation_memory_budget_runtime_estimatorr  $torch._inductor.runtime.benchmarkingr#  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterr'  r   get_total_flopsr  r   )
r.   RUNTIME_MODEr  r#  msr'  modecounted_flopsr   r   s
   `       @@r3   estimate_runtimer3  F  s    DDL
 y 		"]H!???TYY<TULD&**+OPB ] 
	 <DKK8PQfU+tKK(( ,,,.=!$$=l^LMM# ] ,+s   AD	D
D
D-c           	      
  ^ ^^^^ ^!^"^#^$^%^&^'^(^) US:  d  US:  a  [        SU 35      e[        [        R                  [        R                  [        R
                  [        R                  [        R                  S9n[        R                  (       a  [        USSSSS9nUS:X  a  TR                  $ [        T TU5      u  pEUS:X  a  U$ S[        [        R                     S[        4S	 jm T " TR                  5      m'T " U5      m%T%T'::  a  U$ U%U'4S
 jnS[        [        R                     4U U%U'4S jjm![        USSSS9n[        T TU5      u  pT!" U5      U:  a  U$ [        USS9m[        T TT5      u  pT!" U	5      U:  a  U	$ SSKJm"  [%        U"4S jTR                   5       5      m$S[$        [        R                     S[        [        R                     4U"U$4S jjnU" U
5      n['        US S9n['        U[(        SS9m[+        T5      S:X  a  TR                  $ T Vs/ s H  o" [)        U5      5      PM     snm&T Vs/ s H  n[-        U5      PM     snm)SSKJm(  UUU&U(U)4S jm#[        R2                  (       Ga  U!U#U UU)4S jnU" S5      U" S5      /nUS   SS  US   SS  :w  a  US   US   4/nU(       a  UR5                  5       u  nnUS   US   -
  S:  a$  UR7                  U5        UR7                  U5        MM  U" US   US   -   S-  5      nUSS  USS  :w  a  UR7                  UU45        USS  USS  :w  a  UR7                  UU45        U(       a  M  UR9                  5         SS KJn  U Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnUR?                  SS9  URA                  UUS S!9  [C        U5       H   u  nnURE                  US" UUU   4S#S$S%S&9  M"     URG                  S'5        URI                  S(5        URK                  S)5        URM                  S5        URO                  5       nURQ                  5         [R        RT                  " 5       n[        RV                  b%  [        RV                  n[R        RX                  " USS*9  S+n[Z        R\                  R_                  5       (       aD  [Z        R\                  Ra                  5       (       a!  S,[Z        R\                  Rc                  5        3n[R        Rd                  Rg                  US-U S.[i        5        S/35      nURk                  U5        [l        Ro                  S0U5        T#" UTT S19S   $ s  snf s  snf s  snf s  snf )2Nr   r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )r}   r~   r   r   r   F)r}   r~   r   r   r   rU   c                 :    [        [        [        U 5      5      S-  $ N    eA)rE  maprL  )r   s    r3   estimate_activations_size:choose_saved_values_set.<locals>.estimate_activations_size  s    3x./#55r6   c                    > U S-  TT-
  -  $ r6  rC   )szmax_act_sizemin_act_sizes    r3   get_normalized_size4choose_saved_values_set.<locals>.get_normalized_size  s    S\L899r6   activationsc                 &   > T" U 5      T-
  TT-
  -  $ r0   rC   )rA  r9  r=  r>  s    r3   get_mem_ratio.choose_saved_values_set.<locals>.get_mem_ratio  s"    )+6E<'
 	
r6   )r}   r~   r   )r   )get_node_storagec              3   4   >#    U  H  nT" U5      v   M     g 7fr0   rC   )rY   r.   rE  s     r3   r[   *choose_saved_values_set.<locals>.<genexpr>  s     TCS4 0 6 6CSrB  r*  c                    > U  Vs/ s H.  nUR                   [        S5      :  d  M  T" U5      T;  d  M,  UPM0     sn$ s  snf r6  )r  rv   )r*  r  rE  input_storagess     r3   get_recomputable_banned_nodes>choose_saved_values_set.<locals>.get_recomputable_banned_nodes  sP    
 "
! S)	 
 %Q'~= !
 	
 
s   >
>>c                     U R                   $ r0   r   r=  s    r3   r^   )choose_saved_values_set.<locals>.<lambda>  s    PQPVPVr6   r`   TrQ  r  c                   > T" 5          [        UTT[        U S5      UT5      u  nnnS S S 5        [        5       nW H  n UR                  TU   5        M     UR                  T5      (       d   e[        UUT
U5      u  p[        (       a  [        UTWUWTTUS9  UW4$ ! , (       d  f       N}= f! [         a     M  f = f)Nr   )r   r  saved_node_idxsrecomputable_node_idxsexpected_runtimememories_banned_nodesruntimes_banned_nodesmin_cut_saved_values)	r  r  r   r  BaseExceptionissubsetrn  r$   r   )memory_budgetr  r   rQ  rO  rP  r  r~  r   rD  aggressive_optionsr  rR  r  rS  s             r3   get_saved_values_knapsack:choose_saved_values_set.<locals>.get_saved_values_knapsack  s    ]
 4%%M1%-	 &	  )3)C:3?@ *   !>????'	
 ! 4'.K /'=!1&;&;%1	 ---Q ]$ ! s   B B0
B-0
B>=B>c                 F   > T" U TTS9u  pU [        T5      U-
  T" U5      4$ )N)r  r   )rE  )r  r   rQ  rC  rY  r   r  rS  s      r3   estimate_for_budget4choose_saved_values_set.<locals>.estimate_for_budget  s=    -FYK.*L )*-==l+ r6   r0  r  gMbP?r  )
      )figsizeo)markerz.4fzoffset points)r   r^  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtime)exist_ok _rank_memory_budget_paretorD  z.svgz%Generated Pareto frontier curve at %s)rW  r  r   )8r   r{   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   rP   rn  rt   rJ   rK   rL  torch._inductor.fx_utilsrE  r   rb   rL  rs  r3  torch.utils._mode_utilsr  visualize_memory_budget_paretort  r   sortmatplotlib.pyplotpyplotfigureplotrz  annotatexlabelylabeltitlegridgcfshowosgetcwdmemory_budget_pareto_dirmakedirsr   distributedis_availableis_initializedget_rankpathrP  r   savefigr%   r	  )*r   r  rW  r  runtime_optimized_saved_valuesrD  r?  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesr*  rJ  recomputable_banned_nodesr  r.   r\  optionsbisectslhsrhsmidpltitemx_valuesy_valuestxtfigfig_dirrank_suffixfig_namerX  r  r9  rC  rE  rY  rI  r=  rR  r>  r  rS  s*   ``                            @@@@@@@@@@@@r3   choose_saved_values_setr  m  s   
 qMA-XYfXgh
 	
 $$AA#)#K#K%+%O%O & E E88O &&!"'',).$)
 (5)%" --6RWW 6% 6 -Y-=-=>L,-KLL|#--:
4= 
 

 &##(%*	 '4Y 7'#  12]B++  % ;HY 2;7) :;mK449T9CSCSTTN
 )
	bgg
 
 !>l K &'@FV W %+!x%! ()Q.2O2OQHQK(2O ,I+H4+H 4). ).V ,,,	 	 's+-@-EF1:ab>WQZ^+
GAJ/0G";;=Sq6CF?T)NN3'NN3')3q6CF?a*?@qr7c!"g%NNC:.qr7c!"g%NNC:. ' 	'(/0DG0(/0DG0 	

7
#8C0  )FAsLLs)hqk"*   * 	

?#

56		NOggi
))+**655GKK$/))++0A0A0P0P0R0R"5#4#4#=#=#?"@AK77<<+K=:L:N9OtT
 	H;XF %#yk	 	y^ 10s    U&U+8U0U5c          	      &  ^ U R                   R                  5         U R                  5         U R                   n[        R                  (       a  [        U5      nXPl         U R                   n[        U 5      n[        U 5      nU(       a  [        U 5      n U4S jn	U	" U 5      n
[        U
R                  5      S:X  a
  [        XTS9$ [        U R                   R                  5       H  nUR                  S:X  a  [        S5      Ul        M%  U
R#                  U5      (       d	  SUl        MD  [        S5      Ul        UR$                   H+  n['        UR                   UR                   S-   5      Ul        M-     M     [        R(                  nUR                   HC  n[+        UR,                  R/                  SS5      [0        5      (       d  M4  UR,                  S   n  O   [3        UU
US	9n[5        [7        [8        U5      5      n[5        [7        S
 U5      5      n[;        U UUTS9u  nnU(       a!  U(       a  [=        U UU[        U5      5      u  nn[?        U5      n[@        (       Ga  [C        U Vs/ s H  n[E        U5      [G        U5      4PM     sn5      n[I        S U 5       5      S-  n[J        RM                  SU5        [J        RM                  SU5        [O        S UR                   R                   5       5      n[O        S UR                   R                   5       5      nUU-  n[Q        [        5      nUR                   R                   H\  nURR                  U;   d  M  [U        URV                  S5      (       d  M2  U[G        URV                  RX                  5      ==   S-  ss'   M^     [J        RM                  S[        U5      [        U5      [        U5      5        [C        UR[                  5       S SS9n[J        RM                  SU5        UU4$ s  snf )a(  
Partitions the joint graph such that the backward recomputes the forward.
Recomputing helps in trading off memory bandwidth with computation.

To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
resulting graphs through dead code elimination.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.
    _joint_inputs: The inputs to the joint graph. This is unused.
    compiler: This option determines the default set of recomputable ops.
        Currently, there are two options: ``nvfuser`` and ``inductor``.
    recomputable_ops: This is an optional set of recomputable ops. If this
        is not None, then this set of ops will be used instead of the
        default set of ops.
    num_fwd_outputs: The number of outputs from the forward graph.

Returns:
    Returns the generated forward and backward Fx graph modules.
c                   >^^^ [        U R                  5      m[        5       mU R                  R                   Hy  nUR                  S:X  a"  SUR
                  ;   a  TR                  U5        O![        U5      (       a  TR                  U5        UT;   d  M^  TR                  UR                  5        M{     [        [        [        U R                  R                  5      5      n[        [        [        U R                  R                  5      5      nX#-   n[        U TS9u  pVTR                  S U 5       5        [        U R                  XES5      n[        U4S jUR                   5       5      m[        UU4S jU R                  R                   5       5      nSn	0 n
U R                  R                   H  nUT;   d  M  XU'   U	S	-  n	M     [!        UTTX5      $ )
Nr   r   r   c              3   R   #    U  H  oc  M  UR                   S:w  d  M  Uv   M     g 7f)Nr   r   )rY   ra  s     r3   r[   Nmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>  s"      !
"!A8HAA{s   ''	'r   c              3   f   >#    U  H&  nUR                   S :w  d  M  TUR                     v   M(     g7fr  r  rD  s     r3   r[   r    s/      <
0ww(" $L#0s   11c              3   H   >#    U  H  nUT;  d  M  UT;  d  M  Uv   M     g 7fr0   rC   )rY   r.   rR   rd   s     r3   r[   r    s1      :
0,, 15=N1N D0s   
""	"r   r   )rV  r   r   r   r   r   r  r   rU  r  rt   r   r   r   r   r   rN   )r   r.   r
  r  rP   r   r   r.  rS   fw_cntrT   rE  rR   rd   r   s              @@@r3   classify_nodes;min_cut_rematerialization_partition.<locals>.classify_nodes  s   '(:(:;1; &&,,Dww-'J$++,E!%%d+%d++!%%d+((!((4 - VJ0B0B0H0HIJ!%&(:(:(@(@A"
 7#;/$
  	   !
"!
 	
 @Y
 2< <
*00<
 2

 0: :
$**00:
 0

  &&,,D((!'! - %'8/
 	
r6   r   r   r   r7  r   rW  N)rW  c                 "    [        U 5      (       + $ r0   r'  )rZ   s    r3   r^   5min_cut_rematerialization_partition.<locals>.<lambda>  s    [^);r6   r(  c              3   8   #    U  H  n[        U5      v   M     g 7fr0   )rL  r  s     r3   r[   6min_cut_rematerialization_partition.<locals>.<genexpr>  s     'J\\r   z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sc              3   \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7fr   Nr  r   s     r3   r[   r    #      %
"7$77o;UIDII"7r  c              3   \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7fr  r  r   s     r3   r[   r    r  r  r  z# remat/fw/bw: %d/%d/%dc                     U S   $ rO  rC   r=  s    r3   r^   r    s    !A$r6   TrQ  zCount of Ops Rematerialized: %s).r   r   r  r   cser"   r   r   r  rs  rR   r1  r  r   r   rv   r  rh   r  r   activation_memory_budgetr   r   r   rL  r  rt   r   r   r  r  r  r$   rb   rL  r   rE  r%   rT  r   r   r   r   r   r  rF  )r   r-  compilerr   r   	cse_graphr   graph_has_recomputable_opsgraph_has_recomputable_rng_opsr  r  r.   r%  rW  r   r   r  r  r  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opss      `                      r3   r)  r)  Y  sz   B **,D zz &	&$$K!5l!C%=l%K"!-l;,
\ |,I
 9&&'1, 
 	
 ++11277h #CD))$// !D #CD

$'(9(94;L;Lq;P$Q! # 3 33M!!diimmOT:EBB IIo6M " +#L 6+|<=O;\JKL 4''	Iy ")#8iC4H$ Iy 4I>IlKlSV4lKL %('J\'J$JS$P!:<UV 	?N$ %
"+//"7"7%
 
 % %
"+//"7"7%
 
 &7!,S!1OO))DyyK'GDKKAR,S,Ss4;;6678A=8 * 	%  		
 $FLLNPTU24FGi9 Ls   ) Ptracedfnamefigname
clear_metaprogparse_stack_tracedot_graph_shapec                 "   U(       aZ  [         R                  " U R                  5      n[        R                  " X5      n U R                  R
                   H
  n0 Ul        M     [        R                  R                  U5      u  pU
(       d  S[        R                  -   n
[        R                  SX5        [        R                  " U UUUS9nUR!                  5       n[#        USU
R%                  S5      -   5      nU	 U
 3nUc	  U" U5        g U" XS9  g )N.zWriting FX graph to file: %s%s)r  r  write_)r  )copydeepcopyr   rJ   r}  r   r   r  r  splitextr   torch_compile_graph_formatr%   rT  r   FxGraphDrawerget_main_dot_graphrY  lstrip)r  r  r  r  r  r  r  r   r.   baseextgr   write_methods                 r3   
draw_graphr  	  s     MM&,,/	2LL&&DDI '  'IDF555HH-t9""+'		A 	
A1hC89LfSENE|UU&r6   r0   )r   )inductor)fx_graphTNFN)r  rw   r4  r  loggingr{  r"  r  os.pathrS  r   dataclassesr   r   typingr   r   r	   r
   r   torch._inductor.inductor_primstorch.distributedtorch.fxrJ   torch.utils._pytreeutils_pytreer   ;torch._functorch._activation_checkpointing.ac_logging_utilsr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   torch.fx.passesr   torch.utils._ordered_setr   torch.utils.checkpointr   rh  r   -_activation_checkpointing.graph_info_providerr   "_activation_checkpointing.knapsackr   r   r   ,_activation_checkpointing.knapsack_evaluatorr   _aot_autograd.logging_utilsr   _aot_autograd.utilsr    r!   compile_utilsr"   r#   sympydebug_partitionerr$   ry   rI   	getLoggerrD   r%   LoggerrH  rI  r  r'   rN   r{   rK   r   r}  r   r   rv   r   r   r   r   rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r1  r  r6  rL  rV  	lru_cacherb  ru   ri  r  r  r  r  r  rn  rT  rI  rV  rL  r  rr  r  r<  r  r3  r  r)  r  rC   r6   r3   <module>r     sF          	  # * ; ;  %   $ $ @ H L  ) / 3  L 
 L ; M 8  %66 t 6''1W^^ 1yy~~		 > > >2      :    T r~~ $ 2>> d  C  
  #	DDMD "'']D sm	D
 XXDNRWW  Gbgg G$ Gbgg $ bgg $ XRWW X XCrww C4 CKrww K4 Krww 4 $..$
4=$rww-'($$rww- s `"..`"rww-`" "'']`"
 `" 2>>2>>)*`"FS..S
2>>2>>)*Sl c("# " "277 s :Jbhh J T "Hbggsl!3 HU277C<=P8Q HGBNN Gr~~ GTZ*xx##Z*xx##Z* XX]]Z* XX]]	Z*
 LLZ* Z* HHMMZ* HHMMZ*zH ..H ~~H  ~~H  	H 
 2>>2>>)*H V# #BNN #T /3	H&H&H& #H& z"''*+	H&V."bW bJBHH +T+TK+T 5k+T 	+T
 +T $(=+T 5$s)T#Y&'+T\ 05 5 5 5$NT i	i	i	 
"'']	i	^ m ..m  2>>2>>)*m f ,0#%)'HH  '' ' 	'
 5d3i(
)' ' c]' 
'r6   