
    [Th6                        S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
Jr  S SKrS SKrS SKJr  S SKJr  S SKJs  Jr  S SKJr  S SKJr  S SKJr  SS	KJrJrJr  SS
KJ r   SSK!J"r"J#r#J$r$  \RJ                  " \&5      r'S r(\S 5       r)\S\RT                  S\
4S j5       r+S S jr,S r-\S\RT                  S\
4S j5       r. " S S\R^                  5      r0\S\RT                  S\
4S j5       r1\S 5       r2S r3\Rh                  Rj                  r5\5Rl                  \5Rn                  \5Rp                  \5Rr                  \5Rt                  \5Rv                  \5Rx                  \5Rz                  \5R|                  \5R~                  \5R                  \5R                  \5R                  \5R                  R                  \5R                  R                  \5R                  \5R                  \5R                  \5R                  \5R                  \5R                  \5R                  1rM\" \M5      rM\S 5       rNS\\
\R                  4   4S jrPS rQS qRS rSS rTS!S jrUg)"    N)contextmanager)partial)CallableUnion)SymInt)get_decompositions)bind_symbols   )aot_function
aot_modulemake_boxed_compiler)strip_overloads)default_partition
draw_graph#min_cut_rematerialization_partitionc                     U R                   R                  S[        R                  R                  R
                  S9 H,  n[        R                  R                  R                  Ul        M.     U R                  5         U $ )Ncall_functionoptarget)	graph
find_nodestorchopsaten_to_copytor   	recompile)fx_gnodes     R/var/www/auris/envauris/lib/python3.13/site-packages/torch/_functorch/compilers.py_canonicalizer"   $   s[    

%%599>>#:#: &  iinn'' 	NNK    c               #      #    [         R                  R                  S5      n  S v   [         R                  R                  U 5        g ! [         R                  R                  U 5        f = f7f)NF)r   _C_jit_set_autocast_mode)old_jit_autocast_flags    r!   _disable_jit_autocastr(   -   sH     !HH;;EB?''(=>''(=>s    A+A  A+!A((A+r   returnc                 p   [        5          [        U 5        U R                  R                  S[        R
                  R                  R                  S9 Ht  n[        UR                  5      S:X  d  M  [        UR                  5      S:X  d  M9  SUR                  ;   d  MK  [        R
                  R                  R                  Ul        Mv     U R                  R                   H]  n0 nUR                  R                  5        H4  u  pE[        U[        R                   5      (       a  UR"                  nXSU'   M6     X2l
        M_     U R                  R%                  5         U R'                  5         [        R(                  R+                  U 5      n[        R,                  R/                  UR                  5        [        R(                  R1                  UR3                  5       5      n[        R(                  R5                  U5      n[7        S U 5       5      (       d  U" U6   SSS5        U$ ! , (       d  f       W$ = f)z
Compiles the :attr:`fx_g` with Torchscript compiler.

.. warning::
    This API is experimental and likely to change.

Args:
    fx_g(fx.GraphModule): The input Fx graph module to be compiled.

Returns:
    Torch scripted model.
r   r   r
   dtypec              3   j   #    U  H)  n[        U[        R                  R                  5      v   M+     g 7fN)
isinstancer   _subclasses
FakeTensor).0ts     r!   	<genexpr>ts_compile.<locals>.<genexpr>`   s&     M1:a!2!2!=!=>>s   13N)r(   r   r   r   r   r   r   r   lenargskwargsr   r   nodesitemsr.   devicetypelintr   jitscriptr%   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r   inpsr    
new_kwargskvfs          r!   
ts_compilerI   6   s    
	 JJ))uyy~~'>'> * 
D 499~"s4;;'71'<DKKAW#iinn//	
 JJ$$DJ))+a..A !1 , %K % 	

IIT"**1773IIQVVX&II,,Q/MMMMtH9 
!: H; 
!	 : Hs   A"H&1H&H&E>H&&
H5c                 D    [        U R                  5        [        XUS9  U $ )N)
clear_meta)printcoder   )r   _namerK   s       r!   _draw_graph_compilerP   e   s    	$))tj1Kr#   c                 0    [        [        [        U S95      $ )NrO   )r   r   rP   rR   s    r!   draw_graph_compilerS   k   s    w':FGGr#   c                     U $ )z
Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
and can be used to check accuracy.

.. warning::
    This API is experimental and likely to change.

 r   rN   s     r!   noprW   o   s	     Kr#   c                   4   ^  \ rS rSrU 4S jrU 4S jrSrU =r$ )DebugInterpreter|   c                 V   > [        U R                  /UQ76 U l        [        TU ]  " U6   g r-   )r	   modulesymbol_mappingsuperrun)selfr6   	__class__s     r!   r_   DebugInterpreter.run}   s%    *4;;>>Tr#   c                   >^ ^
^^^ U 4S jmU4S jmU4S jm
U
U4S jn[         TT ]  U5      nSUR                  ;   a  [        R                  " UR                  S   5      u  pE[        R                  " U5      u  pg[        U5      [        U5      :X  d   [        U5       S[        U5       35       e[        [        [        U5      5      XF5       H5  u  mp[        U	[        R                  5      (       d  M'  U" XUU 4S j5        M7     U$ )Nc                    > [        U [        5      (       d  U $ [        R                  " U R                  R
                  R                  TR                  5      5      nUR                  (       d   U5       e[        U5      $ r-   )
r.   r   sympyexpandr    exprxreplacer]   	is_numberint)nirr`   s     r!   subst_symint/DebugInterpreter.run_node.<locals>.subst_symint   sV    b&))	RWW\\2243F3FGHA;;!!;q6Mr#   c                 .   > [        U4S jU  5       5      $ )Nc              3   4   >#    U  H  nT" U5      v   M     g 7fr-   rU   )r1   rk   rm   s     r!   r3   HDebugInterpreter.run_node.<locals>.subst_symint_tuple.<locals>.<genexpr>   s     8Cbb))Cs   )tuple)nisrm   s    r!   subst_symint_tuple5DebugInterpreter.run_node.<locals>.subst_symint_tuple   s    8C888r#   c                   > T" U R                  5       5      S:  ae  [        U R                  5       HL  nT" U R                  U5      5      UR                  U5      :w  d  M/  T" U R	                  U5      5      S:  d  ML    g   g)Nr   r
   FT)numelrangendimstridesize)abidxrm   s      r!   check_significant_strides<DebugInterpreter.run_node.<locals>.check_significant_strides   sb    AGGI&* =C$QXXc]3qxx}D(59$ ) r#   c           	      8  > [        U5      (       d   eU R                  UR                  :X  d(   U" 5        SU R                   SUR                   35       eT" U R                  5       5      UR                  5       :X  dG   U" 5        SU R                  5        ST" U R                  5       5       SUR                  5        35       eT" X5      nU(       dG   U" 5        SU R                  5        ST" U R                  5       5       SUR                  5        35       eg )Nz:  != z aka )callabler+   r{   rz   )nvrvdescsame_stridesr   rt   s       r!   check(DebugInterpreter.run_node.<locals>.check   s    D>>!>88rxx'NDF82bhhZtBHH:)NN'"2779-:[&BGGI;e,>rwwy,I+J$rwwykZ[:4R<La&BIIK=.@.M-NdSUS\S\S^R_`ar#   valr   c                  (   > ST  STR                    3$ )Nzoutput z where r]   )ir`   s   r!   <lambda>+DebugInterpreter.run_node.<locals>.<lambda>   s    s'$:M:M9N&Or#   )r^   run_nodemetapytreetree_flattenr5   ziprx   r.   r   Tensor)r`   nr   rl   n_vals_n_specr_vals_r_specr   r   r   r   rm   rt   ra   s   `         @@@@r!   r   DebugInterpreter.run_node   s    		9			a GQAFF?$11!&&-@OF$11!4OF v;#f+-P#f+d3v;-/PP- s6{!3VD	2!"ell33bOP E r#   r   )__name__
__module____qualname____firstlineno__r_   r   __static_attributes____classcell__)ra   s   @r!   rY   rY   |   s    / /r#   rY   c                 ,    [        U 5      R                  $ )z
Returns a (slow) interpreter over the FX graph module that also checks
various debugging properties (e.g., that tracing strides matched real
strides.)
)rY   r_   rV   s     r!   	debug_nopr      s     D!%%%r#   c                     [        U 5        [        R                  R                  U 5      n[        R                  R	                  UR                  5       5      nU$ r-   )r   r   r=   r>   r@   rA   )r   rN   rH   s      r!   simple_ts_compiler      s=    D		A		"AHr#   c                 "    [        U [        5      $ r-   )r   r   )rH   s    r!   nnc_jitr      s    ,--r#   c                 0    [        U R                  5        U $ r-   )rL   rM   rV   s     r!   print_compiler      s    	$))Kr#   fnc                     [         [         [        [        S.nUR                  U5        [	        U [
        R                  R                  5      (       a  [        U 40 UD6$ [        U 40 UD6$ )a;  
Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
memory efficient fusion. It uses the
:func:`min_cut_rematerialization_partition` partitioner to perform efficient
recomputation. It uses NVFuser to compile the generated forward and backward
graphs.

.. warning::
    This API is experimental and likely to change.

Args:
    fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
        that takes one ore more arguments. Must return one or more Tensors.
    **kwargs: Any other overrides you want to make to the settings

Returns:
    Returns a ``Callable``  or ``nn.Module`` that retains the eager behavior
    of the original :attr:`fn`, but whose forward and backward graphs have
    gone through recomputation optimizations, and the graphs have been
    compiled with nvfuser.

fw_compilerbw_compilerpartition_fndecompositions)
rI   r   default_decompositionsupdater.   r   nnModuler   r   )r   r7   configs      r!   memory_efficient_fusionr      sZ    6 "!;0	F MM&"ehhoo&&"'''B)&))r#   c                     U R                  S5        [        SU Vs/ s H  o"R                  UR                  4PM     sn S35        SSKJn  U" 5       R                  5       " U6   [        X5      $ s  snf )NfooaQ  
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = a?  
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
r   )FxModule)	to_folderrL   shaper+   r   r   cudarI   )r   rD   r   r   s       r!   debug_compiler     sm    NN5		 &**T''177	T*+ ,	( JOOtd!! 	+s   !A0
c                 P   / n[        U S5       n[        R                  " U5      n/ nU H  n[        U5      S:X  a  UnU" [        R
                  " 5       5      nOUu  pWpn
U	[        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        [        1;   a  [        R                  " SSXyU
S9nO[        R
                  " XyU
S9nUR                  U5        M     SSS5        U$ ! , (       d  f       U$ = f)zR
Return a random input for the given inputs meta generated from _save_fx_default.
rbr
   r   )r+   r:   N)openpickleloadr5   randomrandr   rj   int32int64booluint8floatrandintappend)input_data_pathinputsrH   inputs_metar   r;   inputr   _strider+   r:   s              r!   
get_inputsr   3  s     F	ot	$kk!nD4yA~V[[]+6:3WVIIKKKKJJIIKK	 	 "MM!Q6RE!JJu&IEMM% '   
%. M/ 
%	$. Ms   C=D
D%c           	      x   ^ ^^^	^
 SSK Jn  U	4S jm	U UUU	4S jm
U
4S jnU
4S jnU
4S jnU" UUUUU[        S9$ )	a  
The forward, backward, and joint computation graph will be stored in
{folder_name}/{current_name}/{current_name}_forward_{graph_index},
{folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
{folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
The input shape of the graphs will be stored in the .input files.
These files can be loaded with pickle,
and is a list of format (type, shape, stride, dtype, device).
In the case of type = int or float, it is just (type,).
For joint graph input, it is a nested list [[],[]]
where the two inner lists have the same format.
If dump_example_input is True, example_inputs will be stored in .pt file.
Since each function might produce multiple graphs,
the graph_index is used to distinguish difference graphs
r   )aot_module_simplifiedc                   > / n[        U 5      S:  a6  [        U S   [        5      (       a  UT" U S   5      -  nUT" U S   5      -  nU$ U  H  n[        U5      [        :X  d  [        U5      [
        :X  a  UR                  [        U5      45        MF  UR                  [        U5      UR                  UR                  5       UR                  UR                  45        M     U$ )Nr   r
   )r5   r.   rr   r;   rj   r   r   r   rz   r+   r:   )r6   
input_metaargget_input_metas      r!   r   (_save_fx_default.<locals>.get_input_metad  s    
t9q=ZQ77.a11J.a11JCCyC49#5!!49,/!!#Y		3::<CJJO	  r#   c                   > [        U R                  R                  5      S:X  a,  [        R                  [        R
                  STU[        5        g [        R                  " U 5      nUR                  R                  [        R                  R                  R                  5       5        UR                  5         T" U5      n[        R                  " T ST 3SS9  UR!                  T ST ST SU S[         3	5        ["        R$                  " U['        T ST ST SU S[         ST SU S[         S3S5      5        T(       a8  [        R(                  " UT ST ST SU S[         ST SU S[         S	35        g g )
Nr   z!No nodes in graph {%s}_{%s}_{%s}./T)exist_okrN   z.inputwbz.pt)r5   r   r8   logloggingWARNINGgraph_indexcopydeepcopyset_codegenr   fxCodeGenr   osmakedirsr   r   dumpr   save)	
gm_to_saver6   	type_namegmr   current_namedump_example_inputfolder_namer   s	        r!   graph_saver_helper,_save_fx_default.<locals>.graph_saver_helpers  s   z%%&!+GG3 ]]:&
UXX^^3356
#D)

{m1\N3dC
m1\N!L>9+Q{mT	
 	-qa~Qyk;-WXYeXffghqgrrst  tA  AG  H	
 JJ-qa~Qyk;-WXYeXffghqgrrst  tA  AD  E r#   c                    > T" XS5        U $ )NforwardrU   )r   fw_argsr   s     r!   graph_saver_forward-_save_fx_default.<locals>.graph_saver_forward  s    2	2	r#   c                 ,   > T" XS5        [         S-  q U $ )Nbackwardr
   )r   )r   bw_argsr   s     r!   graph_saver_backward._save_fx_default.<locals>.graph_saver_backward  s    2
3q	r#   c                 ,   > T" XS5        [        X5      $ )Njoint)r   )r   
joint_argsr   s     r!   graph_saver_joint+_save_fx_default.<locals>.graph_saver_joint  s    273 00r#   r   )functorch.compiler   r   )r   r   r   r   example_inputsr   r   r   r   r   r   s   ```      @@r!   _save_fx_defaultr  R  sH      8! !F1 !
'(&- r#   c                 (    Sq [        [        XU5      $ )aK  
Dump the forward, backward, and joint computation graph.
Example Usage:
save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
optimize_ctx = torchdynamo.optimize(
    save_fx_func
)
with torch.enable_grad():
    with optimize_ctx:
        result = forward_and_backward_pass(model, example_inputs)
r   )r   r   r  )r   r   r   s      r!   graph_dumper_aotr    s     K#\@RSSr#   )T)F)Vr   r   r   r   r   
contextlibr   	functoolsr   typingr   r   re   r   torch.fxr   torch.nnr   torch.utils._pytreeutils_pytreer   r   torch._decompr   %torch.fx.experimental.symbolic_shapesr	   aot_autogradr   r   r   compile_utilsr   partitionersr   r   r   	getLoggerr   r   r"   r(   GraphModulerI   rP   rS   rW   InterpreterrY   r   r   r   r   r   detachgelu_backwardleaky_relu_backwardsigmoid_backwardthreshold_backwardhardtanh_backwardhardsigmoid_backwardhardswish_backwardtanh_backwardsilu_backwardelu_backwardcudnn_batch_normcudnn_batch_norm_backwardmasked_fillScalarr   elu
leaky_reluhardtanh	hardswishhardsigmoidconj_physicalis_same_sizer   r   r   r   r   r   r   r  r  rU   r#   r!   <module>r,     s[     	   %  "     $ $  , > G G *  !
 ? ? +R^^ +h + +\H 	bnn 	H 	 	4r~~ 4n &BNN &( & &  . yy~~KK""HHOOMMNN- 2 ,,BC   
$*h		!"$*N": >YzTr#   