
    [ThW                   >   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJrJrJrJrJr  S SKJr  S SK	Jr  S SKJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  S SK+J,r,J-r-J.r.J/r/J0r0  S SKJ1r1  S SK2r2S SK3r3S S	K4J5r5  S S
K6J7r7  S SK8J9r9  \((       a\  S SKJ:r:J;r;J<r<  S SK3J=r=J>r>J?r?  S SK@JArA  S SKBJCrC  S SKDJErE  S SKFJGrG  SSKHJIrI  SSKJJKrK  SSKLJMrM  SSKNJOrOJPrPJQrQJRrRJSrSJTrT  SSKUJVrV  SSKWJXrXJYrY  / SQrZ\)" S5      r[\R                  " S5      SS j5       r]S SK^J_r_  S SK`Jara  S SKbJcrc  S SKdJere  S S KfJgrg  S S!KhJiri  S S"KjJkrkJlrlJmrmJnrnJoro  S S#KpJqrqJrrr  S S$KsJtrtJuru  SS%KvJwrw  SS&KxJyrz  \R                  S':H  r{\R                  " \}5      r~\)" S(5      r\\2GR                  \2GR                  4   r\&\*\3GR                  \\3R~                  4      rS)S*S+.rS,rS,rS,rS-rS.r\\S-
  -  S :X  a  \S/:  d   S05       eSS1 jrSS2 jr " S3 S4\2GR                  5      r S       SS5 jjr\R                  " S5      SS6 j5       rSS7 jrSS8 jrSS9 jrSS: jr      SS; jrySS< jr    SS= jr    SS> jrGS S? jrS@ 4     GSSA jjr        GSSC jrGSGSSD jjr  GS         GSSE jjr     GS             GSSF jjrGS	SG jrGS
SH jrGSSI jrGSSJ jrGSSK jr\." SL5      r\)" SMSNSO9r " SP SQ\'\#\\4   5      rGSSR jr    GSSS jr      GSST jr      GSSU jr GS     GSSV jjr      GSSW jrGSSX jrGSSY jrGSSZ jrGSS[ jrGSS\ jrGSS] jrGSS^ jrGSS_ jrGSS` jr    GSSa jrGSSb jr/ rSB\Sc'   GS Sd jrGS!Se jrS SKrGS!Sf jr\GR|                     GS"       GS#Sg jj5       rGS$Sh jr      GS%Si jr\R                  " S/5      GS&Sj j5       r " Sk Sl\%5      r\GR                   " Sm Sn5      5       r " So Sp5      r " Sq Sr\5      r\GR|                  GS'Ss j5       r " St Su5      r " Sv Sw\5      r\R                  " S5      GS(GS)Sx jj5       r\R                  GS*Sy j5       rGS*Sz jr      GS+S{ jrSS| jr      GS,S} jrGS-S~ jrGS-S jrSSS.       GS.S jjrGS/S jrGS0S jr\R                  " S5      GS1S j5       r\R                  " S5      GS2S j5       rGS3S jrGS0S jrGS3S jrGS3S jr        GS4S jr    GS5               GS6S jjrSS jr " S S5      r        GS7S jr        GS8S jrGS9S jrGS:S jrGS;S jrGS;S jr        GS<S jr\GR|                        GS=S j5       r GS     GS>S jjrGS?S jrGS@S jrGSAS jrGSAS jrGSBS jrGSCS jr\GR|                  GSDS j5       rGS*S jr\R                  " S5      GS*S j5       r\R                  " S5      GS&S j5       r\R                  " S5      GS*S j5       rGS*S jrGSES jrGSFS jrSS jrSS jrGSGS jrGSS jr " S S\GR                  5      r          GSHS jrGSIS jr    GSIS jr GS     GSJS jjGr GSKS jGrGSLS jGrGSLS jGr      GSMS jGr        GSNS jGrS 4           GSOS jjGrS 4           GSOS jjGrGSPS jGrGSQS jGr	\GR                   " S S5      5       Gr
\GR|                  GSRS j5       GrGSSS jGrGSTS jGrGSUS jGrGSVS jGr              GSWS jGrGSXS jGrGSYS jGrGSZS jGrGS[S jGr        GS\S jGrGS]S jGr      GS^S jGrGS_S jGr      GS`S jGr      GSaS jGrGSbS jGr      GScS jGrSS jGrGSSS jGrSSSSSSS.GrG\GRA                  5        V Vs0 s H  u  pX_M	     snn Gr!\GRD                  " S5      Gr#GSdS jGr$GSeS jGr%GSfS jGr&GSfS jGr'\R                  " S5      GSgS j5       Gr(\GR                   " S S5      5       Gr)0 Gr*S\S'           GShS jGr+GSiS jGr,\)" S5      Gr-\)" S5      Gr. " S S\G\-G\.4   5      Gr/\-" SNS9GSSNS.GSjS jjj5       Gr0GSkS jGr1      GSlS jGr2 " S S\GR                  5      Gr3\R                  " S5      GSmS j5       Gr4SS jGr5gs  snn f (n      )annotationsN)
CollectionIteratorMappingMutableMapping
MutableSet)datetime)StringIO)AnyCallablecastGenericLiteral
NamedTupleOptionalProtocolTYPE_CHECKINGTypeVarUnion)Concatenatedataclass_transform	ParamSpecSelf	TypeGuard)mock)DeviceProperties)
OrderedSet)tree_map_only)IterableSequence
ValuesView)SymBoolSymFloatSymInt)ELEMENTWISE_TYPE_PROMOTION_KIND)GraphModule)ShapeEnv)Node   )WorkspaceArgPythonWrapperCodegenGraphLowering)BufferExternKernelIRNodeLayout	OperationReinterpretViewCompiledFxGraph)BaseSchedulerNodeSchedulerBuffer)cudampsxpuTc                     [          V s/ s H*  n [        [        U 5      R                  5       (       d  M(  U PM,     nn [	        U5      S::  d   e[	        U5      S:X  a  SnU$ UR                  5       nU$ s  sn f )Nr)   r   r9   )	GPU_TYPESgetattrtorchis_availablelenpop)x
avail_gpusgpu_types      M/var/www/auris/envauris/lib/python3.13/site-packages/torch/_inductor/utils.pyget_gpu_typerH   P   sh    &KY'%*;*H*H*J!YJKz?aZA-vHO 4>>>3CHO Ls
   'A2A2)get_interface_for_device)detect_fake_mode)
DeviceType)	EventList)GraphTransformObserver)	ShapeProp)CeilDivCleanDivFloorDivIdentityModularIndexing)make_symbolSymT)bound_sympyValueRanges)config)ceildivwin32_Tz.cubinz.spv)r9   r;         @      zmust be power of 2c                *    U [         -   S-
  [         * -  $ )z/Round up to the nearest multiple of ALIGN_BYTESr)   )ALIGN_BYTES)nbytess    rG   _alignrc      s    [ 1$44    c                   [        U [        R                  [        R                  45      (       a#  [	        [        [        U R                  5      5      $ [        U [        5      =(       d"    [        R                  " U [        5      [        :H  $ )z:v can be statically proven to be a multiple of ALIGN_BYTES)
isinstancesympyAddMaxallmap_is_alignedargsaligngcdra   )vs    rG   rl   rl      sT    !eii+,,3{AFF+,,aK599Q#<#KKrd   c                  4    \ rS rSrSrSrSr\SS j5       rSr	g)	rn      z<Symbolically round up to the nearest multiple of ALIGN_BYTESr)   Tc                    [        U[        [        R                  45      (       a  [	        [        U5      5      $ [        U5      (       a  U$ g N)rf   intrg   Integerrc   rl   )clsvalues     rG   eval
align.eval   s<    ec5==122#e*%%uL rd    N)ry   
sympy.ExprreturnzOptional[sympy.Expr])
__name__
__module____qualname____firstlineno____doc__nargs
is_integerclassmethodrz   __static_attributes__r|   rd   rG   rn   rn      s!    FEJ rd   rn   c                   U " 5         [         R                  R                  5         [         R                  " [	        S5      [         R                  SS9n[         R                  R                  SS9n[         R                  R                  SS9nUR                  5         [        S5       H  nUR                  5         U " 5         M     UR                  5         [         R                  R                  5         UR                  U5      S-  n[        S[	        X-  5      5      n[        S[	        X'-  5      5      n	[        U5       H
  nU " 5         M     [         R                  R                  [         R                  R                  R                  /S9 n
[        U	5       H  nUR                  5         U " 5         M     [         R                  R                  5         S	S	S	5        [        R!                  S
5        [        R!                  W
R#                  5       R%                  SSS95        ['        U
R)                  5        Vs/ s H7  nUR*                  [,        R                  :X  d  M#  UR.                  S:w  d  M5  UPM9     sn5      n[1        U5      U	-  S:w  a  [3        S[1        U5      U	5      e[1        U5      U	-  n['        [5        U5       VVs/ s H  u  pX-  S:w  d  M  UPM     snn5      nUR7                  5         UR#                  5       n[        R!                  S5        [        R!                  UR%                  SS95        [9        S U 5       5      S-  U	-  n[        R!                  SU5        U$ ! , (       d  f       GN= fs  snf s  snnf )a:  
Returns benchmark results by examining torch profiler events.
This could be more accurate as it doesn't count CPU side overhead.
However, this also requires manually excluding irrelevant event, e.g.
vectorized_elementwise_kernel which is used to fill L2 cache,
various CUDA events, etc, so could also be fragile.
g    Ar9   )dtypedeviceT)enable_timing   r)   )
activitiesNz
raw eventsself_device_time_total)sort_by	row_limitzContext Syncr   zYFailed to divide all profiling events into #repeat groups. #CUDA events: %d, #repeats: %szprofiling time breakdown)r   c              3  8   #    U  H  oR                   v   M     g 7fru   )device_time_total).0events     rG   	<genexpr>+do_bench_using_profiling.<locals>.<genexpr>   s     A=%%%=   g     @@zprofiling results: %s ms)r@   r9   synchronizeemptyrv   Eventrecordrangezero_elapsed_timemaxprofilerprofileProfilerActivityCUDAlogdebugkey_averagestablerL   eventsdevice_typerK   namerB   RuntimeError	enumerate_build_treesum)fnwarmuprepcachestart_event	end_event_estimate_msn_warmupn_repeatpir   filtered_eventsnum_event_per_groupactual_eventsress                    rG   do_bench_using_profilingr      s    D	JJKKJuyyHE **"""6K

  t 4I1X
  	JJ**959K 1c&./0H1c#+,-H 8_
  
		NN++00
 
  
 
xAKKMD	 ! 	

 
 IIlIIann$$-EQS$TU 	
#  JOO3 8=

n8T #	
O ?h&!+- 	
 	
 o.9 &o6	
6&!+ 6	
M !..0MII()IIm!!B!/0
A=A
AF
JX
UCII(#.J_
 
$	
	
s+   AM1<"N"N4N
N
N
1
N c                     SSK Jn   [        R                  R	                  SS5        U S L=(       a%    [        [        [        R                  SS 5      S5      $ ! [         a     g[         a  nS[        U5      ;   d   e S nAgS nAff = f)	Nr   )	roi_alignztorchvision::nmsMetatorchvisionr   Fztorchvision::nms does not exist)torchvision.opsr   r@   _C%_dispatch_has_kernel_for_dispatch_keyhasattrr?   opsImportErrorr   str)r   es     rG   has_torchvision_roi_alignr      s|    -667I6R$ 
EII}d3[*
 	
   0CF:::s   AA 
B$	B-BBc                t   U c   [         R                  " S5      R                  $ [        U [        5      (       a  [         R                  " U 5      n U R
                  S;  aY  U R                  cL  [        U R
                  5      n[         R                  " U R
                  UR                  R                  5       S9$ U $ )Ng        )cpumeta)index)
r@   tensorr   rf   r   typer   rI   Workercurrent_devicer   device_interfaces     rG   decode_devicer      s    ~||C '''&#f%{{/)fll.B3FKK@||FKK/?/F/F/U/U/WXXMrd   c                ~    [         R                  " [        R                  U [        R
                  R                  5      $ ru   )	functoolsreduceoperatormulrg   SOne)its    rG   sympy_productr   	  s#    HLL"eggkk::rd   c           	         [        U 5      [        U5      :X  d   e[        R                  " [        S [	        X5       5       5      5      $ )Nc              3  .   #    U  H  u  pX-  v   M     g 7fru   r|   )r   abs      rG   r   sympy_dot.<locals>.<genexpr>  s     >odaAEos   )rB   rg   expandr   zip)seq1seq2s     rG   	sympy_dotr     s6    t9D	!!!<<>c$o>>??rd   c                b    U  Vs0 s H  n[        U5      U_M     snR                  5       $ s  snf ru   )idvalues)r   rD   s     rG   uniquer     s+     !bBqE1Hb!((**!s   ,c           
        [        U [        R                  5      (       d  [        U[        R                  5      (       a4  [        [        R                  " U 5      [        R                  " U5      5      $ [        U [
        5      (       a  [        U[
        5      (       d$   U  S[        U 5       SU S[        U5       35       e[        X5      $ )Nz: , )rf   rg   ExprrO   sympifyrv   r   runtime_ceildiv)numerdenoms     rG   rY   rY     s     %$$
5%**(E(Eu}}U+U]]5-ABB eS!!j&<&< 'DK=5'DK=9< 5((rd   c                >   U c  g[        U 5      R                  S5      S   n0 SS_SS_SS	_S
S_SS_SS_SS	_SS_SS_SS_SS_SS_SS_SS_SS_S S!_S"S_S#S$S%S&.En[        UR                  5       5       H  nX2U'   M	     [	        U [         5      (       a  U $ S'X!    3$ )(Nz*i8.r   booli1
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8e4b15x4
fp8e4b15x4float8_e4m3fnfloat8_e5m2float8_e8m0fnuu8float16fp16bfloat16bf16float32fp32float64fp64int8i8int16i16int32i32int64i64uint8u16u32u64)uint16uint32uint64*)r   splitlistr   rf   )key	dtype_strtysrp   s       rG   _type_ofr$  $  sA   
 {Cs#B'Ii 	G 	z	
 	 	 	w 	$ 	6 	F 	6 	6 	  	!" 	#$ 	%& 	'( -C2 #**,A  S#&&3@a/?,@@rd   c                Z    U  Vs/ s H  n[         R                  " U5      PM     sn$ s  snf )z
Gets the shape and stride of a tensor. For non-symbolic tensors, this is
trivial. But for symbolic tensors, we need to map from SymIntNode into
sympy.Expr.
)rg   r   )lstr   s     rG   convert_shape_to_inductorr'  J  s%     '**cEMM!c***s    (c                   SSK Jn  U  Vs/ s Hr  n[        U[        5      (       a  UOW[        U[        R
                  5      (       a  [        U5      O-UR                  R                  R                  R                  USS9PMt     sn$ s  snf )zn
Takes a list of shapes from Inductor and converts them into symints (or just
ints if all shapes are static).
r)   VN)hint)
virtualizedr*  rf   rv   rg   rw   graphsizevars	shape_envcreate_symintnode)r&  r*  r   s      rG   convert_shape_to_symintr1  U  s       A !S!!  a// AWW%%//AA!$AO   s   A9Bc                N    [        S U R                  R                   5       5      $ )z%
Does this op overload have aliasing
c              3  <   #    U  H  oR                   S Lv   M     g 7fru   )
alias_infor   r   s     rG   r   is_view.<locals>.<genexpr>p  s     F1EA||4'1Es   )any_schema	arguments)ops    rG   is_viewr;  l  s     F1E1EFFFrd   c                    gNFr|   )r   s    rG   <lambda>r>  u  s    rd   c                  ^ U R                   S:X  d  g[        U R                  [        R                  R
                  5      (       d  U R                  [        R                  L d  g[        [        R                  R
                  U R                  5      nU[        R                  L d  [        U5      (       a  [        U4S jU R                   5       5      $ [        R                  R                  UR                  ;   =(       d    T" U5      $ )z
Do all uses of this op have torch.Tag.pointwise or return True for optional `is_pointwise_fn`

Uses in views ops will follow the views uses
call_functionFc              3  <   >#    U  H  n[        UT5      v   M     g 7fru   )is_pointwise_use)r   uis_pointwise_fns     rG   r   #is_pointwise_use.<locals>.<genexpr>  s     KA#A77s   )r:  rf   targetr@   _ops
OpOverloadr   getitemr   r;  rj   usersTag	pointwisetags)userD  rF  s    ` rG   rB  rB  s  s     66_$3::uzz4455xGWGW9W%**''4F!!!WV__KKKK99&++-H1HHrd   	list[Any]c           	       ^^ [         R                  R                  5       m/ mSUU4S jjnTR                  " U /[	        [         R
                  X1U45      Q76 n[        U R                  R                  5      S:X  a3  [        U R                  R                  S   R                  5      S:X  a  U4nTR                  U5        [         R                  R                  0 T5      nUT4$ )Nc                `   > TR                  U 5        TR                  S[        T5       35      $ )Narg)appendplaceholderrB   )rR  g
graph_argss    rG   add_tensor_arg)gen_gm_and_inputs.<locals>.add_tensor_arg  s,    #}}s3z?"3455rd   r)   r   Tensor)rR  torch.Tensorr~   r(   )r@   fxGraphr@  r   rY  rB   r8  returnsr   r   outputr&   )rF  rm   kwargsrW  nodegmrU  rV  s         @@rG   gen_gm_and_inputsrb    s     	A%'J6 6 ??u||^F^LD 	FNN""#q(&&q)../8;wHHTN			b!	$Bz>rd   c                t    U S:X  a  g [        U 5      nUR                  5       (       a  UR                  5         g g Nr   )rI   rA   r   r   s     rG   r   r     s7    /7$$&&$$& 'rd   c                    [        U5        [        R                  " S5        [        R                  " 5       n[        U5       H  nU " U6 n[        U5        M     [        R                  " 5       nWc   eXt-
  $ )Ni9  )r   r@   manual_seedtimeperf_counterr   )modelexample_inputstimesr   t0r   resultt1s           rG   timedro    sk     	d				B5\'F  
			B7Nrd   c                    [         R                  " [        U5       Vs/ s H  n[        XX%5      PM     sn5      n[         R                  " U5      U-  n[        X-  S 5        UR                  5       $ s  snf )Nz.6f)r@   r   r   ro  medianprintitem)	ri  rj  rk  repeatbaseliner   r   timingstooks	            rG   print_performancerx    se     ll>CFmLmue	4mLG << 5(D	T_S!#99;	 	Ms   A3c                F   ^ [        X5      " 5       m[        XU4S j5        g)zKReplace obj.method() with a new method that returns a precomputed constant.c                    > T $ ru   r|   )rm  s   rG   r>  #precompute_method.<locals>.<lambda>  s    rd   N)r?   setattr)objmethodrm  s     @rG   precompute_methodr    s    S!#FC(rd   c                ,    U H  n[        X5        M     g)zFReplace methods with new methods that returns a precomputed constants.N)r  )r}  methodsr~  s      rG   precompute_methodsr    s    #& rd   c                8    [        X:  5      [        X:  5      -
  $ ru   )rv   )r   r   s     rG   cmpr    s    qu:AE
""rd   c                    [        U [        5      (       a  U /U-  $ [        U 5      S:X  a  [        U 5      " U S   /5      U-  $ U $ )Nr)   r   )rf   rv   rB   r   )rD   sizes     rG   pad_listliker    sD    !SsTz
1v{Aw!v%%Hrd   c                @    [        U 5      S:X  a  / $ SS jn[        XS9$ )Nr   c                    [        U [        5      (       a  U $ SSKJn  [        X5      (       d   eU R	                  5       $ )Nr)   )r7   )rf   r   	schedulerr7   get_name)elemr7   s     rG   	sort_functuple_sorted.<locals>.sort_func  s4    dC  K0$2222}}rd   r!  )r  r[   r~   r   )rB   sorted)rD   r  s     rG   tuple_sortedr    s$    
1v{	 !##rd   PRVT)	covariantc                  2    \ rS rSr\SS j5       rSS jrSrg)CachedMethodi  c                    g ru   r|   )r   s    rG   clear_cacheCachedMethod.clear_cache  s    ),rd   c                    g ru   r|   selfrm   r_  s      rG   __call__CachedMethod.__call__  s    rd   r|   N)r   r   r~   None)rm   P.argsr_  P.kwargsr~   r  )r   r   r   r   staticmethodr  r  r   r|   rd   rG   r  r    s    , ,Drd   r  c           	        ^ U R                   nSU S3mSU 0n[        SU ST ST S3R                  5       U5        [        R                  " U 5      " X! S3   5      nS
U4S	 jjnXCl        U$ )N___cacher   z        def zC_cache_on_self(self):
            try:
                return self.zy
            except AttributeError:
                pass
            rv = fn(self)
            object.__setattr__(self, "z%", rv)
            return rv
        _cache_on_selfc                B   > [        U T5      (       a  [        U T5        g g ru   )r   delattrr  r!  s    rG   r  "cache_on_self.<locals>.clear_cache  s    4D# rd   )r  r   r~   r  )r   execlstripr   wrapsr  )r   r   ctxwrapperr  r!  s        @rG   cache_on_selfr    s    ;;DtfF
C *CF  E "' (+e ,			 FH oob!#n&=">?G &Nrd   c           
        SSK Jn  [        U [        5      (       ay  [        R
                  " [        R                  U  Vs/ s H?  n[        US5      (       d  M  UR                  (       d  M)  UR                  R                  PMA     sn[        5       5      $ [        XR                  5      (       a  U R                  $ [        5       $ s  snf )Nr)   irr`  ) r  rf   r   r   r   r   or_r   r`  originsr   r0   )node_scheduler  r`  s      rG   aggregate_originsr    s     -&&LL *)D4( "-1YY "		!!)
 L
 	
 
M??	3	3$$$|s   C
C
+C
c                &   [        U 5      nUS:X  a~  U Vs/ s H\  nUR                  S:X  d  M  SUR                  ;   d  M'  UR                  S   c  M9  UR                  S   R                  R                  PM^     nn[        [        U5      5      nOUS:X  a  / nU H  nUR                  S:X  d  M  SUR                  ;   d  M'  UR                  S   S   n[        US   [        5      (       a  UR                  US   5        Mg  UR                  US   R                  5        M     [        [        U5      5      nO:US:X  a.  U Vs/ s H   o3R                  S:X  d  M  UR                  PM"     nnO[        eUnSR                  S	/U-   5      $ s  snf s  snf )
Noriginal_atenr@  r@   source_fn_stackr   r)   inductor_noder   fused)r  r:  r   _overloadpacketr   r  r   rf   r   rS  r   NotImplementedErrorjoin)r  descriptive_namesall_originsoriginsources	source_fns         rG   get_fused_kernel_namer  4  s    $M2KO+ &
%yyO+ B  6;;. B O,	 BFKK(88AA% 	 
 G,-	g	%!FyyO+0AV[[0P"KK(9:2>	ilC00NN9Q<0NN9Q<#8#89 " G,-	o	-&1
&1FYY/5QKFKKk 	 
 "!G88WI'((5
(
s"   F	F	 F	'F	FFc                  ^ [        U 5      nU Vs/ s H  o3R                  S:X  d  M  UPM     nn[        R                  " [        5      n[        R                  " [        5      nS m[        U5      (       a  [        S U 5       5      n[        U5      S:X  a^  US   R                  m[        TS5      (       d+  0 n[        TR                  5       H	  u  pXU
'   M     UTl        UR                  U4S jS9  U H  nSUR                  ;   aO  UR                  S   b?  [        UR                  S   R                  5      nXl   R!                  UR"                  5        S	UR                  ;   d  Mt  UR                  S	   S   R"                  nX\   R!                  UR"                  5        M     Tb  S
OSnUR$                   SU SSR'                  UR)                  5       5       SSR'                  UR)                  5       5       S3nUR$                   S3/n[+        UR-                  5       5       HA  u  nnUR!                  UR$                   SU SSR'                  [+        U5      5       35        MC     TbU  UR!                  UR$                   S35        U H1  n
UR!                  UR$                   SU
R/                  5        35        M3     USR'                  U5      4$ s  snf )Nr@  c              3  8   #    U  H  oR                   v   M     g 7fru   )r-  )r   ns     rG   r   &get_kernel_metadata.<locals>.<genexpr>g  s     "CNq77Nr   r)   r   )_inductor_kernel_metadata_node_to_idx_mapc                "   > TR                   U    $ ru   )r  )r  single_graphs    rG   r>  %get_kernel_metadata.<locals>.<lambda>q  s    lTTUVWrd   r  r  	from_nodezTopologically SortedUnsorted z Source Nodes: [r   z], Original ATen: []z" Source node to ATen node mapping:z   z => z Graph fragment:
)r  r:  collectionsdefaultdictr   rB   r   r-  r   r   nodesr  sortr   r   r  rS  r   commentr  keysr  itemsformat_node)r  r  r  r  inductor_nodesfrom_node_dictoriginal_aten_dictunique_graphsnode_to_idx_mapidxr  r`  r!  sort_strmetadatadetailed_metadataoriginal_noder  r  s                     @rG   get_kernel_metadatar  X  s    $M2K+6W;)):Vf;NW ,,T2N$006
 L
>""CN"CC}")!,22L<)TUU"$'(:(:;FC),A& <IXFW    dii'DIIo,F,Rdii0@@AC#**4995$))#))K(+00C&&tyy1  *6)A%zH??
1XJ&6tyyATATAV7W6X Y99%7%<%<%>?@	C  $OO,,NOP &~';';'= >u  s=/diiu6N5OP	
 !?   GOO#44D!EFA $$'8AMMO;L%MN  
 TYY0111g Xs
   KKc                   [        U 5      n [        U 5      nU (       ak  U R                  5       nUR                   HB  nU(       a  U" U5      (       a  M  XB;  d  M   UR	                  U5        U R                  U5        MD     U (       a  Mk  U$ )zJReturns the set of nodes whose values depend on those within initial_queue)r   r   rC   rJ  addrS  )initial_queueskip_filterdominated_setr`  users        rG   dominated_nodesr    sx    
 'M}-M
  "JJD{400(!!$'$$T*  - rd   c                B  ^^ SS K nSSKJm  SUU4S jjmUR                  5        Vs/ s H  nT" U5      (       d  M  UR                  PM      nnU  Vs/ s H  nT" U5      (       d  M  UR                  PM      nn[        UR                  " / UQUQ76 5      $ s  snf s  snf )Nr   r)   r  c                  > [        U TR                  5      (       a  T" U R                  5      $ [        U TR                  5      (       a  T" U R                  5      $ [        U TR                  5      =(       a    [        U TR
                  5      $ ru   )rf   	TensorBoxdata
StorageBoxr1   	Pointwise)r  r  is_unrealized_nodes    rG   r  *gather_origins.<locals>.is_unrealized_node  sd    a&&%aff--a''%aff--!RYY'GJq",,,GGrd   )r  r1   r~   r   )	itertoolsr  r  r   r  r   chain)	rm   r_  r  valkwarg_originsrR  arg_originsr  r  s	          @@rG   gather_originsr    s     H H -3MMOWOS?QRU?V[S[[OMW*.J$32DS2I;3;;$KJiooC{C]CDD XJs   BBB(Bc                ^   [        U [        R                  5      (       a  U R                  $ [        U [        R                  5      (       a)  SR                  [        [        U R                  5      5      $ [        U [        R                  5      (       a)  SR                  [        [        U R                  5      5      $ [        U [        [        [        [        45      (       aC  U R                  R                   SSR                  [        [        U R                  5      5       S3$ [!        U 5      $ )z
Normal sympy str is very slow, this is a lot faster.  The result are
somewhat worse, as it doesn't do as much simplification.  So don't
use this for final codegen.
z + z * (r   ))rf   rg   Symbolr   rh   r  rk   	sympy_strrm   MulrS   rP   rQ   rR   funcr   r   )exprs    rG   r   r     s     $%%yy$		""zz#i344$		""zz#i344$(HhGHH))$$%QtyyY		1J'K&LANNt9rd   c                    SSK Jn  [        R                  (       a9  [	        UR
                  SS 5      =n(       a  UR                  S:w  a  [        U 5      $ [        R                  " 5       $ )Nr)   r)  current_node
index_expr)
r,  r*  rX   compute_all_boundsr?   interpreterrF  rV   rW   unknown)r   r*  fx_nodes      rG   get_bounds_index_exprr    sN     	!!~tDDWDNNl*5!!""$$rd   c                    U S   S:H  $ )Nr   rr|   )prefixs    rG   prefix_is_reductionr    s    !9rd   c                D    U [         R                  :w  d   e[        XSSS9$ )1
Used to generate an integer-nonnegative symbol.
Tintegernonnegative)rU   SIZErT   )r  r  s     rG   sympy_index_symbol_with_prefixr    s'     TYY vDdCCrd   c                b    U =(       d    [         R                  =(       a    [         R                  $ ru   )rX   debug_index_assertsassert_indirect_indexing)checks    rG   generate_assertr    s    /V//TV5T5TTrd   c                D    U S   S:w  d   e[         R                  " U SSS9$ )r  r   sTr  )rg   r  r   s    rG   sympy_index_symbolr    s)     7c>> <<d==rd   c                          SS jn[         R                  " U 5      R                  UR                  5        VVs0 s H  u  p4X2" X45      _M     snn5      $ s  snnf )z
When the passed replacement symbol v is a string, it is converted to a symbol with name v that
have the same replaced expression integer and nonnegative properties.
c                    [        U [        R                  5      (       d   e[        U[        5      (       a*  [        R                  " UU R
                  U R                  S9$ U$ )Nr  )rf   rg   r   r   r  r   is_nonnegative)replacedreplacements     rG   	to_symbolsympy_subs.<locals>.to_symbol   sV     (EJJ////k3''<< ++$33  rd   )r#  r}   r$  zUnion[sympy.Expr, str]r~   sympy.Symbol)rg   r   xreplacer  )r  replacementsr%  krp   s        rG   
sympy_subsr+    sh    +A	 ==''(4(:(:(<=(<IaO	(<= =s   A
c                   [        U [        R                  5      =(       dd    [        U [        R                  5      =(       aC    [	        S [
        R                  " U R                  5       U R                  5       5       5       5      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fru   is_symbolicr   rD   s     rG   r   is_symbolic.<locals>.<genexpr>  s     N(M1A(Mr   )	rf   r@   r$   rY  r7  r  r  r  stride)r   s    rG   r/  r/    sS    a& 1ell# 	ON	!((*(MNNrd   c                 &    [        S U  5       5      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fru   r.  r5  s     rG   r   "any_is_symbolic.<locals>.<genexpr>  s     ,t!{1~~tr   r7  )rm   s    rG   any_is_symbolicr7    s    ,t,,,rd   c                V   SSK Jn  [        / SQ5      n[        R                  " 5       (       a  UR                  S5        U R                  R                   HQ  n[        UR                  5      U;   a  Us  $ UR                  R                  S5      =nc  M@  U" U5      (       d  MO  Us  $    g )Nr   )free_unbacked_symbols)z,aten._fused_moving_avg_obs_fq_helper.defaultz7aten._fused_moving_avg_obs_fq_helper_functional.defaultzfbgemm.dense_to_jagged.defaultz%fbgemm.jagged_to_padded_dense.defaultrun_and_save_rng_staterun_with_rng_statezaten._local_scalar_densezaten._assert_scalar)zaten._unsafe_index_put.defaultz0aten._unsafe_masked_index_put_accumulate.defaultzaten.index_put.defaultzaten.index_put_.defaultzaten.scatter.srczaten.scatter.reducezaten.scatter.value_reducezaten.scatter_add_zaten.scatter_add.defaultzaten.scatter_reduce.twozaten.scatter_reduce_.twozaten.scatter_reduce.two_outr  )%torch.fx.experimental.symbolic_shapesr9  r   r@   $are_deterministic_algorithms_enabledupdater-  r  r   rF  r   get)ra  r9  forbidden_setr`  r  s        rG   %get_first_incompatible_cudagraph_noderA    s     L	
M  1133	
" t{{},K99==''C49Ns9S9SK	 
 rd   c                    [        [        [        U R                  R                  5      5      5      nUR
                  S:X  d   eU$ )z$Get the output node from an FX graphr^  )nextiterreversedr-  r  r:  )ra  	last_nodes     rG   output_noderG  M  s6    T(288>>234I<<8###rd   _registered_cachesc                    [        U S5      (       a  [        U R                  5      (       d  [        U  S35      e[        R                  U 5        U $ )ze
Use this decorator to register any caches that should be cache_clear'd
with fresh_inductor_cache().
cache_clearz# does not have a cache_clear method)r   callablerJ  AttributeErrorrH  rS  r}  s    rG   clear_on_fresh_inductor_cacherN  W  sE    
 3&&hs.G.Gu$GHIIc"Jrd   c                 >    [          H  n U R                  5         M     g)z
Clear all registered caches.
N)rH  rJ  rM  s    rG   clear_inductor_cachesrP  c  s     " "rd   c                 Z   [        [        R                  R                  5       5       GH  n U R	                  S5      (       d  M  [        R                  U    nUR
                  R                  5        H  nUR	                  S5      (       d  M  [        X5      n[        U[        R                  R                  R                  R                  5      (       d  Me  UR                   H1  nUR                  R                  R                   R#                  5         M3     M     [        R                  U 	 GM     S[        R                  ;   aR  [        R                  S   n[%        UR&                  R(                  R*                  5      ?UR&                  R(                  ?[.        R0                  " 5         g )Nz&torch._inductor.runtime.compile_tasks.triton_ztriton.runtime.driver)r   sysmodulesr  
startswith__dict__r?   rf   r@   	_inductorruntimetriton_heuristicsCachingAutotunercompile_resultskernelrunmod__del__r   driveractiveutilsinstancegccollect)module_namem	attr_namer\  rm  r^  s         rG   unload_xpu_triton_pydsri  n  s&   CKK,,./%%&NOOKK$*I##I.. .EOO33EEVV  #)"8"8))--557 #9 + KK$ 0 #++-kk12""(()2JJ#JJLrd   c              #    ^#    [        5         [        R                  " US9m [        R                  R                  [        R                  ST05         [        R                  ST5        [        R                  R                  TS5      n[        R                  R                  [        R                  SU05         Sv   [        U [
        5      (       a  [        U 5      S:X  d   S5       e[        R                  R                  U5      (       a{  [        R                  " U5      nU R!                  U Vs0 s HH  nS	U;  d  M  U[        R                  R#                  [        R                  R                  X55      5      _MJ     sn5        SSS5        SSS5        U(       aU  [%        5       (       a-  [&        R(                  R+                  5       (       a
  [-        5         [.        R0                  " TU4S
 jS9  [        5         gs  snf ! , (       d  f       N= f! , (       d  f       N= f! [2         a    [        R5                  ST5        e f = f! [        5         f = f7f)z
Contextmanager that provides a clean tmp cachedir for inductor.

Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
generated with this cache instance.
)dirTORCHINDUCTOR_CACHE_DIRzUsing inductor cache dir %stritonTRITON_CACHE_DIRNr   z!expected empty cache_entries dictz.lockc                .   > [         R                  STUS9$ )Nz*Failed to remove temporary cache dir at %s)exc_info)r   warning)r  pathrp  inductor_cache_dirs      rG   r>  &fresh_inductor_cache.<locals>.<lambda>  s    S[[@&% 6A 6rd   )onerrorz(on error, temporary cache dir kept at %s)rP  tempfilemkdtempr   patchdictosenvironr   r   rr  r  rf   rB   existslistdirr>  getsize
is_windowsr@   r;   rA   ri  shutilrmtree	Exceptionrq  )cache_entriesrk  deletetriton_cache_dirfilesfrs  s         @rG   fresh_inductor_cacher    s     !))c2& ZZ__JJ24FG
 II35GH!ww||,>I.@BR-STmT22}-2W4WW2ww~~&677 "

+; <%,, */).A#*!#3 !V277??277<<@P3T#U U). U
$ ||		 6 6 8 8&(MM"
 	3 UT
 
B  >@RS 	st   I0H' A'H:A9H3
H AH H	HA$H' 5I H
H	H
H$ H' '"I		I IIc           
     z    U R                   n[        [        U 5      5      n[        [	        [        X!SS95      5      $ )NT)r!  reverse)__getitem__r   rB   r   rE  r  )seqgettera_rs      rG   argsortr    s/    __F
C/C>?@@rd   c           	     D  ^  SU 4S jjn[        U5       VVs/ s H>  u  p4U[        U[        R                  5      (       a  UR                  R
                  OU4PM@     nnn[        U[        R                  " U5      S9nU VVs/ s H  u  p6UPM	     nnnU$ s  snnf s  snnf )Nc                ~   > U u  p#Uu  pESU4S jjnU" X5:  5      (       a  gU" X5:  5      (       a  gX$:  a  gX$:  a  gg)Nc                R   > [        U [        5      (       a  U $ TR                  U SS9$ )NT)size_oblivious)rf   r   evaluate_expr)r  r/  s    rG   evaluate*argsort_sym.<locals>.cmp.<locals>.evaluate  s+    $%%**4*EErd   r   r)   r   )r  z%Union[bool, torch.SymInt, sympy.Expr]r~   r   r|   )r   r   a_idxa_valb_idxb_valr  r/  s          rG   r  argsort_sym.<locals>.cmp  sN    	F
 EM""EM""
 ==rd   r  )r   tuple[int, sympy.Expr]r   r  r~   rv   )	r   rf   r@   r$   r`  r  r  r   
cmp_to_key)r/  r  r  r  r  exprsr   rm  s   `       rG   argsort_symr    s    4  n$FC 
Z5<<88affkka@$ 
  5i22378E %&fccF&M
 's   ABBc                r    U [         R                  :X  a  g[         R                  " SU S9R                  5       $ )Nr_   r|   r   )r@   r  r   element_sizer  s    rG   get_dtype_sizer    s-     ;;r'4466rd   c                       \ rS rSr% S\S'   Srg)LineContexti  r   contextr|   Nr   r   r   r   __annotations__r   r|   rd   rG   r  r    s    Lrd   r  c                  *    \ rS rSr% S\S'   S\S'   Srg)ValueWithLineMapi  r   ry   zlist[tuple[int, LineContext]]line_mapr|   Nr  r|   rd   rG   r  r    s    J++rd   r  c                      \ rS rSrSrSSS jjrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jr    SS jrSSS jjrSS S jjrSS S jjr S!     S"S jjrS#S jrSS jrS$S jrSrg)%IndentedBufferi     c                    / U l         Xl        g ru   )_lines_indent)r  initial_indents     rG   __init__IndentedBuffer.__init__   s    GI%rd   c                   [        5       nSn/ nU R                   H  n[        U[        5      (       a  U" 5       nUc  M$  O5[        U[        5      (       a  UR                  X$R                  45        MX  Un[        U[        5      (       d   eUR                  U5        UR                  S5        USUR                  S5      -   -  nM     [        UR                  5       U5      $ )Nr)   r  )r
   r  rf   DeferredLineBaser  rS  r  r   writecountr  getvalue)r  bufr   linemaplilines         rG   getvaluewithlinemap"IndentedBuffer.getvaluewithlinemap  s    j13++B".//t<  B,,::/dC((((IIdOIIdOTZZ%%%A   88rd   c                6    U R                  5       R                  $ ru   )r  ry   r  s    rG   r  IndentedBuffer.getvalue  s    '')///rd   c                   [        5       nU R                   H  n[        U[        5      (       a  U" 5       nUc  M$  O[        U[        5      (       a  M<  Un[        U[
        5      (       d   eUR                  S5      (       a  UR                  US S 5        M  UR                  U5        UR                  S5        M     UR                  5       $ )N\r   r  )	r
   r  rf   r  r  r   endswithr  r  )r  r  r  r  s       rG   getrawvalueIndentedBuffer.getrawvalue  s    j++B".//t<  B,,dC((((}}T""		$s)$		$		$   ||~rd   c                8    U R                   R                  5         g ru   )r  clearr  s    rG   r  IndentedBuffer.clear/  s    rd   c                ,    [        U R                  5      $ ru   )r   r  r  s    rG   __bool__IndentedBuffer.__bool__2  s    DKK  rd   c                :    SU R                   U R                  -  -  $ )Nr  )r  tabwidthr  s    rG   r  IndentedBuffer.prefix5  s    dllT]]233rd   c                &    U R                  S5        g )Nr  	writeliner  s    rG   newlineIndentedBuffer.newline8  s    trd   c                   [        U[        5      (       a  U R                  R                  U5        g [        U[        5      (       a9  U R                  R                  UR                  U R                  5       5      5        g UR                  5       (       a.  U R                  R                  U R                  5        U 35        g U R                  R                  S5        g Nr  )rf   r  r  rS  r  with_prefixr  stripr  r  s     rG   r  IndentedBuffer.writeline;  s    dK((KKt$.//KKt//>?ZZ\\KK$++-78KKr"rd   c                8    U H  nU R                  U5        M     g ru   r  )r  linesr  s      rG   
writelinesIndentedBuffer.writelinesE  s     DNN4  rd   c                L   ^ ^ [         R                  SUU 4S jj5       nU" 5       $ )Nc               3     >#    T=R                   T -  sl          S v   T=R                   T -  sl         g ! T=R                   T -  sl         f = f7fru   r  )offsetr  s   rG   r  "IndentedBuffer.indent.<locals>.ctxL  s8     LLF"L'&&s   A4 AAAr~   Iterator[None])
contextlibcontextmanager)r  r  r  s   `` rG   indentIndentedBuffer.indentK  s$    		"	"	' 
#	' urd   c                .    U =R                   U-  sl         g ru   r  r  r  s     rG   	do_indentIndentedBuffer.do_indentV      rd   c                .    U =R                   U-  sl         g ru   r  r  s     rG   do_unindentIndentedBuffer.do_unindentY  r  rd   c           	        [        U[        5      (       a  [        S5      nUR                   HR  n[        U[        5      (       a  M  U(       d  M#  [        U[        U5      [        UR                  5       5      -
  5      nMT     [        R                  " U5      (       a  SnUR                   HV  n[        U[        5      (       a  U R                  R                  U5        M5  [        R                  X[        U5      S  5        MX     g [        R                  " U5      nU(       a  UR                  5       nU(       d  g UR                  5       nUR!                  S5       H  nU R                  U5        M     g )Ninfr   r  )rf   r  floatr  r  minrB   r  mathisinfrS  r  rv   textwrapdedentrstripr  )r  
other_coder  r  r  r  s         rG   spliceIndentedBuffer.splice\  s    j.115\F"))!$44 TS5G)GHF * zz&!!"))dK00KK&&t,",,TF3FG	 * "4J'..0
#**,J%%d+q! ,rd   c                    [        U R                  S9nU R                   Vs/ s H
  o1" U5      PM     snUl        U$ s  snf N)r  )r  r  r  )r  r  r   r  s       rG   rk   IndentedBuffer.mapu  s8    DLL9-1[[9[Td4j[9

 :s   =c                @    [        U 5       SU R                  5        S3$ )Nr  r  )r   r  r  s    rG   __repr__IndentedBuffer.__repr__z  s     t*Qt}}/q11rd   c                    U R                   UR                   :X  d   e[        U R                   S9nUR                  U R                  5        UR                  UR                  5        U$ r   )r  r  r  r  )r  otherr   s      rG   __add__IndentedBuffer.__add__}  sK    ||u}},,,DLL9t{{#u||$
rd   )r  r  Nr   )r  rv   r~   r  )r~   r  r~   r   r~   r  r~   r   )r  z)Union[LineContext, DeferredLineBase, str]r~   r  )r  z3Sequence[Union[LineContext, DeferredLineBase, str]]r~   r  rs   )r  rv   r~   'contextlib.AbstractContextManager[None])r  rv   r~   r  )F)r  zUnion[IndentedBuffer, str]r  r   r~   r  )r  zCallable[[Any], Any]r~   r  )r  r   r~   r  )r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rk   r  r  r   r|   rd   rG   r  r    s    H&9(0(!4#!H!	!	 EJ"4"=A"	"2
2rd   r  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )FakeIndentedBufferi  c                "   > [         TU ]  5         g ru   )superr  )r  	__class__s    rG   r  FakeIndentedBuffer.__init__  s    rd   c                V    US:X  a  [         R                  X5      $ [        SU S35      e)Nr  zTried to call self.z on FakeIndentedBuffer. This bufferis currently used on TritonTemplateKernel to prevent actualwrites to the body without explicitly specifying the body with`TritonTemplateKernel.set_subgraph_body(name)`)object__getattribute__r   )r  r   s     rG   r  #FakeIndentedBuffer.__getattribute__  s9    ;**466!$ (= =
 	
rd   r|   r  )r   r   r~   r   )r   r   r   r   r  r  r   __classcell__r  s   @rG   r  r    s    
 
rd   r  c               #     #    [         R                  [         R                  p S v   Xs[         l        [         l        g ! Xs[         l        [         l        f = f7fru   )rS  stdoutstderr)initial_stdoutinitial_stderrs     rG   restore_stdout_stderrr    s9     %(ZZN@!/
CJ
CJs    A> AAAc                  h    \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSrg)r  i  z.A line that can be 'unwritten' at a later timec                >    UR                  5       (       d  SnXl        g r  )r  r  r  s     rG   r  DeferredLineBase.__init__  s    zz||D	rd   c                    [         e)zJReturns either self.line or None to indicate the line has been 'unwritten'r  r  s    rG   r  DeferredLineBase.__call__      !!rd   c                    [         e)z3Returns a new deferred line with the same conditionr$  r  s     rG   	_new_lineDeferredLineBase._new_line  r&  rd   c                @    U R                  U U R                   35      $ ru   r(  r  )r  r  s     rG   r  DeferredLineBase.with_prefix  s    ~~455rd   c                T    U R                  U R                  R                  5       5      $ ru   )r(  r  r  r  s    rG   r  DeferredLineBase.lstrip  s    ~~dii..011rd   c                >    U R                  U R                  U   5      $ ru   r+  )r  r   s     rG   r  DeferredLineBase.__getitem__  s    ~~dii.//rd   c                ,    [        U R                  5      $ ru   )r   r  r  s    rG   r  DeferredLineBase.__bool__  s    DIIrd   c                ,    [        U R                  5      $ ru   )rB   r  r  s    rG   __len__DeferredLineBase.__len__  s    499~rd   )r  N)r  r   )r~   zUnion[str, None])r  r   r~   r   )r  r   r~   r   )r~   r   )r   zUnion[int, slice]r~   r   r  r~   rv   )r   r   r   r   r   r  r  r(  r  r  r  r  r4  r   r|   rd   rG   r  r    s-    8
""620rd   r  c                  D   ^  \ rS rSrSrSU 4S jjrSS jrS	S jrSrU =r	$ )
DelayReplaceLinei  z6At end of codegen call `line.replace(key, value_fn())`c                <   > [         TU ]  U5        Xl        X l        g ru   )r  r  r!  value_fn)r  r!  r:  r  r  s       rG   r  DelayReplaceLine.__init__  s     rd   c                j    U R                   R                  U R                  U R                  5       5      $ ru   )r  replacer!  r:  r  s    rG   r  DelayReplaceLine.__call__  s#    yy  4==?;;rd   c                D    [        U R                  U R                  U5      $ ru   )r8  r!  r:  r  s     rG   r(  DelayReplaceLine._new_line  s    $-->>rd   )r!  r:  )r!  r   r:  zCallable[[], str]r  r   r
  )r  r   r~   r8  )
r   r   r   r   r   r  r  r(  r   r  r  s   @rG   r8  r8    s    @!
<? ?rd   r8  c                   [        U [        R                  5      (       a  U nO[        R                  " [        5       U 5      n[        R
                  " U5      n[        R                  R                  (       aF  UR                  c   eUR                  S:  d  UR                  S:X  a  [        R                  S5        ggUR                  S:X  a  SOSnUR                  nXC:  a  [        R                  S	X4S
.S9  gg)N	   
   z6GPU arch does not support max_autotune_gemm mode usageFTr;   r\   D   z,Not enough SMs to use max_autotune_gemm mode)min_sms	avail_sms)extra)rf   r@   r   rH   r   createversionhipmajorr   rq  r   multi_processor_count)index_or_devicer   proprE  rF  s        rG   
is_big_gpurO    s    /5<<00 lno>""6*D }}zz%%%::>TZZ2-KKPQKK5(bbG**I:%> 	 	
 rd   c                 T    [         R                  R                  S5      R                  $ )Nr9   )r@   r9   get_device_propertiesrL  r|   rd   rG   get_max_num_smsrR    s    ::++F3IIIrd   c                 f    [         R                  R                  5       n [        5       U b  U -
  $ S-
  $ )zFHandle experimental carveout if set otherwise return hardware SM countr   )r@   r   _get_sm_carveout_experimentalrR  )carveouts    rG   get_num_smsrV    s1     xx557HH,@HHaHHrd   c                    SSK JnJn  UR                  S5      n[	        5       U -  [
        -  nU" UUUUR                  " 5       S9$ )zKBuilds and returns a WorkspaceArg for the device side TMA workspace buffer.r)   )r*   WorkspaceZeroModeF)r  	zero_moder   
outer_name)codegen.commonr*   rX  	from_boolrV  TMA_DESCRIPTOR_SIZEunique_name)num_tma_descriptorsr   r*   rX  rY  r  s         rG   get_tma_workspace_argr`    sM    
 @!++E2I=..1DDD++-	 rd   c                 ~    [         R                  =(       d'    [         R                  =(       d    [         R                  $ ru   )rX   max_autotunemax_autotune_gemmsearch_autotune_cacher|   rd   rG   use_max_autotunere    s&    Wv77W6;W;Wrd   c                    [        U R                  R                  5      =(       a+    U R                  U;   =(       a    [	        U R                  5      $ ru   )is_gpur   r   r   rO  )layoutallowed_layout_dtypess     rG   _use_template_for_gpurj    s>     	v}}!!" 	&LL11	&v}}%rd   c                    U R                  5       [        R                  R                  5       R                  S5       Vs/ s H  oR	                  5       PM     sn;   $ s  snf N,)upperrX   max_autotune_gemm_backendsr  r  backendrD   s     rG   _use_autotune_backendrr    P    ==?!<<BBDJJ3OOa	O      Ac                    U R                  5       [        R                  R                  5       R                  S5       Vs/ s H  oR	                  5       PM     sn;   $ s  snf rl  )rn  rX   max_autotune_conv_backendsr  r  rp  s     rG   _use_conv_autotune_backendrw    rs  rt  F)enable_int32enable_float8c                  SSK JnJn  [        R                  [        R
                  [        R                  /nU(       a>  [        R                  [        R
                  [        R                  [        R                  /nU(       a/  UR                  [        R                  [        R                  /5        [        U R                  R                  5      =(       a    [        X5      =(       d/    U R                  R                  S:H  =(       a    U R                  U;   =(       a@    [!        5       =(       a/    [#        S5      =(       a    U" U R                  UR$                  5      $ )Nr)   )BackendFeaturehas_backend_featurer   TRITON)r[  r{  r|  r@   r  r	  r  r  extendr  r  rg  r   r   rj  r   re  rr  TRITON_TEMPLATES)rh  rx  ry  r{  r|  layout_dtypess         rG   use_triton_templater  #  s     D]]ENNEMMBMu{{Se1153D3DEF v}}))* A)&@O ""e+M0M		P 		P "(+		P  ~/N/NOrd   c                    ^^ SSK Jn  SSKJm  SU4S jjm[        R
                  R                  =(       a#    U" 5       =(       a    [        U4S jU  5       5      $ )Nr   )has_triton_tma_devicer)   r)  c                  > [        U R                  5       5      S:w  a  gU R                  5       nU[        R                  [        R
                  4;  a  gU R                  5       nUR                  5       nUR                  5       (       d  U(       d  gUR                  S   nU(       a  UR                  S   nXAR                  -  nTR                  R                  R                  U[        5      $ )N   Fr)   r   )rB   get_size	get_dtyper@   r  r	  
get_layoutis_transposedis_contiguousr  itemsizer-  r.  statically_known_multiple_ofTMA_ALIGNMENT)rD   r   rh  
transposed	inner_diminner_bytesr*  s         rG   _is_tma_compatible3use_triton_tma_template.<locals>._is_tma_compatible@  s    qzz|!77))+
$$&&*KKN	AI..0ww<<[-XXrd   c              3  4   >#    U  H  nT" U5      v   M     g 7fru   r|   )r   rg  r  s     rG   r   *use_triton_tma_template.<locals>.<genexpr>V  s     8x!"1%%x   rD   r1   r~   r   )torch.utils._tritonr  r,  r*  rX   rm  enable_persistent_tma_matmulrj   )matricesr  r*  r  s     @@rG   use_triton_tma_templater  ;  sA    9Y( 	22 	9!#	98x88rd   c                $   SSK Jn  UR                  R                  R	                  X-  U-  SS9nUS::  d  U[
        R                  R                  :  a  gSSKJ	n  [        R                  R                  (       a  g[        R                  [        R                  [        R                  [        R                   /n[#        X5      =(       a    [%        5       =(       a    ['        S5      nU(       a"  U" 5       (       d  [(        R+                  S	5        gU$ )
Nr)   r)  r   fallbackr   F)try_import_cutlassCUTLASSzFailed to import CUTLASS lib. Please check whether _inductor.config.cuda.cutlass_dir is set correctly. Skipping CUTLASS backend for now.)r,  r*  r-  r.  	size_hintrX   r9   cutlass_backend_min_gemm_sizecodegen.cuda.cutlass_utilsr  r@   rI  rJ  r  r	  r  r  rj  re  rr  r   rq  )	rh  rg  r  r*  r*  	gemm_sizer  r  r   s	            rG   use_cutlass_templater  Z  s      **1519r*BIA~V[[%N%NN> }}]]ENNEMM5;;OMf4 	-	-!),  !##KK4
 Jrd   c                T    [         R                  R                  U 5      R                  $ ru   )r@   r9   rQ  gcnArchNamer   s    rG   _rocm_native_device_arch_namer  x  s    ::++F3???rd   c                      SS K n SSKJnJn  SSKJn  [        R                  R                  U R                  5      nXAX#4$ ! [         a    SS jnSS jn " S S5      nS n N&f = f)	Nr   )gen_ops_librarygen_ops_preselected)CKGemmOperationc                     / $ ru   r|   r|   rd   rG   r  *try_import_ck_lib.<locals>.gen_ops_library      Ird   c                     / $ ru   r|   r|   rd   rG   r  .try_import_ck_lib.<locals>.gen_ops_preselected  r  rd   c                      \ rS rSrSrg)*try_import_ck_lib.<locals>.CKGemmOperationi  r|   N)r   r   r   r   r   r|   rd   rG   r  r    s    rd   r  )r~   rO  )ck4inductor(ck4inductor.universal_gemm.gen_instancesr  r  ck4inductor.universal_gemm.opr  rz  rr  dirname__file__r   )r  r  r  r  package_dirnames        rG   try_import_ck_libr  }  sh    	
	
 ''//+*>*>? -@QQ  			 	 s   ;A  A$#A$c                   [        5       (       d  g[        R                  R                  (       d  gU R                  R
                  S:X  d  g[        U R                  5      n[        R                  R                   Vs0 s H  o"R                  S5      S   U_M     sn=(       d    UR                  S5      S   U0nUR                  5       [        R                  R                  -   Vs/ s H  nX2   PM	     nnU(       d  gU R                  [        R                  [        R                  [        R                   4;  a  g[#        5       u  n    nU(       d  [$        R'                  S5        g[        R(                  " 5       (       a  U[        R                  l        [        R                  R*                  (       d  [$        R'                  S5        gU[        R                  R*                  :w  a  [$        R'                  S5        ggs  snf s  snf )	NFr9   :r   z,Please pip install Composable Kernel packagez,Please set TORCHINDUCTOR_CK_DIR env variablezInvalid path to CK libraryT)re  r@   rI  rJ  r   r   r  rX   rocmarchr  r  ck_supported_archr   r  r	  r  r  r   rq  	is_fbcodeck_dir)rh  native_archr*  requested_archsrequested_supported_archsck_package_dirnamer   s          rG   use_ck_templater    s   ====' 0>K39;;3C3CD3Cawws|A)3CD #q!;IO
 !%%'&++*G*GG!GA 	G  ! %||EMM5>>5==II"3"51aBC/;;BCV[[///01= E!s   =H(Hc                    SSK Jn  [        S5      =(       a>    [        U 5      =(       a,    UR                  R
                  R                  X-  U-  SS9S:  $ )Nr)   r)  CKr   r  r   )r,  r*  rr  r  r-  r.  r  )rh  rg  r  r*  r*  s        rG   use_ck_gemm_templater    sP     	d# 	CF#	CGG&&quqy2&>Brd   c                <    [        S5      =(       a    [        U 5      $ )Nr  )rw  r  rh  s    rG   use_ck_conv_templater    s    %d+G0GGrd   c                V    [        5       =(       a    U R                  R                  S:H  $ rd  )re  r   r   r  s    rG   _use_template_for_cpur    s    =&--"4"4"==rd   c                    SSK Jn  [        UR                  U5      (       d   e[	        XUSS9=(       a    UR                  R                  5       $ )Nr)   )r2   F)require_constant_mat2)r  r2   rf   rh  use_cpp_gemm_templater  )rh  mat1mat2r2   s       rG   use_cpp_bmm_templater    sF     dkk6**** 	fDN 	(KK%%'rd   c                   SSK Jn  SSKJn  SSKJn	  SSKJn
  [        U 5      (       a  [        S5      (       d  g[        R                  R                  (       d  gUR                  5       [        R                  [        R                   4;   n[        R"                  [        R$                  [        R&                  [        R                  /nU
" UUU(       a  U R(                  OS UUS9u  ppp[+        X45      (       a  g[-        X'R.                  5      (       a  UR1                  5       nU	" UR                  5       5      u  nnU" S	UUUUR                  5       UR                  5       U[3        5       U(       + US
9
nSS jnU R(                  U;   =(       aT    US L=(       aI    U" U5      =(       a:    [-        X'R4                  5      =(       a    UR7                  5       =(       d    U(       + $ )Nr)   r  )create_micro_gemm)*get_gemm_template_output_and_compute_dtype)mm_argsCPPF)	out_dtypemat2_transposeduse_4x2_dim
micro_gemm)input_dtypeinput2_dtypeoutput_dtypenum_threadsuse_refq_group_sizec                N    U R                  5         U R                  5       S   S:H  $ )Nr   r)   )freeze_layout
get_striderD   s    rG   is_last_dim_stride12use_cpp_gemm_template.<locals>.is_last_dim_stride1  s"    	||~b!Q&&rd   r  )r  r  codegen.cpp_micro_gemmr  codegen.cpp_utilsr  kernel.mm_commonr  r  rr  rX   cppweight_prepackr  r@   r  r  r  r	  halfr   has_free_symbolsrf   BaseViewunwrap_viewparallel_num_threadsr  is_module_buffer)rh  r  r  r  r  is_woq_int4r  r  r  r  r  	int8_gemmr  rg  r  r*  r  r   r  r  s                       rG   r  r    s    9M) ((0Ee0L0L::$$ U[[%**$==I]]ENNEJJLM")"+&,,'#A!T $$$!@AQROL!"			NN$^^%!(*!J'
 	% 	Cd"	C%	C t]]+	C ""$A,A(Ard   c                 D    [        5       (       + =(       d    [        S5      $ )NATEN)re  rr  r|   rd   rG   use_aten_gemm_kernelsr  '  s    !!B%:6%BBrd   c                  b    \ rS rSr% \R
                  " S5      rS\S'   S
S jrS
S jr	SS jr
Srg	)DebugDirManageri+  r   r   prev_debug_namec                @    [        [        R                  5      U l        g ru   )rC  r  counterr   r  s    rG   r  DebugDirManager.__init__/  s    ../rd   c                    [         R                  R                  R                  U l        U R                   SU R
                   3U l        U R                  [         R                  R                  l        g )N_tmp_)r@   _dynamorX   debug_dir_rootr  r   new_namer  s    rG   	__enter__DebugDirManager.__enter__2  sM    $}}33BB//0dggY?.2mm+rd   c                    [         R                  " U R                  5        U R                  [        R
                  R                  l        g ru   )r  r  r  r  r@   r  rX   r  )r  rm   s     rG   __exit__DebugDirManager.__exit__7  s*    dmm$.2.B.B+rd   )r   r  r  Nr  )rm   r   r~   r  )r   r   r   r   r  r  r  r  r  r  r  r   r|   rd   rG   r  r  +  s&    ooa G0<
Crd   r  c                   ^ SSK Jn  / mSU4S jjn[        R                  R	                  USU5         [
        R                  R                  5         U " U0 UD6nS S S 5        UT4$ ! , (       d  f       WT4$ = f)Nr)   r-   c                (   > TR                  U 5        g ru   rS  codesource_codess    rG   save_output_code*run_and_get_code.<locals>.save_output_codeE      D!rd   r  r  r   r~   r  r-  r.   r   rx  r  r@   r  reset)r   rm   r_  r.   r  rm  r  s         @rG   run_and_get_coder  <  su    
 % L" 
		=*<>N	OT$V$ 
P < 
P	O <s   'A&&
A7c                    [        U /UQ70 UD6u  p4/ nU H8  nUR                  [        R                  " SU[        R                  5      5        M:     X54$ )Nz	'''.*?''')r  r~  refindallDOTALL)r   rm   r_  rm  r  kernelsr  s          rG   run_and_get_kernelsr  N  sO     ,B@@@FGrzz,bii@A ?rd   c                *   ^  SU 4S jjn[        U5      $ )Nc                 R   > T" 5       n U R                  5       R                  5         U $ ru   )r   backward)rm  r   s    rG   run_with_backward1run_fw_bw_and_get_code.<locals>.run_with_backwardY  s!    

rd   )r~   r   )r  )r   r   s   ` rG   run_fw_bw_and_get_coder"  X  s    
 -..rd   c                t  ^^ SSK Jn  / mSU4S jjmS	U4S jjn[        R                  R	                  USU5         [        R                  R	                  UST5         [
        R                  R                  5         U " U0 UD6nSSS5        SSS5        T$ ! , (       d  f       N= f! , (       d  f       T$ = f)
zLGet the inductor-generated code, but skip any actual compilation or running.r)   r-   c                (   > TR                  U 5        g ru   r  r  s    rG   r  "get_code.<locals>.save_output_codeg  r  rd   c                   >  " S S5      nU R                   (       a  U R                  5       OU R                  5       u  p#T" UR                  5        U(       a  T" UR                  5        U" 5       $ )Nc                  ,    \ rS rSrSrSS jrSS jrSrg)	@get_code.<locals>.patched_compile_to_module.<locals>.DummyModuleik  z4This is empty to replace the generated triton modulec                    g ru   r|   r  s    rG   r  Iget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.__init__n  s    rd   c                    g ru   r|   r  s      rG   callEget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.callq  s    rd   r|   Nr  rm   r   r_  r   r~   r  )r   r   r   r   r   r  r,  r   r|   rd   rG   DummyModuler(  k  s    Frd   r/  )cpp_wrappercodegen_with_cpp_wrappercodegenry   )r  r/  wrapper_codekernel_coder  s       rG   patched_compile_to_module+get_code.<locals>.patched_compile_to_modulej  s[    	 	 04/?/?D))+T\\^ 	"
 	++,[../}rd   compile_to_moduler  Nr  )r  r.   r~   r   r  )r   rm   r_  r.   r5  r   r  r  s         @@rG   get_coder8  a  s    $ L". 	

.0I	
 	

-);=MN	 	O	
  	ON	
 	
 s#   "B('BB(
B%	!B((
B7c                    [        U /UQ70 UD6nS[        U5      s=::  a  S::  d  O   S[        U5       35       eUS   $ Nr)   r  z%expected one or two code outputs got r   )r8  rB   )r   rm   r_  r  s       rG   get_triton_coder;    sQ    B000LL!&Q& 
/L0A/BC& ?rd   c                    [        U /UQ70 UD6u  p4S[        U5      s=::  a  S::  d  O   S[        U5       35       eUS   $ r:  )r  rB   )r   rm   r_  r   r  s        rG   run_and_get_triton_coder=    sS    &r;D;F;OAL!&Q& 
/L0A/BC& ?rd   c                   ^^^ SSK Jm  SSKJn  UR                  m/ mSUUU4S jjn[
        R                  R                  USU5         U " U0 UD6nS S S 5        UT4$ ! , (       d  f       WT4$ = f)Nr   r-   r5   c                 h   > T" U 0 UD6  U S   n[        UT5      (       d   eTR                  U5        g )Nr  )rf   rS  )rm   r_  r-  r.   graph_lowerings	real_inits      rG   	fake_init-run_and_get_graph_lowering.<locals>.fake_init  s:    4"6"Q%////u%rd   r  r.  )torch._inductor.graphr.   torch._inductor.output_coder6   r  r   rx  r  )	r   rm   r_  r6   rB  rm  r.   r@  rA  s	         @@@rG   run_and_get_graph_loweringrF    sv     4;((IO& & 
		?J		BT$V$ 
C ?"" 
C	B ?""s   		A
A/c              #     #    SSK Jn  UR                  U    n [        R                  " X5      UR                  U '   Sv   X2R                  U '   g! X2R                  U '   f = f7f)zs
Override the lowering of aten_op with override_fn.
The first argument of override_fn is the original lowering fn.
r   )loweringN)torch._inductorrH  	loweringsr   partial)aten_opoverride_fnrH  orig_fns       rG   override_loweringrO    sY      )  )G.&/&7&7&M7#&-7#g7#s   A"'A  A"AA"c                   ^ ^^ SSK Jn  UR                  mSUUU 4S jjn[        R                  R
                  R                  USU5      $ )zf
Add hook functions to be called at the beginning and end of Scheduler.__init__.
Used for unit tests.
r   )	Schedulerc                F   > T" X5        T" X5      nT(       a  T" X5        U$ ru   r|   )r  r  outrN  post_fnpre_fns      rG   r  (add_scheduler_init_hook.<locals>.wrapper  s%    y i'I%
rd   r  )r  r   r  r   r~   r   )torch._inductor.schedulerrQ  r  unittestr   rx  r  )rU  rT  rQ  r  rN  s   ``  @rG   add_scheduler_init_hookrY    s>     4  G  ==%%iWEErd   c                    [         R                  (       a  [        R                  U 5        g[        R	                  U 5        g)z
Warnings that will be actionable for PyTorch developers, but not
end users.  Allows us to easily disable them in stable releases but
keep them on for nightly builds.
N)rX   developer_warningsr   rq  info)msgs    rG   developer_warningr^    s$       Crd   c                     [         R                  R                  S5      n U S-   [        [         R                  5      :  aV  [        [         R                  U S-      5      S:  a3  [         R                  U S-      S   S:w  a  [         R                  U S-      $ [         R                   H)  nUR                  S5      (       d  M  U[        S5      S s  $    g! [         a     NJf = f)a  
An experimental API used only when config.benchmark_kernel is true.

The benchmark name is only available at codegen time. So we can not
directly call it in benchmark_all_kernels which is run after codegen.

The function assumes the argument after --only is the benchmark name.
It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
scripts, this function may return None.

There are 2 flavors of --only argument we need handle:
1. --only model_name
2. --only=model_name
z--onlyr)   r   -z--only=N)rS  argvr   rB   
ValueErrorrU  )r  rR  s     rG   get_benchmark_namerc    s    	hhnnX&!Gc#((m#CHHS1W%&*q!!$+88C!G$$ xx>>)$$s9~'((    s   BC 
C"!C"c                &    [        S U  5       5      $ )Nc              3  *   #    U  H	  oS :H  v   M     g7fr)   Nr|   r0  s     rG   r   is_ones.<locals>.<genexpr>	       %u!Avu   rj   r  s    rG   is_onesrl        %u%%%rd   c                &    [        S U  5       5      $ )Nc              3  *   #    U  H	  oS :H  v   M     g7f)r   Nr|   r0  s     rG   r   is_zeros.<locals>.<genexpr>  rh  ri  rj  rk  s    rG   is_zerosrq    rm  rd   c                &    [        S U  5       5      $ )Nc              3     #    U  HI  n[        U[        R                  5      (       d  M$  UR                  [        R                  " S 5      :H  v   MK     g7f)r   N)rf   r@   rY  r   )r   rs  s     rG   r    is_cpu_device.<locals>.<genexpr>  s9      DdELL) 	+u||E**s
   #A*Arj  )inputss    rG   is_cpu_devicerv    s       rd   c                    [        U [        R                  5      (       d   S5       eU R                  (       a  [        R
                  $ [        R                  $ )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)rf   rg   r   r   r@   r  r  )r  s    rG   get_sympy_Expr_dtyperx    s@    c5::&& B& ~~{{}}rd   c              /     #    U (       a.  [         R                  R                  " U0 UD6 nUv   S S S 5        g S v   g ! , (       d  f       g = f7fru   )r@   r   r   )should_profilerm   r_  r   s       rG   maybe_profiler{  "  s;     ^^##T4V4G 54 	 54s   (A=A
AAc                 p    [         R                  R                  n U S:  a  [        R                  " 5       n U $ Nr)   )rX   r  threadsr@   get_num_threads)r~  s    rG   r  r  +  s+    jj  G{'')Nrd   c                     SSK Jn   U " 5       nUR                  S[        R                  R
                  (       a  S5      $ S5      $ )Nr)   )get_backend_options
num_stagesr     )runtime.triton_helpersr  r?  r@   rI  rJ  )r  optionss     rG   get_backend_num_stagesr  2  s2    ;!#G;;|%--*;*;QCCCCrd   c                .   SSK JnJn  U [        R                  [        R
                  [        R                  4;   d   e[        R                  " U5      R                  R                  S5      (       a  SSKJn  U" 5       nU [        R                  [        R
                  4;   a  U" X5      $ [        R                  R                  R                  R                   (       a  U" [        R                  U5      $ U" [        R                  U5      $ U [        R                  [        R
                  4;   a  U" U 5      $ [        R                  R                  R                  R                   (       a  U" [        R                  5      $ U" [        R                  5      $ )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops
clock_rate)max_clock_rate)triton.testingr  r  r@   r  r	  r  inspect	signature
parametersr?  torch._utils_internalr  backendsr9   matmul
allow_tf32)r   r  r  r  sm_clocks        rG   get_device_tflopsr  :  s   MU]]ENNEMMBBBB,-88<<\JJ8!#U]]ENN33,U==>>%%00,U]]HEE&u}}h??U]]ENN33,U33>>%%00,U]];;&u}}55rd   c                     SSK Jn   U " 5       $ )Nr   get_dram_gbps)r  r  r  s    rG   get_gpu_dram_gbpsr  V  s    ,?rd   c                 x    SSK Jn   U R                  R                  R	                  S5      R                  SS5      $ )Nr   r`  max_shared_mem)triton.runtimer`  ra  rb  rQ  r?  r  s    rG   get_gpu_shared_memoryr  ]  s.    %==44Q7;;<LaPPrd   c                $    U R                  S5      $ )Nwelford)rU  reduction_types    rG   is_welford_reductionr  c  s    $$Y//rd   c                4    [        U 5      (       a  gU S:X  a  gg)Nr  online_softmax_reducer  r)   )r  r  s    rG   reduction_num_outputsr  g  s    N++	2	2rd   c                 2    [         R                  " 5       S:H  $ )NLinux)platformsystemr|   rd   rG   is_linuxr  p  s    ??''rd   c                 (    [         R                  S:H  $ )NrZ   )rS  r  r|   rd   rG   r  r  t  s    <<7""rd   c                &    [        S U  5       5      $ )Nc              3     #    U  H7  n[        U[        R                  5      =(       a    UR                  (       + v   M9     g 7fru   )rf   rg   r   	is_numberr0  s     rG   r   #has_free_symbols.<locals>.<genexpr>y  s)     Jcz!UZZ(<_<cs   ?Ar6  )itrs    rG   r  r  x  s    JcJJJrd   c            	        SSK Jn  U  H  n[        X!R                  UR                  UR
                  UR                  UR                  45      (       aR  [        UR                  5       =(       d    S5      (       d'  [        UR                  5       =(       d    S5      (       a    gM  [        X!R                  5      (       d  M  [        S[        U5       35      e   g)Nr)   r  r|   Tzunexpected type for is_dynamic F)r  r  rf   r  r  r  ComputedBufferr/   r  maybe_get_sizemaybe_get_strider1   	TypeErrorr   )rm   r  ts      rG   
is_dynamicr  |  s    bmmR[[":K:KRYYW
 
   0 0 2 8b99=M""$*> > > Ayy))=d1gYGHH  rd   c                      \ rS rSrSrSrSrg)Placeholderi  KERNEL_NAMEDESCRIPTIVE_NAMEr|   N)r   r   r   r   r  r  r   r|   rd   rG   r  r    s      K *rd   r  c                x   SSK Jn  [        R                  " SSSS9 n[        R
                  " 5       n[        R
                  " 5       n[        U[        U5      S9R                  " U6   [        SUR                   3US	9  [        UR                  US	9  [        R                  " 5       n[        X5         U " UR                  5        S S S 5        [        R                  " 5       U-
  n	U" UR                  5        UR                  R                  5         UR                  5         [        S
UR                   3US	9  [        UR                  US	9  UR!                  5       UR!                  5       :H  n
["        R%                  SUUR&                  U
U	5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr)   )stable_topological_sortwzutf-8F)modeencodingr  )ra  	fake_modezBefore:
)filezAfter:
zZ%s, save before/after graph to %s, graph before/after are the same = %s, time elapsed = %s)pattern_matcherr  rv  NamedTemporaryFileior
   rN   rJ   	propagaterr  r-  r	   nowrM   lint	recompiler  r   r\  r   )r  ra  inpr]  r  r  	before_ioafter_io
start_timetime_elapsedr  s              rG   pass_execution_and_saver    sH    9		$	$
 
KKM	;;=R#3C#89CCSI	"(($1-bhhY'\\^
#B,N -||~
2)


#!,bhhX& H$5$5$77hFF	
-
 
 -,
 
s%   BF+3FCF+
F(	$F++
F9c                    SSK Jn  [        XR                  5      =(       a     [        U R                  UR
                  5      $ )z:
Check if input buffer is a multi-outputs template buffer
r)   r  )r  r  rf   CppTemplateBufferrh  MultiOutputLayout	input_bufr  s     rG   is_multi_outputs_templater    s7     i!5!56 :"..< rd   c                    SSK Jn  [        XR                  5      =(       a7    [	        U R
                  5      S:H  =(       a    [        U R
                  S   5      $ )zD
Check if input buffer is a output of multi-outputs template buffer
r)   r  r   )r  r  rf   MultiOutputrB   ru  r  r  s     rG   #is_output_of_multi_outputs_templater    sJ      	9nn- 	;	  !Q&	;%i&6&6q&9:rd   c                V   U c  gSSK Jn  [        U 5      UR                  :H  =(       a    US L =(       d    U R                  UL =(       Gd`    [        U 5      UR
                  :H  =(       Ga@    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  =(       d    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  =(       df    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  $ )NFr)   r  all_to_all_singleall_gather_into_tensorreduce_scatter_tensor)r  r  r   _CollectiveKernelop_overloadFallbackKernelr   r@   r   torchrecr  defaultr  r  r`  r:  r  s      rG   is_collectiver    s;    | 	T
b***Ud
0Td>N>NRT>T  	T
b''' 	
 	

 		**,?@ U$$		(:(:(L(L(T(TT
 		**,DE E$$99%%<<DDE 		**,CD Y$$		(:(:(P(P(X(XX+rd   c                >    SSK Jn  [        U 5      UR                  :H  $ Nr)   r  )r  r  r   _WaitKernel)r`  r  s     rG   is_waitr    s    :''rd   c                    SSK Jn  [        X5      (       a  [        S U R                   5       5      $ [        U R                  5      $ )Nr   GroupedSchedulerNodec              3  8   #    U  H  n[        U5      v   M     g 7fru   )contains_collectiver0  s     rG   r   &contains_collective.<locals>.<genexpr>  s     @<a&q))<r   )rW  r  rf   r7  snodesr  r`  snoder  s     rG   r  r    s4    >%..@5<<@@@$$rd   c                    SSK Jn  [        X5      (       a  [        S U R                   5       5      $ [        U R                  5      $ )Nr   r  c              3  8   #    U  H  n[        U5      v   M     g 7fru   )contains_waitr0  s     rG   r    contains_wait.<locals>.<genexpr>  s     :\=##\r   )rW  r  rf   r7  r  r  r`  r  s     rG   r  r    s4    >%..:U\\:::uzz""rd   c                    SSK Jn  [        U[        R                  R
                  5      (       a  U/n[        XR                  5      =(       a    U R                  U;   $ r  )r  r  rf   r@   rG  rH  r  r  r  s      rG   is_fallback_opr    sF     "ejj++,,Td--.I43C3Cr3IIrd   c                @    X!U    R                   R                  5          $ ru   )defining_opr  )buf_namename_to_bufname_to_fused_nodes      rG   buf_name_to_fused_snoder    s!     (3??HHJKKrd   c                    gr=  r|   r  s    rG   r>  r>  *      urd   c           	         U" U 5      (       a  g UR                  U 5        U R                   H-  n[        UR                  X#5      nXa;   a  M   [	        UUUUUS9  M/     g )Ncriteria_cb)r  unmet_dependenciesr  r   find_recursive_deps_of_node)r  collected_node_setr  r  r  depdefining_op_for_deps          rG   r  r  %  sf     55!''5HHk
 4##	
 (rd   c                    gr=  r|   r  s    rG   r>  r>  C  r   rd   c           
        U" U 5      (       a  g UR                  U 5        U R                  5        H  nUR                   H  nUR                  c   eUR                  R	                  5       S:X  a  M2  UR                  R	                  5       U;  a  MR  X6R                  R	                  5          nXq;   a  Mu  [        UUUUUS9  M     M     g )NOUTPUTr  )r  get_outputsrJ  r`  r  find_recursive_users_of_node)r  r  r  r  r  or  user_ops           rG   r  r  >  s     55! GGD99(((yy!!#x/yy!!#+==(););)=>G,(""'  !rd   c                j    [         R                  R                  R                  (       a  SOSnX-
  U-
  $ )zaComputes the number of inputs to the aot fw graph which have fixed addresses (params and buffers)r  r   )r@   
_functorchrX   functionalize_rng_ops)dynamo_gm_num_inputsaot_fw_gm_num_inputsnum_rng_seed_offset_inputss      rG   num_fw_fixed_argumentsr  [  s3     $$::   69SSSrd   c                   SS jnSn/ nU R                   R                   H8  nUR                  S:X  d  M  U" U5      (       a  UR                  U5        US-  nM:     U[	        [        [        U5      5      5      :X  d   e[        U5      $ )z6
Infers which inputs are static for a backwards graph
c                    SU R                   ;  =(       a;    SU R                   ;  =(       a%    SU R                   ;  =(       a    SU R                   ;  $ )Ntangentsbwd_seedbwd_base_offsetbwd_rng_stater  r  s    rG   is_saved_tensor'count_tangents.<locals>.is_saved_tensork  sH    aff$ .!&&(.!/.  qvv-		
rd   r   rT  r)   )rD   r(   r~   r   )r-  r  r:  rS  r   r   rB   )fx_gr  	arg_countstatic_arg_idxsr  s        rG   count_tangentsr"  f  s    

 IOZZ44= q!!&&y1NI	  d5_)=#>????rd   c                  >    \ rS rSr% S\S'   SS jr\S	S j5       rSrg)
	BoxedBooli  r   ry   c                    U R                   $ ru   )ry   r  s    rG   r  BoxedBool.__bool__  s    zzrd   c                @    [        U [        5      (       a	  SU l        U $ gr=  )rf   r$  ry   rM  s    rG   disableBoxedBool.disable  s    c9%%CIJrd   r|   Nr  )r}  r   r~   zUnion[BoxedBool, bool])	r   r   r   r   r  r  r  r(  r   r|   rd   rG   r$  r$    s     K  rd   r$  c              #     ^ ^#    SSK Jn  UR                  m   S             SU U4S jjjn[        R                  R                  USU5         S v   S S S 5        g ! , (       d  f       g = f7f)Nr)   r+   c                :   > TR                  U5        T" XX#XE5      $ ru   r  )r  kernel_namer4  r  gpucpp_definitionkernel_listorig_define_kernels         rG   define_kernel.collect_defined_kernels.<locals>.define_kernel  s'     	;'!{c
 	
rd   r1  )NTN)r  r,   r,  r   r4  r   r  Optional[str]r-  r   r.  r3  r~   r   )codegen.wrapperr,   r1  r   rx  r  )r/  r,   r1  r0  s   `  @rG   collect_defined_kernelsr5    s     5-;; #'(,
"

 
  	

 
 &
 

 
 
		/-	P 
Q	P	Ps   AA2A!	A2!
A/+A2c                    U S-   $ )N__original__r|   r  s    rG    get_cloned_parameter_buffer_namer8    s    .  rd   c                    U [         ;   $ ru   )r>   r  s    rG   rg  rg    s    Yrd   c                    [        U 5      $ ru   )rg  r  s    rG   device_need_guardr;    s    &>rd   c                `   [         R                  " 5       (       aZ  U [        R                  :X  aF  [        R                  R                  5       (       a#  [        R                  R                  5       S:  a  gU [        [        R                  [        R                  [        R                  /5      ;   $ )N)rB  r   F)
rX   r  r@   r	  r9   rA   get_device_capabilityr   r  r   r  s    rG   ,needs_fallback_due_to_atomic_add_limitationsr>    sm    
 	U^^#JJ##%%JJ,,.&8
EKKU^^#LMMMrd   c                   U R                   [        R                  R                  R                  [        R                  R                  R
                  4;   a  Uc  gU R                   [        R                  R                  R                  :X  a  SOSnUS U4;  =(       Gd&    U=(       a    [        U5      =(       a    [        U5      =(       d    U R                   [        R                  R                  R                  :H  =(       ap    US:H  =(       ad    U=(       a[    US:H  =(       aO    [        R                  R                  =(       a.    [        R                  R                  =(       d    [        5       S:g  =(       dJ    X:H  =(       a#    U[        R                  [        R                  4;   =(       d    [        R                   " 5       $ )NFr  r   r   r)   )overloadpacketr@   r   atenscatter_reduce_scatter_reducescatter_rg  r>  rX   r  fallback_scatter_reduce_sumdynamic_threadsr  r   r  r=  )r  r  
self_dtype	src_dtypesrc_device_typesrc_is_tensor	reduce_tys          rG   use_scatter_fallbackrL    s]    	""IINN**EIINN,I,IJ	K" ++uyy~~/F/FFE 
 	tY// 	8 	8 H'H<YG		8 &&%))..*H*HH L%'LL  5(L 

66	L
 ++J/C/E/J	8 'SJ5::u{{:S,S	8 557!rd   c                   SSK JnJn  SSKJn  [        S[        U 5       S35        [        U 5       GH.  u  pE[        SUS S35        XRL a  [        S	5        M'  XQL a  [        S
5        M8  [        XS5      (       a  UR                  5       n[        U(       a  SOS S35        U(       a;  UR                  c   e[        SUR                  R                  R                   35        [        S5        UR                  R                   H  n[        U5        M     [        S5        UR                  R                   H  n[        U5        M     GM  [!        S[#        U5       35      e   g)z
An API that can be used in pdb to dump a node_schedule.
Right mainly dump the read/write dependencies but can add more as needed.
r   DisableReductionEnableReduction)SchedulerNodezNode schedule with z nodesr  3r  zenable reductionzdisable reductionredpwz scheduler nodeNzoriginal reduction hint zReadDep:z	WriteDep:zUnrecognized node type: )torch._inductor.codegen.simdrO  rP  rW  rQ  rr  rB   r   rf   is_reductionr`  r  reduction_hintread_writesreadswritesr   r   )r  rO  rP  rQ  r  r`  is_redr  s           rG   dump_node_scheduler\    s&   
 O7	M 236
:;}-	#al"$%%%&,,&&(FfU$/?@yy,,,01N1N0OPQ*''--c
 .+''..c
 / !9$t*FGG' .rd   c                z    SSK Jn  U" U R                  5       [        U R                  5      -  [
        -  S:H  5      $ )Nr   )statically_known_true)r<  r^  storage_offsetr  r   GPU_ALIGN_BYTES)r   r^  s     rG   tensor_is_alignedra  	  s:     L 				 >&,,#?	??RVWW rd   c                    [        U R                  R                  5      (       d  g[        R                  =(       d    [        U 5      $ r=  )rg  r   r   rX   assume_aligned_inputsra  )example_inputs    rG   should_assume_input_alignedre  	  s5     -&&++,,''K+<]+KKrd   c                    [         R                  R                  R                  5       n U (       d  [        R
                  " 5       $ U R                  R                  nU(       d  [        R
                  " 5       $ UR                  5       $ ru   )	r@   _guardsTracingContexttry_getr  nullcontextr  r/  suppress_guards)tracing_contextr/  s     rG   #maybe_get_suppress_shape_guards_ctxrm  	  sb    
 mm22::<O%%''  ))33I%%''$$&&rd   c                "   [         R                  R                  R                  [        SS5         [
        R                  R                  5         SS KnSS K	nUR                  " 5       nUR                  " U5      nSSKJn  UR                  U5        UR                  nUR!                  UR"                  5        U " U0 UD6n	UR%                  5       n
UR!                  U5        UR'                  U5        S S S 5        X4$ ! , (       d  f       W	W
4$ = f)Nr   Tr   )output_code_log)rX  r   rx  r  rX   r@   r  r  r  loggingr
   StreamHandlertorch._inductor.codecachero  
addHandlerlevelsetLevelDEBUGr  removeHandler)r   rm   r_  r  rp  log_capture_stringchro  
prev_levelrm  r  s              rG   run_and_get_cpp_coder{  .	  s     
			#	#FGT	:[[]""#56=""2&$**
  /T$V$'')  ,%%b) 
;  9! 
;	:  19s   CC==
Dc                    [        U 5      nUb  UR                  $ U  H:  n[        U[        R                  5      (       d  M$  UR
                  R                  s  $    g ru   )rJ   r/  rf   r@   r$   r`  )ru  r  inputs      rG   shape_env_from_inputsr~  G	  sR     (I """ eU\\**::''' 
 rd   c                >   ^ ^ [        T5      S:X  a  T $ SUU 4S jjnU$ )Nr   c                ,   > [        U T5        T" U 5      $ ru   )copy_misaligned_inputs)
new_inputsinputs_to_checkri  s    rG   r]  )align_inputs_from_check_idxs.<locals>.runb	  s    z?;Z  rd   )r  list[InputType]r~   r   )rB   )ri  r  r]  s   `` rG   align_inputs_from_check_idxsr  [	  s(     ?q ! ! Jrd   c                X   SU R                  5       ;   a  SnO;[        S [        U R                  5       U R                  5       5       5       5      S-   n[        R
                  " X4S5      R                  5       n[        R
                  " X R                  5       U R                  5       5      $ )Nr   c              3  6   #    U  H  u  pUS -
  U-  v   M     g7frf  r|   )r   shaper2  s      rG   r   )clone_preserve_strides.<locals>.<genexpr>o	  s     T:Sf$:Ss   r)   rs   )r  r   r   r2  r@   
as_stridedclone)rD   needed_sizebuffers      rG   clone_preserve_stridesr  i	  s    AFFH} T#affh
:STTWXX 	 a6<<>FFFFHahhj99rd   c                    U HS  nX   n[        U[        R                  5      (       d   eUR                  5       [        -  (       d  MF  [        U5      X'   MU     g ru   )rf   r@   rY  data_ptr	ALIGNMENTr  )r  check_inputs_idxsr   _inps       rG   r  r  u	  sI     }$----==?Y&&248JM	 rd   c                    / nU HV  nX   n[        U[        R                  5      (       d  M(  UR                  5       [        -  S:X  d  ME  UR                  U5        MX     [        U5      [        U5      :w  a  U$ U$ )zO
We require all inputs to be aligned, so introduce a copy for any
that aren't.
r   )rf   r@   rY  r  r  rS  rB   )ru  static_input_idxsaligned_static_input_idxsr  r}  s        rG   remove_unaligned_input_idxsr  	  sp     !# eU\\**0@90LQR/R%,,S1 ! $%->)??((rd   c                   SSK Jn  [        R                  " [        R                  5      R
                  nUR                  R                  R                  nUR                  R                  R                  R                  nUR                  R                  R                  X:*  5      (       a  gU" U 5      =(       a    U" U 5      U:*  $ )Nr)   r)  T)r,  r*  r@   iinfor  r   r-  r.  r  r/  has_hintis_expr_static_and_true)r   r*  int_maxr  r  s        rG   expr_fits_within_32bitr  	  s    kk%++&**G  **Iww))22H 	ww//==A;29Q<722rd   c                6  ^^^ [         R                  R                  R                  5       nUb  UR                  b  [        UR                  5      S:X  d   e[        U 5      mUR                  c   eUR                   H  nUc  UR                  R                  S 5        M#  Sm[         R                  R                  R                  5       =n(       a  UR                  mSUU4S jjmUR                  R                  [        U4S jU 5       5      5        M     g g g )Nr   Fc                r   > Tc  [        U 5      $ T(       a  TR                  U 5      $ TR                  U 5      $ ru   )rv   deserialize_symexprevaluate_symexpr)r   fakify_first_callr/  s    rG   map_expr4set_tracing_context_output_strides.<locals>.map_expr	  s7     ("1v((<<Q??$55a88rd   c              3  4   >#    U  H  nT" U5      v   M     g 7fru   r|   )r   r   r  s     rG   r   5set_tracing_context_output_strides.<locals>.<genexpr>	  s     5u!(1++ur  )r   r   r~   z,Union[float, int, SymInt, SymFloat, SymBool])
r@   rg  rh  ri  output_stridesrB   r~  rS  r  tuple)rj  compiled_graphr  r  r  r  r  r/  s        @@@rG   "set_tracing_context_output_stridesr  	  s     mm**224Gw55A7))*a///).9	,,888#22E}&&--d3$)!--66>>@@3@(+(=(=%9 9 &&--5u55 3	  Brd   c                 4   [         R                  b  [         R                  $ [         R                  " 5       (       d  g[        R                  R                  5       (       a  g SSKJn   U [        R                  R                  S5      :  $ ! [         a     gf = f)NFr   REMOTE_CACHE_VERSIONz.pytorch/remote_cache:fx_graph_memcache_version)
rX   fx_graph_remote_cacher  r@   _utils_internalis_fb_unit_testtorch._inductor.fb.remote_cacher  ModuleNotFoundErrorjustknobs_getval_intr  s    rG    should_use_remote_fx_graph_cacher  	  s    ##/+++,,..H  5#8#8#M#M8$    s   "B
 

BBc                2    [         R                  " SSU 5      $ )Nz[^a-zA-Z0-9_]r   )r  subr  s    rG   normalize_namer  	  s    66"C..rd   ztl.int1ztl.float8e4nvztl.float8e5ztl.float8e4b8ztl.float8e5b16ztl.uint8)ztl.boolztl.float8_e4m3fnztl.float8_e5m2ztl.float8_e4m3fnuzztl.float8_e5m2fnuzztl.float8_e8m0fnuz^.*[.]c                j    [         R                  S[        U 5      5      n[        R	                  X5      $ )z"Convert torch.dtype to triton typetl.)_triton_type_rer  r   _triton_type_mappingr?  )r   triton_type_names     rG   triton_typer  	  s+    &**5#e*=##$4GGrd   c                    [         R                  X 5      nUR                  SS5      n[        [        U5      n[        U[        R                  5      (       d   eU$ )Nr  r  )_torch_triton_mappingr?  r=  r?   r@   rf   r   )r   adjusted_type	type_namer  s       rG   triton_type_to_torchr  	  sM    )--e;M%%eR0Iy)Ii----rd   c                   U R                   (       + =(       a    U R                  5       UR                  5       :H  =(       a    U R                  5       UR                  5       :H  =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       ae    U R                  5       R                  5       UR                  5       R                  5       :H  =(       a!    U R                  5       UR                  5       :H  $ ru   )	is_mkldnnr  r2  r   r   untyped_storager  r_  r  ry   s     rG   is_same_tensorr  	  s    NN 	<IIK5::<'	<KKMU\\^+	< JJ%++%	< KK5<<'		<
   "++-1F1F1H1Q1Q1SS	< !U%9%9%;;rd   c                   U R                   =(       a    U R                  5       UR                  5       :H  =(       a    U R                  UR                  :H  =(       as    U R                  UR                  :H  =(       aS    [        R
                  R                  R                  U 5      [        R
                  R                  R                  U5      :H  $ ru   )r  r  r   r   r@   r   mkldnnr  r  s     rG   is_same_mkldnn_tensorr  	  s     	PIIK5::<'	PJJ%++%	P KK5<<'	P II%%d+uyy/?/?/H/H/OOrd   c                     g)N)r  isnanlogical_notlogical_andsignbitand_leltgegteqner  xorr|   r|   rd   rG   boolean_opsr  	
  s    rd   c                  *    \ rS rSr% S\S'   S\S'   Srg)OpDtypeRulei
  r%   type_promotion_kindOptional[torch.dtype]override_return_dtyper|   Nr  r|   rd   rG   r  r  
  s    8800rd   r  zdict[str, OpDtypeRule]op_dtype_propagation_rulesc                (    [        X5      [        U '   g ru   )r  r  )r   r  r  s      rG   #register_op_dtype_propagation_rulesr  &
  s    
 (3(t$rd   c                    [         R                  R                  (       a4  U [        R                  [        R
                  4;   a  [        R                  $ U $ )z"Maybe upcast [b]float16 to float32)rX   rm  codegen_upcast_to_fp32r@   r  r	  r  r  s    rG   upcast_compute_typer  0
  s3    }}++%--00}}Lrd   KeyTypeValTypec                  v    \ rS rSrSrSS jrSS jrSS jrSS jrSSS jjr	SS	 jr
SS
 jrSS jrSS jrSrg)
ScopedDicti=
  z
A dictionary-like object that allows for scoped updates. It maintains
an original dictionary and a set of new items that can override
the original items within the scope.  The original dictionary is
unmodified.
c                    Xl         0 U l        g ru   original_dict	new_items)r  r  s     rG   r  ScopedDict.__init__E
  s    *13rd   c                \    XR                   ;   a  U R                   U   $ U R                  U   $ ru   r  r  r  s     rG   r  ScopedDict.__getitem__I
  s,    .. >>#&&!!#&&rd   c                     X R                   U'   g ru   )r  )r  r!  ry   s      rG   __setitem__ScopedDict.__setitem__N
  s    #srd   c                H    XR                   ;   =(       d    XR                  ;   $ ru   r  r  s     rG   __contains__ScopedDict.__contains__Q
  s    nn$A/A/A(AArd   Nc                t    XR                   ;   a  U R                   U   $ U R                  R                  X5      $ ru   )r  r  r?  )r  r!  r  s      rG   r?  ScopedDict.getT
  s2    .. >>#&&!!%%c33rd   c                    [        U R                  5      nU R                   H  nX R                  ;  d  M  US-  nM     U$ r}  )rB   r  r  )r  r  r*  s      rG   r4  ScopedDict.__len__Y
  s<    ""#A***Q   rd   c              #     #    U R                    S h  vN   U R                   H  nXR                   ;  d  M  Uv   M     g  N-7fru   r  )r  r*  s     rG   __iter__ScopedDict.__iter__`
  s8     %%%%A***   	&s   AA  A
Ac                R    [        U R                  =(       d    U R                  5      $ ru   )r   r  r  r  s    rG   r  ScopedDict.__bool__f
  s    D&&8$..99rd   c                    [         eru   r$  r  s     rG   __delitem__ScopedDict.__delitem__i
  s    !!rd   r  )r  zMapping[KeyType, ValType])r!  r  r~   r  )r!  r  ry   r  r~   r  )r!  r  r~   r   ru   )r!  r  r  Optional[ValType]r~   r  r6  )r~   zIterator[KeyType]r  )r!  r  r~   r  )r   r   r   r   r   r  r  r  r  r?  r4  r  r  r  r   r|   rd   rG   r  r  =
  s5    4'
$B4
:"rd   r  )frozen_defaultfrozenc              .   ^ SU4S jjnU c  U$ U" U 5      $ )Nc                   > [         R                  S:  a  [        R                  " U STS9$ [        R                  " U TS9$ )N)r  rC  T)kw_onlyr  r  )rS  version_infodataclasses	dataclass)rx   r  s    rG   wrapir_dataclass.<locals>.wrapo
  s;    w&((d6JJ ((V<<rd   )rx   r[   r~   r[   r|   )rx   r  r  s    ` rG   ir_dataclassr  m
  s    = {9rd   c                     [         R                  R                  R                  5       n U b'  U R                  (       a  U R                  R
                  $ g ru   )r@   rg  rh  ri  fw_metadatabw_donated_idxs)rl  s    rG   get_donated_idxsr  |
  s=    mm22::<O"'B'B**:::rd   c                    SSK JnJn  SSKJn  U  Hc  nXSU4;  d  M  UR
                  c  M  UR
                  R                   Vs/ s H  nUR                  PM     snUR                  R                  U'   Me     g s  snf )Nr)   rN  r)  )
codegen.simd_kernel_featuresrO  rP  r,  r*  r`  r  r   r   ._inductor_triton_kernel_to_post_grad_node_info)r  r,  rO  rP  r*  r`  r  s          rG   'set_kernel_post_grad_provenance_tracingr  
  so     P)9::yy$ #'))"3"3W"3 KK"3WFF{S Ws   A9c                  (    \ rS rSrSrSrSrSrSrSr	g)	TritonAttrsDescriptorVersioni
  r   r)   r  r  r  r|   N)
r   r   r   r   V0_NO_TRITONV1_COMPILERV2_BACKENDSV3_BACKENDS_TUPLEV4_DICTr   r|   rd   rG   r  r  
  s     LKK	  Grd   r  c                 f   [         R                  R                  S5      c  [        R                  $ SS Kn SS Kn [        U R                  R                  S5      (       a  [        R                  $ [        U R                  R                  S5      (       a  [        R                  $ [        R                  $ )Nrm  r   AttrsDescriptor)	importlibutil	find_specr  r  triton.backends.compilertriton.compiler.compilerr   r  compilerr  r  r  )rm  s    rG   #get_triton_attrs_descriptor_versionr'  
  s    ~~)1+888##v''):;; ,777	))+<	=	=+777 ,333rd   c                 8    [        5       [        R                  :H  $ ru   )r'  r  r  r|   rd   rG   triton_version_uses_attrs_dictr)  
  s    .04P4X4XXXrd   r
  )rb   rv   r~   rv   )rp   r}   r~   r   )   d   )r   zCallable[[], Any]r   rv   r   rv   r~   r  r  )r   z"Union[Optional[torch.device], str]r~   torch.device)r   zIterable[sympy.Expr]r~   r}   )r   Sequence[sympy.Expr]r   r-  r~   r}   )r   zIterable[_T]r~   zValuesView[_T])r   Union[int, sympy.Expr]r   r.  r~   r.  )r!  r  r~   r   )r&  z"Iterable[Union[int, torch.SymInt]]r~   zlist[sympy.Expr])r&  z Iterable[Union[int, sympy.Expr]]r~   zlist[Union[int, torch.SymInt]])r:  torch._ops.OpOverloadr~   r   )rN  r(   rD  z'Callable[[torch._ops.OpOverload], bool]r~   r   )rF  r   rm   rO  r_  dict[str, Any]r~   z&tuple[GraphModule, list[torch.Tensor]])r9   )r   r   r~   r  )r)   r9   )
ri  Callable[..., Any]rj  Sequence[Any]rk  rv   r   r   r~   r  )r|   rC  rC  g      ?r9   )ri  r1  rj  r2  rk  rv   rt  rv   ru  r  r   r   r~   r  )r}  r   r~  r   r~   r  )r}  r   r  	list[str]r~   r  )r   rv   r   rv   r~   rv   )rD   zUnion[int, Sequence[int]]r  rv   r~   Sequence[int])rD   ztuple[_T, ...]r~   zlist[_T])r   z!Callable[Concatenate[Any, P], RV]r~   zCachedMethod[P, RV])r  0Union[Sequence[BaseSchedulerNode], ExternKernel]r~   zOrderedSet[Node])r  Sequence[BaseSchedulerNode]r  z8Literal[True, 'torch', 'original_aten', 'inductor_node']r~   r   )r  r5  r  r,   r~   ztuple[str, str]ru   )r  zIterable[torch.fx.Node]r  zOptional[Callable[[Any], bool]]r~   zOrderedSet[torch.fx.Node])rm   zSequence[IRNode]r_  zdict[str, IRNode]r~   zOrderedSet[IRNode])r  r}   r~   r   )r   r}   r~   zValueRanges[Any])r  r   r~   r   )r  rU   r  rv   r~   r'  )r  r   r~   r   )r   r   r~   r'  )r  r}   r)  zdict[sympy.Expr, Any]r~   r}   )r   r   r~   z,TypeGuard[Union[torch.SymInt, torch.Tensor]])rm   r   r~   r   )ra  torch.fx.GraphModuler~   zOptional[torch.fx.Node])ra  r7  r~   r(   )r}  r   r~   r   r  )NNT)r  zOptional[dict[str, Any]]rk  r3  r  r   r~   r  )r  r2  r~   	list[int])r/  r'   r  z.Sequence[Union[int, torch.SymInt, sympy.Expr]]r~   r8  )r   torch.dtyper~   rv   r  r	  )rM  zUnion[int, torch.device]r~   r   r6  )r_  rv   r   r,  r~   r*   )rh  r2   ri  zlist[torch.dtype]r~   r   )rq  r   r~   r   )rh  r2   rx  r   ry  r   r~   r   )r  r1   r~   r   )
rh  r2   rg  rv   r  rv   r*  rv   r~   r   )r   r   r~   r   )r~   zQtuple[Optional[str], Callable[[], list[Any]], Callable[[], list[Any]], type[Any]])rh  r2   r~   r   )rh  r2   r  zUnion[ReinterpretView, Buffer]r  r1   r~   r   )FTFN)rh  r2   r  r1   r  r1   r  r   r  r   r  r   r  zOptional[int]r~   r   )r   zCallable[P, _T]rm   r  r_  r  r~   ztuple[_T, list[str]])r   r1  rm   r   r_  r   r~   tuple[Any, list[str]])r   r1  r~   r:  )r   r1  rm   r   r_  r   r~   r3  )r   r1  rm   r   r_  r   r~   r   )r   r1  rm   r   r_  r   r~   ztuple[Any, list[GraphLowering]])rL  r1  rM  r1  r~   r  )rU  r1  rT  zOptional[Callable[..., Any]]r~   r   )r]  r   r~   r  )r~   r3  )r  r2  r~   r   )ru  zSequence[torch.Tensor]r~   r   )r  r}   r~   r9  )rz  r   rm   r   r_  r   r~   zIterator[Any])r  r   r~   r   )r  r   r~   rv   )r  zIterable[Any]r~   r   )
r  r1  ra  r&   r  r2  r]  r   r~   r  )r  z"Optional[Union[Buffer, Operation]]r~   r   )r`  z Optional[Union[Node, Operation]]r:  z!Optional[torch._ops.OperatorBase]r~   r   )r`  z"Optional[Union[IRNode, Operation]]r~   r   )r  r7   r~   r   )r`  zOptional[Operation]r:  z?Union[torch._ops.OpOverload, Collection[torch._ops.OpOverload]]r~   r   )r  r   r  r0  r  r0  r~   r   )r  r7   r  zMutableSet[BaseSchedulerNode]r  zdict[str, SchedulerBuffer]r  zdict[str, BaseSchedulerNode]r  zCallable[[Any], bool]r~   r  )r  rv   r  rv   r~   rv   )r  r7  r~   rv   )r/  r3  r~   r  )r   r   r~   r   )r   r3  r~   r   )r   r   r~   r   )r   r9  r~   r   )r  r/  r  r3  rG  r9  rH  r9  rI  r   rJ  r   r~   r   )r  r6  r~   r  )r   rZ  r~   r   )rd  rZ  r~   r   )r~   r  )r   r1  rm   r   r_  r   r~   ztuple[Any, str])ru  Sequence[InputType]r~   zOptional[ShapeEnv])ri   Callable[[list[InputType]], Any]r  r4  r~   r<  )rD   rZ  r~   rZ  )r  r  r  r4  r~   r  )ru  r;  r  r4  r~   r4  )r   r}   r~   r   )rj  r2  r  r6   r~   r  )r   r9  r~   r   )r   r   r~   r9  )r  rZ  ry   rZ  r~   r   )r~   ztuple[str, ...])r   r   r  r%   r  r  r~   r  )r   r9  r~   r9  )rx   zOptional[type[Any]]r  r   r~   r   )r~   zOptional[list[int]])r  r6  r,  r   r~   r  )r~   r  (6  
__future__r   r  r  r  enumr   r!  r  r  r  rp  r  r   rz  r  r  r  rS  rv  r  rg  rX  collections.abcr   r   r   r   r   r	   r
   typingr   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   rg   r@   torch._inductor.runtime.hintsr   torch.utils._ordered_setr   torch.utils._pytreer   r   r    r!   r"   r#   r$   torch._prims_commonr%   torch.fxr&   r<  r'   torch.fx.noder(   r[  r*   r4  r,   r-  r.   r  r/   r0   r1   r2   r3   r4   output_coder6   r  r7   r8   r>   r<   	lru_cacherH   torch._dynamo.device_interfacerI   torch._dynamo.utilsrJ   torch.autogradrK   torch.autograd.profiler_utilrL   (torch.fx.passes.graph_transform_observerrM   torch.fx.passes.shape_proprN   torch.utils._sympy.functionsrO   rP   rQ   rR   rS   torch.utils._sympy.symbolrT   rU   torch.utils._sympy.value_rangesrV   rW   r  rX   runtime.runtime_utilsrY   r   _IS_WINDOWS	getLoggerr   r   r[   ry  r   	VarRangesrY  rv   	InputTypeGPU_KERNEL_BIN_EXTSr`  r  r  r]  ra   rc   rl   Functionrn   r   r   r   r   r   r   r$  r'  r1  r;  rB  rb  r   ro  rx  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r+  r/  r7  rA  rG  rH  r  rN  rP  rd  ri  r  r  r  r  r  r  r  r  r  r  r  r  r8  rO  rR  rV  r`  re  rj  rr  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r8  r;  r=  rF  rO  rY  r^  rc  rl  rq  rv  rx  r{  r  r  r  r  r  r  r  r  r  r  r  Enumr  r  r  r  r  r  r  r  r  r  r  r  r  r"  r$  r5  r8  rg  r;  r>  rL  r\  ra  re  rm  r{  r~  r  r  r  r  r  r  r  r  r  r  r  compiler  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r'  r)  )r*  rp   s   00rG   <module>r\     s   "        	     	  	  
     U U          : / - >>//C$>",5$TT,= #	CL
 T  D 0 % 2 K 0  8 D  = llg%!T]UZZ'(	U5<<ell:;<	'7 	 {Q'A-+2B XDX XB5
LENN  9<QQ#&Q25Q
Qh T ;@
+)!)*@))#AL+	+++	)#.G @OI	I<I 
I0 *8+0' ' 	!  	
 ( %'!  	
    )'#$  cNTT"E8WQU^ E:C*!).!)O!) 	!)H82C82!82 82z 48*0 (E
E$5EE&$%	DU	>2-,,,^ !# I "	 
2 .24 +4 	4  4  	4  4 nA!!L!!H Q7 7*  , , ,
F FR
 
 @ @ @?' ? T 8 J JI "+<	 -2%)BF	0>< T@ @ TR R:+\H>

8
@F
	
" ""&"&<<
< < 	<
  < <  < 
<~CC C"      	 $#&25/)X###&#25#$#* ...@.. .$ IMFF)EFF*	B&&   TD D T6 66 T Q0(#K(*$)) *!

!
"-!
4A!
HK!
	!
H1	" -1!
*!)! 
!H(%#J
JGJ 
JLL .LDRLL *=

5
 ,
 5	

 '
 

< *=5 , 5	
 ' 
:T 2     ,!N$&$!$ $ 	$
 $ $ 
$NH>L'"#&252(+" &	:994A9	9$ $3!3B	:&/ '#)* $	  +?*D*D*FG*F$!*FG  **Y'H	 T & 1 1 1
 68 2 7
8 1 
	 )

)
-" 01 -"` D)t   *.=@	499  T4 42Yo Hs   7f