
    [Th#6                    .   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
JrJrJrJrJr  S SKJr  S SKJr  S SKJr  \(       a  S S	KJr  S qS qS q/ qS
\S'   / rS\S'   S qS q\R                   " S S5      5       r / q!S\S'   S q"S q#S q$S q%S5S jr&\ " S S5      5       r'S6S jr( " S S5      r)0 r*S\S'   \ " S S5      5       r+\+RY                  S/ SQ5        \+RY                  S/ SQ5        \+RY                  S / S!Q5        \+RY                  S"/ S#Q5        \+RY                  S$/ S%Q5        S7S& jr-S8S' jr.S9S( jr/      S:S) jr0S;S* jr1S8S+ jr2S<S, jr3S=S- jr4      S>S. jr5        S?S/ jr6S5S0 jr7S@S1 jr8\	SAS2 j5       r9SBS3 jr:SCS4 jr;g)D    )annotationsN)	dataclass)	lru_cache)CallablecastOptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  .    \ rS rSr% S\S'   SrS\S'   Srg)CppOuterLoopFusedCount(   intinner_kernel_numberr   local_buffer_number N)__name__
__module____qualname____firstlineno____annotations__r   __static_attributes__r       O/var/www/auris/envauris/lib/python3.13/site-packages/torch/_inductor/metrics.pyr   r   (   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsc                     Sq SqSq[        R	                  5         [
        R	                  5         SqSq[        R	                  5         Sq	Sq
SqSqg )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr    num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countr   r   r   resetr,   ;   s^     %&"%++- !01- r   c                  V    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S'   S\S	'   S
rg)CachedMetricsDeltasU   zQ
The subset of metrics we want update across cache hits, e.g., the
FxGraphCache.
r   r"   r#   r&   r'   r$   r)   r   N)r   r   r   r   __doc__r   r   r   r   r   r.   r.   U   s-    
  $''/22r   r.   c                 t    [         R                  " [        5       V s/ s H  o R                  PM     sn $ s  sn f N)dataclassesfieldsr.   name)fields    r   get_metric_fieldsr7   d   s*    $/$6$67J$KL$K5JJ$KLLLs   5c                  @    \ rS rSrSrSS jrS	S jr\S
S j5       rSr	g)CachedMetricsHelperh   z
A helper class to help calculate and apply counter deltas for those
metrics we want to save with cache entries (e.g., FxGraphCache) and
apply on a cache hit.
c                h    0 U l         [        5        H  n[        5       U   U R                   U'   M     g r2   )cached_metricsr7   globals)selfmetrics     r   __init__CachedMetricsHelper.__init__o   s.     ')F*1)F*;D' *r   c                |    0 n[        5        H"  n[        5       U   U R                  U   -
  X'   M$     [        S0 UD6$ )Nr   )r7   r=   r<   r.   )r>   delta_metricsr?   s      r   
get_deltasCachedMetricsHelper.get_deltast   sC    ')F$+If$58K8KF8S$SM! * #3]33r   c                b    [        5        H!  n[        5       U==   [        X5      -  ss'   M#     g r2   )r7   r=   getattr)deltar?   s     r   apply_deltas CachedMetricsHelper.apply_deltas{   s&    ')FIf!77 *r   )r<   NreturnNone)rL   r.   )rH   r.   rL   rM   )
r   r   r   r   r0   r@   rD   staticmethodrI   r   r   r   r   r9   r9   h   s%    <
4 8 8r   r9   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  |    \ rS rSr% S\S'   S\S'   SrS\S'       SS	 jrSS
 jrSS jrSS jr	\
SS j5       rSrg)MetricTable   str
table_name	list[str]column_namesr   r   num_rows_addedc                   U R                   [        5       ;  a  g U" 5       n[        U R                  5      [        U5      :X  d(   [        U R                  5       S[        U5       35       e[	        U R                  5      [	        UR                  5       5      :X  d6   [	        U R                  5       S[	        UR                  5       5       35       e[        5       nU/U R                   Vs/ s H  oBU   PM	     sn-   n[        S U 5       5      (       d   eU R                  [        [        [           U5      5        g s  snf )Nz v.s. c              3  B   #    U  H  n[        U[        5      v   M     g 7fr2   )
isinstancerS   ).0is     r   	<genexpr>&MetricTable.add_row.<locals>.<genexpr>   s     3s!:a%%ss   )rT   enabled_metric_tableslenrV   r   keysr   all
_write_rowr   listrS   )r>   row_fnrow_dictbncolumn_namerows         r   add_rowMetricTable.add_row   s    ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQ?P{+?PQQ3s33333T#Y,- Rs   +D>c                "    SU R                    S3$ )Nmetric_table_z.csv)rT   )r>   s    r   output_filenameMetricTable.output_filename   s    t/t44r   c                    U R                  5       n[        US5       n[        R                  " USS9nUR	                  S/U R
                  -   5        S S S 5        g ! , (       d  f       g = f)Nw
lineterminator
model_name)rn   opencsvwriterwriterowrV   )r>   filenamefdrx   s       r   write_headerMetricTable.write_header   sQ    '')(C BZZ48FOO\NT->->>? !  s   5A
A)c                   U R                  5       nU R                  S:X  a4  [        R                  R	                  U5      (       d  U R                  5         U =R                  S-  sl        [        U5       H+  u  p4[        U[        5      (       a  US nOUc  SnOUnXQU'   M-     [        US5       n[        R                  " USS9nUR                  U5        S S S 5        g ! , (       d  f       g = f)Nr      z.6f arr   rs   )rn   rW   ospathexistsr|   	enumeraterZ   floatrv   rw   rx   ry   )r>   ri   rz   idxorig_valnew_valr{   rx   s           r   rc   MetricTable._write_row   s    '')!#BGGNN8,D,Dq &s^MC(E**%cN!"H , (C BZZ48FOOC  !  s   0'C  
C.c                ,    [        X5      nU[        U '   g r2   )rQ   rO   )r5   rV   tables      r   register_tableMetricTable.register_table   s    D/). &r   r   N)re   z4Callable[[], dict[str, Optional[Union[str, float]]]]rL   rM   )rL   rS   rK   )ri   rU   rL   rM   )r5   rS   rV   rU   rL   rM   )r   r   r   r   r   rW   rj   rn   r|   rc   rN   r   r   r   r   r   rQ   rQ      sP    ONC.J.	.(5@!( / /r   rQ   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    SSK Jn  SSKJn  UR	                  U 5      nU" U5      n[
        R                  " UR                  R                  5      $ )z
The kernel_module_code is the python module that contains kernel function code.
kernel function is the proper triton kernel function annotated with
@triton.jit
r   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    [        U R                  5       5      $ )zB
Return the line of code for the kernel excluding the decorators.
)r`   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   &  s     $//122r   c                |    US:X  a  g [         R                  " SU 5      nU(       d   S5       eUR                  S5      $ )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!r   researchgroup)r   r   ms      r   _parse_size_hintsr   -  s;    )#
		/1CDA###1771:r   c                |    U S;  a  g [         R                  " SU5      nU(       d   S5       eUR                  S5      $ )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!r   r   )r   r   r   s      r   _parse_reduction_hintr   6  s>     CC
		9;MNA???1771:r   c                $    U R                  U5      $ r2   )count)r   patterns     r   _count_patternr   @  s     &&w//r   c                    U R                  5       S   nUR                  S5      (       d   eUR                  S5      nUR                  S5      nXS-   U nUR                  S5      n[	        U5      $ )Nr   def (z):r   ,)r   
startswithindexsplitr`   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   D  sn    $//1!4Hv&&&&s#InnT"GA0HNN3Eu:r   c                ,    U R                  S5      nXS $ )z
Skip decorators.
r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   N  s     $$V,I*%%r   c                z    [         R                  " U S3U 5      nU(       a  [        UR                  S5      5      $ g )Nz
 = ([\d]+)r   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   V  s3    
		^$K02GHA1771:r   c                v    [         R                  " SU 5      nU(       a  [        UR                  S5      5      $  g)zu
inductor meta looks like:
    inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
z.kernel_num_gb.:\s*([0-9.]+)r   N)r   r   r   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   ^  s5     			1>BAQWWQZ  	 r   c           
        ^ ^^^^^^^	 SSK Jn  U" U5      m[        TU5      m[        UT5      m	[	        U5      m[        T5      m[        T5      m[        S5      R                  UUUU UUUU	4S j5        g)z
An utility to log kernel metadata. We may parse metadata from kernel source code here.

It's fine to parse the generated kernel code here since the logging is
disabled by default. It would hurt compilation time.
r   )"get_kernel_category_by_source_coder   c                    > TTT TTT[        TS5      [        TS5      [        TS5      [        TS5      [        T5      [        TS5      [        TS5      [        TS5      [        TT 5      S.$ )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>%log_kernel_metadata.<locals>.<lambda>  s}    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
r   N)	r   r   r   r   r   r   r   get_metric_tablerj   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   s  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                    [         R                  5        Ho  u  pU [        5       ;   d  M  UR                  5       n[        R
                  R                  U5      (       a  [        R                  " U5        UR                  5         Mq     g)z
Purge the old log file at the beginning when the benchmark script runs.
Should do it in the parent process rather than the child processes running
each individual model.
N)	rO   itemsr_   rn   r   r   r   unlinkr|   )r5   r   rz   s      r   purge_old_log_filesr     s^     0557(**,,.Hww~~h''		(#  8r   c                 4    [        [        R                  5      $ r2   )enabled_metric_tables_implr   r_   r   r   r   r_   r_     s    %f&B&BCCr   c                    [         [           " 5       nU R                  S5       HB  nUR                  5       nU(       d  M  U[        ;   d   SU S35       eUR                  U5        MD     U$ )Nr   zMetric table name z is not registered)r   rS   r   striprO   add)
config_strenabledr5   s      r   r   r     sl    oG  %zz|// 	
 &89	
/ 	D & Nr   c                    U [        5       ;   $ r2   )r_   r5   s    r   is_metric_table_enabledr    s    (***r   c                >    U [         ;   d   SU  S35       e[         U    $ )NzMetric table z is not defined)rO   r  s    r   r   r     s*    ++R}TF/-RR+#D))r   rK   )rL   rU   )r   rS   rL   rS   )r   rS   rL   r   )r   rS   r   rS   rL   Optional[str])r   rS   r   rS   rL   r  )r   rS   r   rS   rL   r   )r   rS   rL   rS   )r   rS   r   rS   rL   zOptional[int])r   rS   r   rS   rL   zOptional[float])r   rS   r   rS   r   rS   rL   rM   )rL   OrderedSet[str])r  rS   rL   r	  )r5   rS   rL   bool)r5   rS   rL   rQ   )<
__future__r   rw   r3   r   r   r   r   	functoolsr   typingr   r   r   r	   r
   torch._inductorr   torch._inductor.utilsr   torch.utils._ordered_setr   torch._inductor.schedulerr   r"   r#   r$   r   r   r   r&   r'   r   r    r(   r)   r*   r+   r,   r.   r7   r9   rO   rQ   r   r   r   r   r   r   r   r   r   r   r   r   r_   r   r  r   r   r   r   <module>r     s<   " 
   	 	 !  A A " 4 / ;  !"       8:4 9     ! ! ! CE !#? D ,- )   !4 3 3 3M8 82 46 0 5 :/ :/ :/z         "   -	   6+ 3.10&*-*))#&)<?)	)X!D 
 
+*r   