o
    Zh#6                     @  s:  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlmZ d dlmZ d dlmZ erOd d	lmZ d ad ad ag ad
ed< g Zded< d ad aejG dd dZ g a!ded< d a"d a#d a$d a%dZddZ&eG dd dZ'd[ddZ(G dd dZ)i Z*ded< eG dd  d Z+e+,d!g d" e+,d#g d$ e+,d%g d& e+,d'g d( e+,d)g d* d\d-d.Z-d]d1d2Z.d^d5d6Z/d_d7d8Z0d`d:d;Z1d]d<d=Z2dad?d@Z3dbdCdDZ4dcdFdGZ5dddJdKZ6dZdLdMZ7dedOdPZ8e	dfdRdSZ9dgdVdWZ:dhdXdYZ;dS )i    )annotationsN)	dataclass)	lru_cache)CallablecastOptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                   @  s"   e Zd ZU ded< dZded< dS )CppOuterLoopFusedCountintZinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r    r   r   F/var/www/auris/lib/python3.10/site-packages/torch/_inductor/metrics.pyr   (   s   
 r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsreturnNonec                   C  s@   da dadat  t  dadat  da	da
dadad S )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countr   r   r   r   reset;   s   r&   c                   @  sB   e Zd ZU dZded< ded< ded< ded< ded< ded< d	S )
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r   r    r!   r   r#   N)r   r   r   __doc__r   r   r   r   r   r'   U   s   
 r'   	list[str]c                   C  s   dd t tD S )Nc                 S  s   g | ]}|j qS r   name).0fieldr   r   r   
<listcomp>e   s    z%get_metric_fields.<locals>.<listcomp>)dataclassesfieldsr'   r   r   r   r   get_metric_fieldsd   s   r1   c                   @  s2   e Zd ZdZdddZdddZedd
dZdS )CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    r   r   c                 C  s&   i | _ t D ]
}t | | j |< qd S N)cached_metricsr1   globals)selfmetricr   r   r   __init__o   s   
zCachedMetricsHelper.__init__r'   c                 C  s6   i }t  D ]}t | | j|  ||< qtdi |S )Nr   )r1   r5   r4   r'   )r6   Zdelta_metricsr7   r   r   r   
get_deltast   s   
zCachedMetricsHelper.get_deltasdeltac                 C  s(   t  D ]}t |  t| |7  < qd S r3   )r1   r5   getattr)r:   r7   r   r   r   apply_deltas{   s   
z CachedMetricsHelper.apply_deltasNr   r   )r   r'   )r:   r'   r   r   )r   r   r   r(   r8   r9   staticmethodr<   r   r   r   r   r2   h   s    

r2   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                   @  s`   e Zd ZU ded< ded< dZded< dddZdddZdddZdddZe	dddZ
dS )MetricTablestr
table_namer)   column_namesr   r   num_rows_addedrow_fn4Callable[[], dict[str, Optional[Union[str, float]]]]r   r   c                   s   | j t vrd S |  t| jt ks"J t| j dt  t| jt  ks=J t| j dt   t }|g fdd| jD  }tdd |D sXJ | t	t
t | d S )Nz v.s. c                   s   g | ]} | qS r   r   )r,   Zcolumn_nameZrow_dictr   r   r.      s    z'MetricTable.add_row.<locals>.<listcomp>c                 s  s    | ]}t |tV  qd S r3   )
isinstancerA   )r,   ir   r   r   	<genexpr>   s    z&MetricTable.add_row.<locals>.<genexpr>)rB   enabled_metric_tableslenrC   r   keysr   all
_write_rowr   listrA   )r6   rE   Zbnrowr   rG   r   add_row   s   zMetricTable.add_rowc                 C  s   d| j  dS )NZmetric_table_z.csv)rB   )r6   r   r   r   output_filename   s   zMetricTable.output_filenamec                 C  sX   |   }t|d}tj|dd}|dg| j  W d    d S 1 s%w   Y  d S )Nw
lineterminatorZ
model_name)rS   opencsvwriterwriterowrC   )r6   filenamefdrZ   r   r   r   write_header   s
   "zMetricTable.write_headerrQ   c                 C  s   |   }| jdkrtj|s|   |  jd7  _t|D ]\}}t|tr,|d}n	|d u r3d}n|}|||< qt	|d}t
j|dd}|| W d    d S 1 sWw   Y  d S )Nr      z.6f arU   rV   )rS   rD   ospathexistsr^   	enumeraterH   floatrX   rY   rZ   r[   )r6   rQ   r\   idxZorig_valnew_valr]   rZ   r   r   r   rO      s   


"zMetricTable._write_rowr+   c                 C  s   t | |}|t| < d S r3   )r@   r?   )r+   rC   tabler   r   r   register_table   s   
zMetricTable.register_tableN)rE   rF   r   r   )r   rA   r=   )rQ   r)   r   r   )r+   rA   rC   r)   r   r   )r   r   r   r   rD   rR   rS   r^   rO   r>   rj   r   r   r   r   r@      s   
 



r@   Zslow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyZfused_kernel_pathZfused_kernel_latencyZslow_down_ratioZgraph_stats)Zgraph_idZnum_nodes_before_fusionZnum_nodes_after_fusionZpersistent_red_perf)
Zkernel0_pathrk   rm   Zkernel3_pathZkernel0_latencyrl   rn   Zkernel3_latency
size_hintsreduction_hintZ'fusion_failure_due_to_indexing_mismatch)Zpre_grad_graph_idZpost_grad_graph_idZ
node1_nameZ
node2_nameZnode1_debug_strZnode2_debug_strZcommon_buffer_namesZfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryro   rp   Zline_of_codeZnum_loadZ	num_storeZnum_for_loopZnum_atomic_addnum_argsxnumelynumelrnumelZkernel_args_num_gbkernel_module_coderA   c                 C  s8   ddl m} ddlm} || }||}t|jjS )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    r_   )PyCodeCache)get_triton_kernel)Z	codecacher{   wrapper_benchmarkr|   loadinspect	getsourcefn)rz   r{   r|   modZkernelr   r   r   _parse_kernel_fn_code  s
   
r   proper_kernel_fn_coder   c                 C  s   t |  S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rL   
splitlines)r   r   r   r   _parse_kernel_line_of_code&  s   r   ru   Optional[str]c                 C  s.   |dkrd S t d| }|sJ d|dS )NZforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!r_   researchgroup)rz   ru   mr   r   r   _parse_size_hints-  s
   
r   c                 C  s.   | dvrd S t d|}|sJ d|dS )N)Z	reductionZpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!r_   r   )ru   rz   r   r   r   r   _parse_reduction_hint6  s
   
r   patternc                 C  s
   |  |S r3   )count)r   r   r   r   r   _count_pattern@     
r   c                 C  sP   |   d }|dsJ |d}|d}||d | }|d}t|S )Nr   def (z):r_   ,)r   
startswithindexsplitrL   )r   Zdef_lineZ	start_idxZend_idxZdecl_csvcompsr   r   r   _count_argsD  s   


r   kernel_fn_codec                 C  s   |  d}| |d S )z
    Skip decorators.
    r   N)r   )r   	start_posr   r   r   _parse_proper_kernel_fn_codeN  s   
r   numel_arg_nameOptional[int]c                 C  s(   t | d| }|rt|dS d S )Nz
 = ([\d]+)r_   )r   r   r   r   )r   r   r   r   r   r   _parse_numelV  s   r   Optional[float]c                 C  s$   t d| }|rt|dS 	 dS )z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)r_   N)r   r   rf   r   )r   ru   r   r   r   r   _parse_kernel_args_num_gb^  s
   r   rs   rt   c              
     sh   ddl m} || t |t| t|tttd fdd dS )z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    r_   )"get_kernel_category_by_source_coderq   c                     sX    t dt dt dt dttdtdtdt dS )	Nztl.loadztl.storezfor ztl.atomic_addrw   rx   ry   rr   )r   r   r   r   r   ru   r   Zkernel_line_of_coders   rt   r   rp   ro   r   r   <lambda>  s$   z%log_kernel_metadata.<locals>.<lambda>N)	r}   r   r   r   r   r   r   get_metric_tablerR   )rs   rt   rz   r   r   r   r   log_kernel_metadatas  s   	

r   c                  C  sF   t  D ]\} }| t v r | }tj|rt| |  qdS )z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	r?   itemsrK   rS   rb   rc   rd   unlinkr^   )r+   ri   r\   r   r   r   purge_old_log_files  s   

r   OrderedSet[str]c                   C  s
   t tjS r3   )enabled_metric_tables_implr
   rK   r   r   r   r   rK     r   rK   
config_strc                 C  sN   t t  }| dD ]}| }|sq
|tv sJ d| d|| q
|S )Nr   zMetric table name z is not registered)r   rA   r   stripr?   add)r   enabledr+   r   r   r   r     s   


r   r+   boolc                 C  s
   | t  v S r3   )rK   r*   r   r   r   is_metric_table_enabled  r   r   c                 C  s    | t v sJ d|  dt |  S )NzMetric table z is not defined)r?   r*   r   r   r   r     s   r   r=   )r   r)   )rz   rA   r   rA   )r   rA   r   r   )rz   rA   ru   rA   r   r   )ru   rA   rz   rA   r   r   )r   rA   r   rA   r   r   )r   rA   r   rA   )r   rA   r   rA   r   r   )r   rA   ru   rA   r   r   )rs   rA   rt   rA   rz   rA   r   r   )r   r   )r   rA   r   r   )r+   rA   r   r   )r+   rA   r   r@   )<
__future__r   rY   r/   r   rb   r   r   	functoolsr   typingr   r   r   r   r	   Ztorch._inductorr
   Ztorch._inductor.utilsr   Ztorch.utils._ordered_setr   Ztorch._inductor.schedulerr   r   r   r   r   r   r   r    r!   r   r   r"   r#   r$   r%   r&   r'   r1   r2   r?   r@   rj   r   r   r   r   r   r   r   r   r   r   r   rK   r   r   r   r   r   r   r   <module>   s    


=



	








,

