a
    hq6                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ erdd dlmZ dd	 d
fedddZe jedd	 ddZe jedd	 d
dZeG dd dZeG dd dZG dd dZG dd dZd#ddZdd	 d dfddZdd  Zd!d" ZdS )$    N)deque)	dataclass)TYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                 C   s   | j S N)childrenx r   C/var/www/auris/lib/python3.9/site-packages/torch/profiler/_utils.py<lambda>       r   F)reversec                 c   sP   |rt ndd }t|| }|rL||}|V  |||D ]}|| q:qd S )Nc                 S   s   | S r	   r   r   r   r   r   r      r   z_traverse.<locals>.<lambda>)reversedr   append)treenext_fnZchildren_fnr   order	remaining
curr_eventchild_eventr   r   r   	_traverse   s    r   c                 C   s   |   S r	   )popr   r   r   r   r      r   T)r   r   c                 C   s   |   S r	   )popleftr   r   r   r   r      r   c                   @   sJ   e Zd ZU dZeed< dZeed< dZeed< dZeed< e	dd Z
dS )	EventMetricsr   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 C   s   | j dkrdS | j| j  S )Nr   g        )r   r    selfr   r   r   fraction_idle_time(   s    
zEventMetrics.fraction_idle_timeN)__name__
__module____qualname__r   int__annotations__r   r    r!   propertyr$   r   r   r   r   r   !   s   
r   c                   @   s*   e Zd ZU eed< eed< dZeed< dS )Intervalstartendr   r!   N)r%   r&   r'   r(   r)   r!   r   r   r   r   r+   /   s   
r+   c                   @   s>   e Zd Zdd Zdd Zdd Zdd Zee d	d
dZ	dS )EventKeyc                 C   s
   || _ d S r	   event)r#   r0   r   r   r   __init__7   s    zEventKey.__init__c                 C   s   t | jjS r	   )hashr0   idr"   r   r   r   __hash__:   s    zEventKey.__hash__c                 C   s   | j j|j jkS r	   )r0   r3   )r#   otherr   r   r   __eq__=   s    zEventKey.__eq__c                 C   s
   | j j S r	   )r0   namer"   r   r   r   __repr__@   s    zEventKey.__repr__)	intervalsc           	      C   s   d}t |dd d}|rTt| jj|d j}t| jj|d j}||k rT||| 7 }d\}}|t|k r|| }|| }|d7 }|j|jkr|j|jkr|d7 }q\n|j|_|}t| jj|j}t| jj|j}||k r\||| 7 }q\|S )Nr   c                 S   s   | j S r	   r,   r   r   r   r   r   E   r   z,EventKey.intervals_overlap.<locals>.<lambda>key)r      r=   )	sortedmaxr0   start_time_nsr,   minend_time_nsr-   len)	r#   r9   Zoverlap_timeZoverlap_startZoverlap_endijZprev_intervalZcurr_intervalr   r   r   intervals_overlapC   s.    zEventKey.intervals_overlapN)
r%   r&   r'   r1   r4   r6   r8   listr+   rF   r   r   r   r   r.   6   s
   r.   c                   @   sL   e Zd ZedddZdd Zdd Zdd	 Zd
d Zde	e
dddZdS )BasicEvaluation)profc                 C   sd   || _ i | _|   tdd | j D dd d| _dd | jD | _g | _|  | _	| 
  d S )Nc                 s   s   | ]
}|V  qd S r	   r   .0er   r   r   	<genexpr>j   r   z+BasicEvaluation.__init__.<locals>.<genexpr>c                 S   s   | j jS r	   )r0   r@   r   r   r   r   r   j   r   z*BasicEvaluation.__init__.<locals>.<lambda>r;   c                 S   s   g | ]
}|j qS r   r/   rJ   r   r   r   
<listcomp>l   r   z,BasicEvaluation.__init__.<locals>.<listcomp>)r   metricscompute_self_timer>   keysZ
event_keyseventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r#   rI   r   r   r   r1   e   s    
zBasicEvaluation.__init__c                 C   s   | j jdusJ t| j j }|r| }|j}|jD ]}||j8 }|| q8t|| j	vsxJ d|j
 d|j t|d| j	t|< |j| j	t| _q dS )zM
        Computes event's self time(total time - time in child ops).
        NzDuplicate id: z, )r   )r   kineto_resultsr   Zexperimental_event_treer   r   r
   r   r.   rO   r3   r7   r   )r#   stackr   	self_timer   r   r   r   rP   q   s"    

z!BasicEvaluation.compute_self_timec                    s.  | j jdusJ | j j }dd dd tfdd|D dd	 d
}tfdd|D dd	 d
}t|| dd	 d
| _i }d}|D ]2 t| fdd	|d}|| < |dur|n|}qd}d}|| | j }	dd }
g }|	j|
d
 |	D ]6}t|drB| d }| |	  d }||v rB|| durB|| }t|dr|
 }|
 |  }||v r|| dur|| }nt|dr|j}|j}|t|k r|| 
 |kr|d7 }q|| d }t|d}t|dst|dr|t||| qt|dr|| jt| _q|S )z
        Computes queue_depth at each event. This will calculate the queue depth data for
        All the events in the tree.
        This will return a list of Interval of queue depth data of cuda launch and kernels.
        Nc                 S   s
   | j dkS )NZcudaLaunchKernel)r7   rL   r   r   r   is_cuda_launch_kernel   s    zBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernelc                 S   s   |   tjkod| j vS )NZmem)Zdevice_typer   ZCUDAr7   lowerrZ   r   r   r   is_cuda_kernel   s    z;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernelc                 3   s   | ]} |r|V  qd S r	   r   rJ   )r[   r   r   rM      r   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>c                 S   s   |   S r	   start_nsr   r   r   r   r      r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>r;   c                 3   s   | ]} |r|V  qd S r	   r   rJ   )r]   r   r   rM      r   c                 S   s   |   S r	   r^   r   r   r   r   r      r   c                 S   s   |   S r	   r^   r   r   r   r   r      r   r   c                    s   |      kS r	   )Zlinked_correlation_idr   )cuda_launch_eventr   r   r      s   r:   c                 S   sD   t | dr|  d S t | dr(|  S t | dr8| jS tdd S )Nstart_us  r_   r@   zUnknown Event Type)hasattrrb   r_   r@   	Exceptionr/   r   r   r   new_old_event_comparator   s    


zEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparatorrb   rc   r_   r@   r=   )r   rW   rR   r>   rS   index_of_first_matchsortrd   rb   Zduration_usr_   Zduration_nsr@   rB   rC   r?   r   r+   rO   r.   r!   )r#   Zcuda_event_listZcuda_launch_eventsZcuda_kernel_eventsZkernel_mappingZlast_mapped_kernelindexZcurrent_kernel_indexZspawned_kernel_indexZ
all_eventsrf   rU   r0   
start_timeZend_timeZcurrent_queue_depthr   )r`   r]   r[   r   rT      sz    
	






z#BasicEvaluation.compute_queue_depthc                 C   s   d}d}g }| j rP| jrP|t| jd j| j d jt| j d j| jd jg7 }| j D ]@}|jdkrr|sr|j}d}|jdkrV|rV|t||j d}qVdd | j	
 D }|D ]}t||| j	t| _qdS )z4
        Computes idle time of the profile.
        Fr   ra   Tc                 S   s   g | ]
}|j qS r   r/   rJ   r   r   r   rN      r   z5BasicEvaluation.compute_idle_time.<locals>.<listcomp>N)rU   rR   r+   r@   r,   r-   rB   r!   r   rO   rQ   r.   rF   r    )r#   ZidleZ
idle_startZidle_intervalsZ
data_point
event_listr0   r   r   r   rV      s,    
z!BasicEvaluation.compute_idle_timec                    s  ddl }ttj}dd |D }d d}g d}|t|k r||  krV|d7 }q4t|d t|D ]l}t| fdd|d	}t|||d
}	|	durh||	 |krht	||	 j
|| j
 |dur|n|} qqh|d7 }q4fddj D }
|
r|jfdd|
D |jd}|jfdd|
D |jd}||| || }||| || }|d|  }dd tt||
tdddD }
|
d| }
|
S )a  
        Filter and Rank the events based on some heuristics:
        1) Events that are in the falling phase of the queue depth.
        2) Events that have a high idle_time, self_time difference.

        Parameters:
            length: The number of events to return.
        r   Nc                 S   s   g | ]
}|j qS r   )r!   rJ   r   r   r   rN     r   z/BasicEvaluation.rank_events.<locals>.<listcomp>   r=   c                    s   |  kS r	   r   r   )bottom_threasholdr   r   r     r   z-BasicEvaluation.rank_events.<locals>.<lambda>r:   )r,   r-   c                    s   g | ]}|  r|qS r   )rF   rK   r0   )decrease_intervalr   r   rN   ,  s   
c                    s   g | ]} j | jqS r   )rO   r   rn   r"   r   r   rN   3  r   )Zdtypec                    s   g | ]} j | jqS r   )rO   r$   rn   r"   r   r   rN   7  r   g333333?c                 S   s   g | ]\}}|qS r   r   )rK   _r0   r   r   r   rN   ?  s   T)r<   r   )torchrG   r   rU   rC   rangerg   argmaxr   r+   r,   rO   rQ   Ztensorfloat32meanZstdr>   zipoperator
itemgetter)r#   lengthrq   rU   Z	qd_valuesZtop_threasholdrD   rE   Znext_minimum_idxZpeak_idxrk   rY   Z	idle_timeZnormalized_gainZnormalized_selfZheuristic_score_listr   )rm   ro   r#   r   rank_events  sb    

zBasicEvaluation.rank_eventsr=   T)ry   print_enablec                    sJ     |}|s|S |rdnd}|d fdd|D 7 }|rFt| |S )NzOptimizable events:
zNo events to optimize

c                    s@   g | ]8}d  d| dt |j d j| jd ddd  	qS )zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: d   z.2fz%
)source_code_locationr0   rO   r$   rn   r"   r   r   rN   Q  s   z:BasicEvaluation.get_optimizable_events.<locals>.<listcomp>)rz   joinprint)r#   ry   r{   rk   outputr   r"   r   get_optimizable_eventsJ  s    


z&BasicEvaluation.get_optimizable_eventsN)r=   T)r%   r&   r'   r   r1   rP   rT   rV   rz   r(   boolr   r   r   r   r   rH   d   s   ^IrH   c                 C   sD   |d u s|t | krt | }t||D ]}|| | r&|  S q&d S r	   )rC   rr   )seq	predicater,   r-   rD   r   r   r   rg   _  s    
rg   c                 C   s   | S r	   r   r   r   r   r   r   h  r   c                 C   s2   | || } t | dkrd S | t| |d| S )Nr   r;   )rC   ri   r?   )r   r<   r,   r-   r   r   r   rs   h  s    rs   c                 C   s0   | d ur,t d| j}|d u r&| j} q | jS dS )Nz
\.py\(.*\)zNo source code location found)researchr7   parent)r0   matchr   r   r   r~   o  s    r~   c                  C   s6   ddl m}  |   W d    n1 s(0    Y  d S )Nr   r   )torch.autograd.profilerr   r   r   r   r   _init_for_cuda_graphs}  s    r   )r   N)	functoolsrw   r   collectionsr   Zdataclassesr   typingr   r   r   Ztorch.profilerr   Ztorch.autogradr   r   r   partialZtraverse_dfsZtraverse_bfsr   r+   r.   rH   rg   rs   r~   r   r   r   r   r   <module>   s2   

. |
	