
    JThq6                     b   S SK r S SKrS SKrS SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJr  \(       a  S SKJr  S S	4S
\4S jjr\ R"                  " \S SS9r\ R"                  " \S S	S9r\ " S S5      5       r\ " S S5      5       r " S S5      r " S S5      rSS jrS S S4S jrS rS rg)    N)deque)	dataclass)TYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     U R                   $ N)childrenxs    M/var/www/auris/envauris/lib/python3.13/site-packages/torch/profiler/_utils.py<lambda>r      s    1::    Freversec              #      #    U(       a  [         OS n[        U" U 5      5      nU(       a<  U" U5      nUv   U" U" U5      5       H  nUR                  U5        M     U(       a  M;  g g 7f)Nc                     U $ r    r   s    r   r   _traverse.<locals>.<lambda>   s    qr   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser       sX     H[EeDk"I
Y'
 Z!89K[) : )s   A"A(&A(c                 "    U R                  5       $ r   )popr   s    r   r   r      s
    aeegr   T)r   r   c                 "    U R                  5       $ r   )popleftr   s    r   r   r      s
    r   c                   ^    \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   \
S 5       rSrg	)
EventMetrics!   r   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   g        )r(   r*   selfs    r   fraction_idle_timeEventMetrics.fraction_idle_time(   s*      A%  4#8#888r   r   N)__name__
__module____qualname____firstlineno__r(   int__annotations__r)   r*   r+   propertyr/   __static_attributes__r   r   r   r&   r&   !   s=    cL#L#K9 9r   r&   c                   8    \ rS rSr% \\S'   \\S'   Sr\\S'   Srg)Interval/   startendr   r+   r   N)r1   r2   r3   r4   r5   r6   r+   r8   r   r   r   r:   r:   /   s    J	HKr   r:   c                   @    \ rS rSrS rS rS rS rS\\	   4S jr
Srg	)
EventKey6   c                     Xl         g r   event)r.   rC   s     r   __init__EventKey.__init__7   s    
r   c                 @    [        U R                  R                  5      $ r   )hashrC   idr-   s    r   __hash__EventKey.__hash__:   s    DJJMM""r   c                 \    U R                   R                  UR                   R                  :H  $ r   )rC   rH   )r.   others     r   __eq__EventKey.__eq__=   s    zz}}..r   c                 0    U R                   R                   $ r   )rC   namer-   s    r   __repr__EventKey.__repr__@   s    **//"#r   	intervalsc                    Sn[        US S9nU(       af  [        U R                  R                  US   R                  5      n[        U R                  R                  US   R                  5      nX4:  a  X$U-
  -  nSu  pVU[        U5      :  a  X   nX   nUS-  nUR                  UR                  :  a4  UR                  UR                  :  a  US-  nMW  UR                  Ul        Un[        U R                  R                  UR                  5      n[        U R                  R                  UR                  5      nX4:  a  X$U-
  -  nU[        U5      :  a  M  U$ )Nr   c                     U R                   $ r   r<   r   s    r   r   ,EventKey.intervals_overlap.<locals>.<lambda>E   s    AGGr   key)r      rZ   )	sortedmaxrC   start_time_nsr<   minend_time_nsr=   len)	r.   rS   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapEventKey.intervals_overlapC   s<   9*;<	

 8 8)A,:L:LMMdjj44il6F6FGK*m ;;#i. %LM%LMFA  =#6#66 $$}'8'88FA*7*;*;M'A

 8 8-:M:MNMdjj44m6G6GHK*m ;;! #i. $ r   rB   N)r1   r2   r3   r4   rD   rI   rM   rQ   listr:   rh   r8   r   r   r   r?   r?   6   s&    #/$4> r   r?   c                   P    \ rS rSrS\4S jrS rS rS rS r	SS\
S	\4S
 jjrSrg)BasicEvaluationd   profc                 T   Xl         0 U l        U R                  5         [        S U R                  R	                  5        5       S S9U l        U R
                   Vs/ s H  o"R                  PM     snU l        / U l        U R                  5       U l
        U R                  5         g s  snf )Nc              3   $   #    U  H  ov   M     g 7fr   r   ).0es     r   	<genexpr>+BasicEvaluation.__init__.<locals>.<genexpr>j   s     ,+1Q+s   c                 .    U R                   R                  $ r   )rC   r]   r   s    r   r   *BasicEvaluation.__init__.<locals>.<lambda>j   s    AGG<Q<Qr   rX   )r   metricscompute_self_timer[   keys
event_keysrC   eventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r.   rn   rr   s      r   rD   BasicEvaluation.__init__e   s    57  ,))+,2Q
 )-81ww8/1 $ 8 8 :  9s   B%c                 N   U R                   R                  c   e[        U R                   R                  R                  5       5      nU(       a  UR	                  5       nUR
                  nUR                   H"  nX4R
                  -  nUR                  U5        M$     [        U5      U R                  ;  d!   SUR                   SUR                   35       e[        US9U R                  [        U5      '   UR
                  U R                  [        U5         l        U(       a  M  gg)z=
Computes event's self time(total time - time in child ops).
NzDuplicate id: z, )r)   )r   kineto_resultsr   experimental_event_treer"   r(   r   r   r?   rw   rH   rP   r&   )r.   stackr   	self_timer   s        r   rx   !BasicEvaluation.compute_self_timeq   s     ||**666dll11IIKL J"33I)22999	[)  3 $DLL8C
b0ABC81=91UDLL*-. ",!<!< LL$ er   c                 
  ^^^ U R                   R                  c   eU R                   R                  R                  5       nS mS m[        U4S jU 5       S S9n[        U4S jU 5       S S9n[        X#-   S S9U l        0 nS	nU H  m[        UU4S
 jUS9nXdT'   Ub  UOUnM     S	nSnX#-   U R                  -   n	S n
/ nU	R                  U
S9  U	 GH  n[        US5      (       aE  UR                  5       S-  nUR                  5       UR                  5       -   S-  nX;   a	  XL   b  XL   n[        US5      (       a@  UR                  5       nUR                  5       UR                  5       -   nX;   a	  XL   b  XL   nO)[        US5      (       a  UR                  nUR                  nU[        U5      :  aB  X7   R                  5       W::  a,  US-  nU[        U5      :  a  X7   R                  5       U::  a  M,  X-
  S-   n[        US	5      n[        US5      (       d  [        US5      (       a  UR!                  [#        WWU5      5        GMy  [        US5      (       d  GM  XR$                  ['        U5         l        GM     U$ )z
Computes queue_depth at each event. This will calculate the queue depth data for
All the events in the tree.
This will return a list of Interval of queue depth data of cuda launch and kernels.
c                      U R                   S:H  $ )NcudaLaunchKernel)rP   rr   s    r   is_cuda_launch_kernelBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s    66///r   c                     U R                  5       [        R                  :H  =(       a    SU R                  R	                  5       ;  $ )Nmem)device_typer   CUDArP   lowerr   s    r   is_cuda_kernel;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   s+    ==?joo5U%qvv||~:UUr   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rq   rr   r   s     r   rs   6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     D1+@+CQQ   !	!c                 "    U R                  5       $ r   start_nsr   s    r   r   5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   
    !**,r   rX   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rq   rr   r   s     r   rs   r      s     =1>!+<QQr   c                 "    U R                  5       $ r   r   r   s    r   r   r      r   r   c                 "    U R                  5       $ r   r   r   s    r   r   r      s
    1::<r   r   c                 F   > U R                  5       TR                  5       :H  $ r   )linked_correlation_id)r   cuda_launch_events    r   r   r      s    !113$::<=r   rV   c                     [        U S5      (       a  U R                  5       S-  $ [        U S5      (       a  U R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nstart_us  r   r]   zUnknown Event Type)hasattrr   r   r]   	ExceptionrB   s    r   new_old_event_comparatorEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   s`    uj))~~'$..uj))~~''uo..***011r   r   r   r   r]   rZ   )r   r   r{   r[   r|   index_of_first_matchsortr   r   duration_usr   duration_nsr]   r_   r`   r\   r   r:   rw   r?   r+   )r.   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   r~   rC   
start_timeend_timecurrent_queue_depthr   r   r   s                   @@@r   r}   #BasicEvaluation.compute_queue_depth   s    ||**666,,55<<>	0	V $DD&
 $==&

 "39O
 35!3("=(	E 16,-*/*;AS "4  !!'<t{{J
	2 ,.45Euj))"^^-4
!NN,u/@/@/BBdJ*~/D/P+9+@(uj))"^^-
 >>+e.?.?.AA*~/D/P+9+@(00"00
 ,, %s+='>>'=FFH %)$	 %s+='>>'=FFH #7"MPQ"Q"%&91"=uj))WUJ-G-G ''Z3FG 00<OXe_-9A  D  r   c                     SnSn/ nU R                   (       a  U R                  (       aw  U[        U R                  S   R                  U R                   S   R                  5      [        U R                   S   R
                  U R                  S   R                  5      /-  nU R                    Hi  nUR                  S:X  a  U(       d  UR
                  nSnUR                  S:  d  M:  U(       d  MC  UR                  [        X$R                  5      5        SnMk     U R                  R                  5        Vs/ s H  oUR                  PM     nnU H8  n[        U5      R                  U5      U R                  [        U5         l        M:     gs  snf )z$
Computes idle time of the profile.
Fr   r   TN)r~   r{   r:   r]   r<   r=   r_   r+   r   rw   ry   rC   r?   rh   r*   )r.   idle
idle_startidle_intervals
data_pointrr   
event_listrC   s           r   r   !BasicEvaluation.compute_idle_time   sD   
 
)+  T[[Q55t7L7LQ7O7U7UV..r266B8S8ST N
 //J%%*4'^^
%%)dd%%hz;K;K&LM 0 (,||'8'8':;':!gg':
;E9A:/ LL%)6   <s   5Fc                   ^ SSK n[        [        U R                  5      5      nU Vs/ s H  oDR                  PM     nnSmSn/ nSnU[        U5      :  a  XX   T:  a  US-  nM  [        US-   [        U5      5       He  n	[        UU4S jU	S9n
[        XYU
S9nUc  M!  X[   U:  d  M+  UR                  [        X;   R                  X8   R                  5      5        U
b  U
OUn  O   US-  nU[        U5      :  a  M  U R                  R                  5        Vs/ s H  nUR                  U5      (       d  M  UPM     nnU(       Ga  UR                  U Vs/ s H  oR                  U   R                   PM     snUR"                  S9nUR                  U Vs/ s H  oR                  U   R$                  PM     snUR"                  S9nXR'                  U5      -
  UR)                  U5      -  nXR'                  U5      -
  UR)                  U5      -  nUS	U-  -   n[+        [-        UU5      [.        R0                  " S5      S
S9 VVs/ s H  u  nnUPM
     nnnUSU nU$ s  snf s  snf s  snf s  snf s  snnf )z
Filter and Rank the events based on some heuristics:
1) Events that are in the falling phase of the queue depth.
2) Events that have a high idle_time, self_time difference.

Parameters:
    length: The number of events to return.
r   N   rZ   c                    > U T:*  $ r   r   )r   bottom_threasholds    r   r   -BasicEvaluation.rank_events.<locals>.<lambda>  s    .?)?r   rV   )r<   r=   )dtypeg333333?T)rY   r   )torchrj   r   r~   r+   r`   ranger   argmaxr   r:   r<   rw   ry   rh   tensorr)   float32r/   meanstdr[   zipoperator
itemgetter)r.   lengthr   r~   rr   	qd_valuestop_threasholddecrease_intervalrd   re   next_minimum_idxpeak_idxrC   r   r   	idle_timenormalized_gainnormalized_selfheuristic_score_list_r   s                       @r   rank_eventsBasicEvaluation.rank_events  s    	)>)> ?@,<=,<q]],<	=#i. |//Q1q5#i.1 $8?q$  "):JK 'I,?>,Q%,, ,6<<>N>Q>W>W
 -=,H(aA! 2" FA+ #i. 2 **,
,&&'89 , 	 

 ?IJzee$11zJmm % I EOPZEe$77ZPmm % I  )::i+@@EIIiDXXO(::i+@@EIIiDXXO#2S?5J#J 
 !',j9 ++A. !!HAu !   $GV,Js >:
 K Qs#   II$6I$"I)"I.I3r   print_enablec                 @   U R                  U5      nU(       d  U$ U(       a  SOSnUSR                  U Vs/ s HA  nS SU S[        UR                  5       SU R                  U   R
                  S-  S	 S
S 3	PMC     sn5      -  nU(       a  [        U5        U$ s  snf )NzOptimizable events:
zNo events to optimize

zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: rm   z.2fz%
)r   joinsource_code_locationrC   rw   r/   print)r.   r   r   r   outputrC   s         r   get_optimizable_events&BasicEvaluation.get_optimizable_eventsJ  s    %%f-
,6(<U$)) ( (E J g +EKK89 :||E*==CCH I	
	
 (	
 		
 &Ms   AB
)r|   rz   r{   rw   r   r~   N)rZ   T)r1   r2   r3   r4   r   rD   rx   r}   r   r   r5   boolr   r8   r   r   r   rl   rl   d   s>    
!W 
!=,\ |08GRS D  r   rl   c                     Ub  U[        U 5      :  a  [        U 5      n[        X#5       H  nU" X   5      (       d  M  Us  $    g r   )r`   r   )seq	predicater<   r=   rd   s        r   r   r   _  s@    
{cSXo#h5SVH  r   c                     U $ r   r   r   s    r   r   r   h  s    ar   c                 `    XU n [        U 5      S:X  a  g U R                  [        XS95      U-   $ )Nr   rX   )r`   r   r\   )r   rY   r<   r=   s       r   r   r   h  s2    
C.C
3x1}99S&'%//r   c                     U b>  [         R                  " SU R                  5      nUc  U R                  n M5  U R                  $ g)Nz
\.py\(.*\)zNo source code location found)researchrP   parent)rC   matchs     r   r   r   o  s:    

		-4=LLEzz*r   c                  T    SSK Jn   U " 5           S S S 5        g ! , (       d  f       g = f)Nr   r   )torch.autograd.profilerr   r   s    r   _init_for_cuda_graphsr   }  s    /	 
s   
')r   N)	functoolsr   r   collectionsr   dataclassesr   typingr   r   r   torch.profilerr   torch.autogradr	   r   r    partialtraverse_dfstraverse_bfsr&   r:   r?   rl   r   r   r   r   r   r   r   <module>r      s      	  !   + % + *>u * *   4EtT  ,e
 
9 
9 
9   + +\x xv  qd 0+r   