
    [ThO                     l   S SK r S SKrS SKrS SKJrJr  S SKJr  S SKJ	r	J
r
  S SKJr  S SKrS SKJr  / SQr " S S	\5      rS
 rS rS rS r " S S5      r " S S5      r\" S/ SQ5      r " S S\5      r " S S\5      r " S S\5      r " S S5      rS rSrSr S r!S"S  jr"         S#S! jr#g)$    N)defaultdict
namedtuple)
attrgetter)AnyOptional)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                      ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
\S	 5       r       SS
 jrS rS rS\S\4S jr   SS jrS rSrU =r$ )r
      z'A list of Events (for pretty printing).c                    > UR                  SS 5      nUR                  SS5      nUR                  SS5      n[        TU ]  " U0 UD6  X0l        X@l        SU l        XPl        g )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         T/var/www/auris/envauris/lib/python3.13/site-packages/torch/autograd/profiler_util.pyr   EventList.__init__   s`    ZZd3
$4e<ZZe4
$)&)%- %    c                 r    U R                  5         U R                  5         U R                  5         SU l        g )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r   s    r#   _build_treeEventList._build_tree'   s.    ##% &&(r%   c                 "    U R                  5       $ N)tabler*   s    r#   __str__EventList.__str__-   s    zz|r%   c                     [        5       n[        [        U 5      5       H  nX   R                  c  M  X   R                  R                  X   R                  :X  d  M>  [        X   R                  R
                  5      S:X  d  Me  X   R
                  X   R                  l        X   R                  X   R                  l        X   R
                   H  nX   R                  Ul        M     UR                  U5        M     [        U5      S:X  a  g [        U 5       VVs/ s H  u  pEXA;  d  M  UPM     nnnU R                  5         U R                  U5        GMX  s  snnf )N   r   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r   	to_deleteidxchindevnew_evtss          r#   r(   EventList._remove_dup_nodes0   s   ISY'I((4	,,11TY^^CDI00==>!C8<	8N8NDI((53793D3DDI((0"i44(,	(<(< 5MM#& ( 9~"*3D/R/wsS=Q/HRJJLKK!#  Ss   E-Ec                    U  Vs/ s H8  nUR                   (       a  M  UR                  [        R                  :X  d  M6  UPM:     nn[	        U[        S5      S9n[        R                  " US S9nU GH  u  pV[	        US S9n/ nU H  n	[        U5      S:  a  US   n
U	R                  R                  U
R                  R                  :  d.  U	R                  R                  U
R                  R                  :  a  UR                  5         ODU
R                  U	5        U	R                  b   SU	R                   35       eU	R!                  U
5        O[        U5      S:  a  M  UR#                  U	5        M     GM     gs  snf )	a  Populate child events into each underlying FunctionEvent object.

One event is a child of another if [s1, e1) is inside [s2, e2). Where
s1 and e1 would be start and end of the child event's interval. And
s2 and e2 start and end of the parent event's interval

Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
be a parent of two other intervals.

If for any reason two intervals intersect only partially, this function
will not record a parent child relationship between then.
thread)keyc                 2    U R                   U R                  4$ r.   )rG   node_idevents    r#   <lambda>2EventList._populate_cpu_children.<locals>.<lambda>a   s    u||U]]&Cr%   c                 \    U R                   R                  U R                   R                  * /$ r.   )
time_rangestartendrK   s    r#   rM   rN   s   s$    5#3#3#9#9E<L<L<P<P;P"Qr%   r   Nz(There is already a CPU parent event for )is_asyncdevice_typer	   CPUsortedr   	itertoolsgroupbyr6   rP   rQ   rR   r   append_cpu_childr7   rH   set_cpu_parentappend)r   evtsync_eventseventsthreads
_thread_idthread_eventsthread_events_current_eventsrL   parents              r#   r'    EventList._populate_cpu_childrenD   sp   $ 
<< $'OOz~~$E  	 

 8$
 ##C
  *1%J#QN 35N'.)A-+B/F((..&2C2C2G2GG ++//&2C2C2G2GG '**,//6!,,4REeii[QR4,,V4 .)A-  %%e,# ( *19
s   E:E:E:c                 ^  ^ U4S jm0 nU  HJ  nT" U5      b  M  UR                   c  M  UR                  UR                  4nX1;  d  M<  UR                   X'   ML     U  HN  nT" U5      nUc  M  UR                  c   eUR                  UR                  4nX1;   a  X   Ul         MG  / Ul         MP     g )Nc                 T   > U c  g U R                   S:X  a  U $ T" U R                  5      $ Nr3   )scoper7   )r]   	bw_parents    r#   rk   6EventList._set_backward_stacktraces.<locals>.bw_parent   s*    {a
 00r%   )stacksequence_nrrG   
fwd_thread)r   
fwd_stacksr]   tprk   s        @r#   r)   #EventList._set_backward_stacktraces   s    	1 
C~%#))*?__cjj1&$'IIJM	  C#A}||///]]ALL1? *CI "CI r%   c                 &    [        S U  5       5      $ )Nc              3   8   #    U  H  oR                   v   M     g 7fr.   )self_cpu_time_total.0rL   s     r#   	<genexpr>0EventList.self_cpu_time_total.<locals>.<genexpr>   s     ?$,,$   )sumr*   s    r#   rv   EventList.self_cpu_time_total   s    ?$???r%   c                 N    [        U UUUUUUU R                  U R                  US9
$ )a  Print an EventList as a nicely formatted table.

Args:
    sort_by (str, optional): Attribute used to sort entries. By default
        they are printed in the same order as they were registered.
        Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
        ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
        ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
        ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
        ``self_xpu_memory_usage``, ``count``.
    top_level_events_only(bool, optional): Boolean flag to determine the
        selection of events to display. If true, the profiler will only
        display events at top level like top-level invocation of python
        `lstm`, python `add` or other functions, nested events like low-level
        cpu/cuda/xpu ops events are omitted for profiler result readability.

Returns:
    A string containing the table.
)	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r   )r   r   r   r   r   r   r   r   s           r#   r/   EventList.table   s;    : !5"7$;//''"7
 	
r%   c                    SSK nU R                  (       d  SOU R                  n[        US5       nSnUR                  S5        U  GH  nUR                  c  M  UR                  SR                  UR                  UR                  R                  UR                  R                  5       UR                  (       d  UR                  OSUR                   SUR                   S	35      5        UR                   HQ  nUR                  S
UR                   SUR                  R                   SUR                   SU SU S35        US-  nMS     GM     [        U 5      S:  a=  UR                  UR                  5       S-
  UR                   5        UR#                  5         UR                  S5        SSS5        g! , (       d  f       g= f)zExport an EventList as a Chrome tracing tools file.

The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

Args:
    path (str): Path where the trace will be written.
r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r3      ])osr   openwrite
trace_nameformatrP   rQ   
elapsed_us	is_remoterG   rJ   r:   r6   seektellSEEK_SETtruncate)r   pathr   device_namefnext_idr]   _s           r#   export_chrome_traceEventList.export_chrome_trace   sz    	$($4$4f$:J:J$_G GGCL>>)' (.v,,113"}} 

)#++l3::,bQ(  A GG%cnn%5 6!!$!5!5 6 7""%** .!!(	 ***5 7((	 qLG %' D 4y1}qvvx!|R[[1

GGCLW __s   E=F66
Gc                 
    / SQ$ )N)rv   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_total r*   s    r#   supported_export_stacks_metrics)EventList.supported_export_stacks_metrics	  s    
 	
r%   r   metricc           	         X R                  5       ;  a%  [        S[        U R                  5       5      -   5      e[        R                  SS5      n[	        US5       nU  H  nUR
                  (       d  M  [        UR
                  5      S:  d  M1  [        UUR                  SS5      R                  SS5      R                  S	S5      5      n[        U5      S:  d  M~  S
n[        UR
                  5       H  nXxR                  U5      -  nUS-  nM     US S S-   [        [        U5      5      -   nUR                  US-   5        M     S S S 5        g ! , (       d  f       g = f)Nzmetric should be one of: z ;	
____r   r   r   devicexpuprivateuse1 ;rS    
)r   
ValueErrorstr	maketransr   rm   r6   getattrreplaceintreversed	translater   )	r   r   r   translate_tabler   r]   metric_value	stack_strentrys	            r#   export_stacksEventList.export_stacks  s'   ==??+d::<=>  --&9$_999SYY!!3#*vx8 1 9	$L <(1,$&	%-cii%8E%)III%,I &9 %.crNS$83s<?P;Q$Q		D 01  __s    E4EA	EA+E
Ec                   ^ U R                   (       d   e[        [        5      nS[        [        S4   4U4S jjnU  H  mUU" TXU5         R                  T5        M!     [        UR                  5       U R                  U R                  U R                  S9nU H5  mTR                  SU Tl        U(       d  STl        U(       a  M.  STl        M7     U$ )a  Averages all function events over their keys.

Args:
    group_by_input_shapes: group entries by
        (event name, input shapes) rather than just event name.
        This is useful to see which input shapes contribute to the runtime
        the most and may help with size-specific optimizations or
        choosing the best candidates for quantization (aka fitting a roof line)

    group_by_stack_n: group by top n stack trace entries

    group_by_overload_name: Differentiate operators by their overload name e.g. aten::add.Tensor
    and aten::add.out will be aggregated separately

Returns:
    An EventList containing FunctionEventAvg objects.
return.c                   > [        U R                  5      [        U R                  5      [        U R                  5      [        U R                  5      [        U R
                  5      /nU(       a  UR                  TR                  5        U(       a$  UR                  [        U R                  5      5        US:  a  X@R                  S U -  n[        U5      $ Nr   )r   rH   rJ   rU   	is_legacyis_user_annotationr\   overload_nameinput_shapesrm   tuple)rL   group_by_input_shapesgroup_by_stack_ngroup_by_overload_namerH   r]   s        r#   get_key'EventList.key_averages.<locals>.get_keyC  s     EIIEMM"E%%&EOO$E,,-C &

3,,-$

3u1123!#{{#4$455:r%   r   r   r   Nr   )r   r   r   r   r   r;   r
   valuesr   r   r   rm   r   r   )r   r   r   r   statsr   avg_listr]   s          @r#   key_averagesEventList.key_averages)  s    . 9DEU9V	38_	$ C.BX c#h  LLN''//''	
 C		"3#34CI(#% ))$&!  r%   c                 P    [        5       nU  H  nX-  nSUl        M     SUl        U$ )z>Averages all events.

Returns:
    A FunctionEventAvg object.
NTotal)r   rH   )r   
total_statr]   s      r#   total_averageEventList.total_averagej  s4     &'
CJ!JN  !
r%   )r   r   r   r   )Nd   K   7   P   NF)Fr   F)__name__
__module____qualname____firstlineno____doc__r   r+   r0   r(   r'   r)   propertyrv   r/   r   r   r   r   r   r   __static_attributes____classcell__)r"   s   @r#   r
   r
      s    1& "(C-J#4 @ @
   "#(
T6p
2# 2s 24 $$	?B r%   r
   c                 J    SnSnX:  a  X-  S S3$ X:  a  X-  S S3$ U S S3$ )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr   )time_usUS_IN_SECONDUS_IN_MSs      r#   _format_timer   x  sM    "LH(-Q//$S),,c]"r%   c                 H    US:X  a  U S:X  d
   SU  35       egU S-  U-  S S3$ )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r   )r   total_time_uss     r#   _format_time_sharer     s?    !|G=gYGG|o-c2!44r%   c                     SnSU-  nSU-  n[        U 5      U:  a  U S-  U-  S S3$ [        U 5      U:  a  U S-  U-  S S3$ [        U 5      U:  a  U S-  U-  S S3$ [        U 5      S-   $ )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r#   _format_memoryr     s    	B	B	B
6{b3,#C(,,	V	3,#C(,,	V	3,#C(,,6{T!!r%   c                 "   ^  [        U 4S j5      $ )Nc                 .   > [        [        U T5      5      $ r.   )r   r   )r   r8   s    r#   rM   !_attr_formatter.<locals>.<lambda>  s    gdD.A!Br%   )r   r8   s   `r#   _attr_formatterr     s    BCCr%   c                       \ rS rSrSr\" S5      r\" S5      r\" S5      r\" S5      r	\" S5      r
\" S5      r\S	 5       r\S
 5       r\\" S\S9S 5       5       rSrg)r   i  zsHelpers for FunctionEvent and FunctionEventAvg.

The subclass should define `*_time_total` and `count` attributes.
cpu_timedevice_timecpu_time_totaldevice_time_totalrv   self_device_time_totalc                 ^    U R                   S:X  a  S$ SU R                  -  U R                   -  $ Nr   g        r   )countr  r*   s    r#   r  FormattedTimesMixin.cpu_time  s+    jjAosQ31D1D+Dtzz+QQr%   c                 ^    U R                   S:X  a  S$ SU R                  -  U R                   -  $ r  )r  r  r*   s    r#   r  FormattedTimesMixin.device_time  s+    jjAosT31G1G+G$**+TTr%   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     U R                   $ r.   )r  r*   s    r#   	cuda_timeFormattedTimesMixin.cuda_time  s     r%   r   N)r   r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r  r  r   FutureWarningr  r   r   r%   r#   r   r     s    
 #:.L%m4O()9:+,?@-.CD!01I!JR R U U F 	 
 r%   r   c                        \ rS rSrS rS rSrg)r   i  c                     Xl         X l        g r.   )rQ   rR   )r   rQ   rR   s      r#   r   Interval.__init__  s    
r%   c                 4    U R                   U R                  -
  $ )z$
Returns the length of the interval
rR   rQ   r*   s    r#   r   Interval.elapsed_us  s     xx$**$$r%   r  N)r   r   r   r   r   r   r   r   r%   r#   r   r     s    %r%   r   r   )r8   r   durationc                   T   \ rS rSrSrSSSSSSSSSSSS\R                  SSSSSSSS4S jrS rS	 r	S
 r
\S 5       r\S 5       r\\" S\S9S 5       5       r\S 5       r\S 5       r\S 5       r\\" S\S9S 5       5       r\S 5       r\\" S\S9S 5       5       r\S 5       rS rSrg)r   i  z.Profiling information about a single function.Nr   FrS   c                    Xl         UU l        X l        X`l        UU l        [        XE5      U l        X0l        Xpl        / U l	        SU l
        / U l        S U l        Xl        UU l        UU l        Xl        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UU l        UU l        Uc  UOUU l        UU l        UU l        UU l        SU l        SU l        SU l         g )Nr3   rS   )!idrJ   r8   r   r   r   rP   rG   ro   r:   r  r9   r7   r   concrete_inputskwinputsrm   rj   r   cpu_memory_usagedevice_memory_usagerT   r   rn   rU   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r   r!  r8   rG   start_usend_usr   ro   r   rm   rj   r   r$  r%  rT   r   rn   rJ   rU   r&  r'  r   r(  r   r"  r#  r   s                              r#   r   FunctionEvent.__init__  s    : #	"/)$,X$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0F6H 	  )$)
2D "!#$&!r%   c                     U R                   [        R                  :X  d   eU R                  R	                  [        XU5      5        g r.   )rU   r	   rV   r:   r\   r   )r   r8   r   r  s       r#   append_kernelFunctionEvent.append_kernel  s3    :>>111F4:;r%   c                     U R                   [        R                  :X  d   e[        U[        5      (       d   eUR                   [        R                  :X  d   eU R
                  R                  U5        g)zAppend a CPU child of type FunctionEvent.

One is supposed to append only direct children to the event to have
correct self cpu time being reported.
N)rU   r	   rV   
isinstancer   r9   r\   )r   childs     r#   rZ   FunctionEvent.append_cpu_child  s[     :>>111%////  JNN222  'r%   c                     U R                   [        R                  :X  d   e[        U[        5      (       d   eUR                   [        R                  :X  d   eXl        g)a  Set the immediate CPU parent of type FunctionEvent.

One profiling FunctionEvent should have only one CPU parent such that
the child's range interval is completely inside the parent's. We use
this connection to determine the event is from top-level op or not.
N)rU   r	   rV   r3  r   r7   )r   re   s     r#   r[   FunctionEvent.set_cpu_parent  sL     :>>111&-0000!!Z^^333 r%   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr.   )r$  rx   r4  s     r#   ry   6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>/  s      +
0Au""0Ar{   )rT   rU   r	   rV   r$  r|   r9   r*   s    r#   self_cpu_memory_usage#FunctionEvent.self_cpu_memory_usage+  sJ    ==D,,
>$$s +
040A0A+
 (
 
 	
r%   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr.   )r%  r:  s     r#   ry   9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>7  s      .
3D%%%3Dr{   )rT   rU   r	   rV   r%  r|   r9   r*   s    r#   self_device_memory_usage&FunctionEvent.self_device_memory_usage3  sJ    ==D,,
>''# .
373D3D.
 +
 
 	
r%   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r  c                     U R                   $ r.   rA  r*   s    r#   self_cuda_memory_usage$FunctionEvent.self_cuda_memory_usage;  s     ,,,r%   c                 t    U R                   [        R                  :X  a  U R                  R	                  5       $ gr   )rU   r	   rV   rP   r   r*   s    r#   r  FunctionEvent.cpu_time_totalC  s*    z~~-??--//r%   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr.   )r  r:  s     r#   ry   4FunctionEvent.self_cpu_time_total.<locals>.<genexpr>N  s      )
.?U  .?r{   )rT   rU   r	   rV   r  r|   r9   r*   s    r#   rv   !FunctionEvent.self_cpu_time_totalJ  sJ    ==D,,
>""S )
.2.?.?)
 &
 
 	
r%   c                    U R                   (       d  U R                  (       d  gU R                  [        R                  :X  af  U R
                  (       d9  [        S U R                   5       5      [        S U R                   5       5      -   $ [        S U R                   5       5      $ U R                  [        R                  [        R                  [        R                  4;   d   eU R                  R                  5       $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr.   r  rx   kinfos     r#   ry   2FunctionEvent.device_time_total.<locals>.<genexpr>Y       D|e>>|r{   c              3   8   #    U  H  oR                   v   M     g 7fr.   r  )rx   rA   s     r#   ry   rR  Y  s      K3DR((3Dr{   c              3   8   #    U  H  oR                   v   M     g 7fr.   rO  rP  s     r#   ry   rR  ^  rS  r{   )rT   r   rU   r	   rV   r   r|   r:   r9   CUDAPrivateUse1MTIArP   r   r*   s    r#   r  FunctionEvent.device_time_totalR  s    ==z~~->>Dt||DDs K373D3DK H  
 Dt||DDD##&&(   
 ??--//r%   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     U R                   $ r.   rU  r*   s    r#   cuda_time_totalFunctionEvent.cuda_time_totalg  s     %%%r%   c                 l   U R                   (       d  U R                  (       d  gU R                  [        R                  :X  a)  U R
                  [        S U R                   5       5      -
  $ U R                  [        R                  [        R                  [        R                  4;   d   eU R
                  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr.   rU  r:  s     r#   ry   7FunctionEvent.self_device_time_total.<locals>.<genexpr>t  s      05FE''5Fr{   )rT   r   rU   r	   rV   r  r|   r9   rW  rX  rY  r*   s    r#   r  $FunctionEvent.self_device_time_totalo  s    ==z~~-))C 0595F5F0 -   ##&&(   
 )))r%   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     U R                   $ r.   r  r*   s    r#   r   "FunctionEvent.self_cuda_time_total  s     ***r%   c                     U R                   $ r.   r   r*   s    r#   rH   FunctionEvent.key  s    yyr%   c           	          U R                   nU R                  nU R                  nSR                  / SPU R                   PSPU R
                   PSPU R                   PSPU R                   PSPU R                   PSPU R                   PSPU R                  R                   PS	PU R                  R                   PS
P[        U R                   Vs/ s H  oDR                  PM     sn5       PSPU PSPU PSPU R
                   PSPU R                   PSP[        U R                   5       PSPU R"                   PSPU PSPU PSPU R$                   PSPU R&                   PSPU R(                   PSPU R*                   PSP5      $ s  snf )Nr   z<FunctionEvent id=z name=z overload_name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r  r%  joinr!  r8   r   rU   rJ   r  rP   rQ   rR   r   r9   rG   r   r$  rT   r   rn   r   )r   r   r  r%  r4  s        r#   __repr__FunctionEvent.__repr__  s   oo**"66y y  y	 y y		{ y/ y$J\J\I] y ^ y++,y,5y6:ll^yCMyNRN_N_M`yay--.y.6y7;7J7J6KyLy  t7H7H I7He7H IJKy LMy NYMy Z`y al_lymy II;	y '	y (,{{m	y 4B	y CFdFWFWBXAY	yZ 	y
 !% 5 56y
 78y
 9D}y
 ESy
 TgRgy
hy y '2y 37..1Ay BJy KOJZJZI[y \gy hlgugufvy wxy	
 !Js   F) r"  r  r9   r$  r7   r&  r%  r'  rU   r(  ro   r!  r   rT   r   r   r   r:   r#  r8   rJ   r   rj   r)  rn   rm   rG   rP   r*  r+  r   r   )r   r   r   r   r   r	   rV   r   r0  rZ   r[   r   r<  rA  r   r  rE  r  rv   r  r\  r  r   rH   ro  r   r   r%   r#   r   r     se   8 NN 7>'@<	(
! 
 
 
 
 Y-	 
-   
 
 0 0( K&	 
& * * U+	 
+  
r%   r   c                   4    \ rS rSrSrS	S jrS rS rS rSr	g)
r   i  z:Used to average stats over multiple FunctionEvent objects.Nc                 b   S U l         SU l        SU l        SU l        SU l        S U l        SU l        SU l        SU l        SU l	        S U l
        S U l        S U l        S U l        SU l        SU l        SU l        SU l        S U l        S U l        [(        R*                  U l        SU l        SU l        g )Nr   F)rH   r  rJ   rT   r   r   r  r  rv   r  r   r   rm   rj   r$  r%  r<  rA  r9   r7   r	   rV   rU   r   r(  r*   s    r#   r   FunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;,0%)
$(
%&() *+"-.%;?37'1~~$
r%   c                 4   U R                   c  UR                   U l         UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l        UR                  U l	        UR                  U l
        UR                  U l        UR                  U l        UR                  U l        [        U[        [         45      (       d   eUR                   U R                   :X  d   eU =R"                  UR"                  -  sl        U =R$                  UR$                  -  sl        U =R&                  UR&                  -  sl        U =R(                  UR(                  -  sl        U =R*                  UR*                  -  sl        U =R,                  UR,                  -  sl        U =R.                  UR.                  -  sl        U =R0                  UR0                  -  sl        U =R2                  UR2                  -  sl        U R4                  c  UR4                  U l        U $ UR4                  b  U =R4                  UR4                  -  sl        U $ r.   )rH   rJ   rT   r   r7   r9   r   r   rm   rj   rU   r   r   r   r3  r   r   r  r  rv   r  r$  r%  r<  rA  r  r(  r   others     r#   r;   FunctionEventAvg.add  s   88 yyDH ==DL!NNDM"__DN#..DO % 2 2D!&!4!4D % 2 2DDJDJ$00D"__DN#..DO&+&>&>D#%-1A!BCCCCyyDHH$$$u333%"9"99  E$=$== ##u'C'CC#!7!77  E$=$== ""e&A&AA"%%)G)GG%

ekk!
::DJ  [[$JJ%++%Jr%   c                 $    U R                  U5      $ r.   )r;   ru  s     r#   __iadd__FunctionEventAvg.__iadd__  s    xxr%   c                 F   U R                   (       d  SOU R                   nU R                  nU R                  nU R                  nSU R                   SU R
                   SU R                   SU SU SU SU S[        U R                  5       S	U R                   SU S
U S3$ )Nr   z<FunctionEventAvg key=z self_cpu_time=rh  z  self_ri  r   rj  rk  rl  rm  )
r   r  r  r%  rH   r  r  r   r   r$  )r   r   self_device_timer  device_memorys        r#   ro  FunctionEventAvg.__repr__  s    $(OOf::**00$TXXJod>Z>Z=[[efjfwfwex y M(8'9;-vk]Zhilmqm~m~i  iA A  $ 5 56a}NS`Raabd	
r%   )r  r9   r$  r7   r  r%  r  rU   r(  r   rT   r   r   r   rH   rJ   r   rj   r<  rv   rA  r  rm   r   )r   N)
r   r   r   r   r   r   r;   ry  ro  r   r   r%   r#   r   r     s    D2$L	
r%   r   c                       \ rS rSrS rSrg)r   i  c                 n    [        U5      S:  a  [        R                  R                  U5      OUX'   X   $ ri   )r6   torch_C	_demangle)r   rH   s     r#   __missing__StringTable.__missing__  s.     033x!|EHH&&s+	yr%   r   N)r   r   r   r   r  r   r   r%   r#   r   r     s    r%   r   c                   $    \ rS rSrSrS rS rSrg)r   i  z=Acceleration structure for accessing mem_records in interval.c                     Xl         / U l        / U l        [        U5      S:  aR  [	        [        U5       VVs/ s H  u  p#US   R                  5       U4PM     snn5      n[        U6 u  U l        U l        g g s  snnf r   )_mem_records_start_nses_indicesr6   rW   r<   start_nszip)r   mem_recordsirtmps        r#   r   MemRecordsAcc.__init__  sp    '&(#%{a9[;QR;Q411Q4==?A.;QRSC.13i+Ddm  Rs   !A7
c              #      #    [         R                  " U R                  US-  5      n[         R                  " U R                  US-  5      n[	        X45       H!  nU R
                  U R                  U      v   M#     g7f)zk
Return all records in the given interval
To maintain backward compatibility, convert us to ns in function
i  N)bisectbisect_leftr  bisect_rightr5   r  r  )r   r,  r-  	start_idxend_idxr  s         r#   in_intervalMemRecordsAcc.in_interval  sh     
 &&t'7'7DI	%%d&6&6Fy*A##DMM!$455 +s   A:A<)r  r  r  N)r   r   r   r   r   r   r  r   r   r%   r#   r   r     s    G86r%   r   c                 6   ^  / SQn[        U 4S jU 5       5      $ )N))autograd/__init___make_grads)r  backward)ztorch/tensorr  )_internal/common_utilsprof_callable)r  prof_func_call)r  prof_meth_callc              3   Z   >#    U  H   oS    T;   =(       a    US   T;   (       + v   M"     g7f)r   r3   Nr   )rx   r   r   s     r#   ry   &_filter_stack_entry.<locals>.<genexpr>  s,     O>NaDEM3adem44>Ns   (+)all)r   filtered_entriess   ` r#   _filter_stack_entryr  
  s     O>NOOOr%   z[memory]z[OutOfMemory]c                 .    [         [        SSSSSS/nX;   $ )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r8   filtered_out_namess     r#   _filter_namer    s/     	 *.)	 %%r%   c                 `    [        5       nX    n U(       a  U R                  S5      (       a  Sn U $ )NzProfilerStep#zProfilerStep*)r   
startswith)r8   with_wildcardstring_tables      r#   _rewrite_namer  -  s-    =LD???++"DKr%   c
                   ^^.^/^0^1^2 [        U 5      S:X  a  g[        S U  5       5      n
[        S U  5       5      nU S   R                  nU(       d  U
(       a  [        S5      e[        S U  5       5      n[        S U  5       5      nTb  [	        [        U U4S	 jS
S9UUUS9n [        S U  5       5      S-   nUb  [        X5      n[        S U  5       5      S-   nUb  [        UU5      nSnUnSnU  Vs/ s H9  nUR                  c  M  [        UR                  5      S:  d  M-  UR                  PM;     nn[        U5      S:  nU(       a$  [        S U 5       5      S-   nUb  [        UU5      nS/nU(       a  UR                  S5        U/ SQ-  nUb  UR                  5       OSnU
(       a"  UR                  SU 3SU S3U S3U S3/5        U(       a;  UR                  SS/5        U(       a!  U(       a  UR                  U S3SU S3/5        UR                  S5        [        S U  5       5      nU(       a  UR                  S5        S m.S/m2S/m/T.* /m0S4U.U/U0U24S! jjnS" nU" U5        U(       a  U" U5        US#U-   S  H  nU" U5        M     U(       a  UR                  S$5        U" U5        U(       a  UR                  S%5        U" US&S'9  U(       ap  U  Vs/ s H!  nUR                  S:  d  M  UR                  PM#     nn[        U5      S:w  a1  U" [        U5      5      u  nnUR                  S(U 35        U" U5        OS)nT2S   n T/S   n!T0S   n"Sn/ m1U14S* jn#Sn$Sn%U  H  nU$UR                  -  n$UR                  [        R                   :X  a"  UR"                  (       a  U%UR$                  -  n%MR  UR                  [        R&                  [        R(                  [        R*                  4;   d  M  UR,                  (       a  M  U%UR$                  -  n%M     Ub  U#" S+U"-  5        U#" U5        U	(       a  U#" S+U"-  5        U#" S,5        U#" U!5        U#" U R.                  " U6 5        U#" U!5        S- n&Sn'U  GH  nU'U:X  a    GO}U	(       a  UR0                  b  M#  U'S#-  n'UR2                  n(Ub  [        U(5      US.-
  :  a  U(SUS.-
   S/-   n([5        UR                  U$5      Ul        UR8                  (       d  [5        UR:                  U$5      OSUl        U(/n)U(       a2  UR>                  n*Ub  [        U*5      US.-
  :  a  U*SUS.-
   S/-   n*U)U*/-  n)U)UR6                  UR@                  UR<                  URB                  URD                  /-  n)U
(       aX  [5        UR$                  U%5      Ul#        U)R                  URH                  URF                  URJ                  URL                  /5        U(       a  U)R                  [O        URP                  5      [O        URR                  5      /5        U(       a@  U(       a9  U)R                  [O        URT                  5      [O        URV                  5      /5        U)R                  URX                  5        U(       a  U)R                  URZ                  5        U(       a'  U)R                  []        UR^                  5      SU 5        U(       aB  UR                  S::  a  U)R                  S05        O U)R                  UR                  W-  S1 5        U(       aB  Sn+[        UR                  5      S:  a  U&" UR                  S   U5      n+U)R                  U+5        U#" U R.                  " U)6 5        U(       d  GM  S/[        U5      S#-
  -  n,UR                  S#S  H#  n-U#" U R.                  " U,U&" U-U5      /-   6 5        M%     U,R                  S5        U#" U R.                  " U,6 5        GM     U#" U!5        U#" S2[a        U$5       35        U
(       a*  U#" SUb  UR                  5       OS S3[a        U%5       35        SRc                  T15      $ s  snf s  snf )5zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   >   #    U  H  oR                   S :  v   M     g7fr   Nrc  rw   s     r#   ry   _build_table.<locals>.<genexpr>F  s     Ou66:   c              3   >   #    U  H  oR                   S :  v   M     g7fr  rD  rw   s     r#   ry   r  G  s     P77!;r  z9use_device is None, but there is device performance data.c              3   |   #    U  H2  nUR                   S L=(       a    [        UR                   5      S:  v   M4     g 7fr   )r   r6   rw   s     r#   ry   r  O  s:      E 
		4	'	GC0B0B,Ca,G	G   :<c              3   |   #    U  H2  nUR                   S L=(       a    [        UR                   5      S:  v   M4     g 7fr   )r   r6   rw   s     r#   ry   r  T  s:      E 
		D	(	IS1D1D-E-I	Ir  Nc                 |   > [        U TR                  SS5      R                  SS5      R                  SS5      5      $ )Nr   r   r   r   )r   r   )r]   r   s    r#   rM   _build_table.<locals>.<lambda>]  s3    OOFH5WUH-W]H5	!r%   T)rH   reverser   c              3   L   #    U  H  n[        UR                  5      v   M     g 7fr.   )r6   rH   rx   r]   s     r#   ry   r  j  s     ;FSCLLFs   "$   c              3   ^   #    U  H#  n[        [        UR                  5      5      v   M%     g 7fr.   )r6   r   r   r  s     r#   ry   r  n  s#     KFSc#c&6&6"788Fs   +-   c              3   F   #    U  H  n[        S  U 5       5      v   M     g7f)c              3   8   #    U  H  n[        U5      v   M     g 7fr.   r6   )rx   r   s     r#   ry   )_build_table.<locals>.<genexpr>.<genexpr>|  s     2E5CJJEr{   N)max)rx   rm   s     r#   ry   r  |  s     Gu2E222s   !NamezOverload Name)z
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   >   #    U  H  oR                   S :g  v   M     g7f)rS   N)rJ   r  s     r#   ry   r    s     =fs*fr  zNode IDr   c                    > TS==   SU-   [        U 5      -   S-   ST-  -   -  ss'   TS==   SU -  ST-  -   -  ss'   TS==   U T-   -  ss'   g )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r#   
add_column _build_table.<locals>.add_column  sh    qHs7|+c1S<5GH	
 	qS7]cL.@AAg44r%   c                 .   / SQnU S:  d   e[        S[        [        R                  " U 5      S-  [	        [        U5      S-
  5      5      5      nUS:  a  U[        U5      :  d   e[        S[        R                  " U5      S-  5      U[        U5         4$ )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r3   
   g      )	r  minmathlog10floatr6   powfloorr   )r(  flop_headers	log_flopss      r#   auto_scale_flops&_build_table.<locals>.auto_scale_flops  s    
 qyy3tzz%014eC<MPQ<Q6RST	A~)c,.?"???BI.57c)n9UVVr%   r3   zInput ShapeszSource Location<)r  zTotal Fc                 J   > TR                  U 5        TR                  S5        g )Nr   )r\   )r   results    r#   r\   _build_table.<locals>.append  s    adr%   =z1This report only display top-level ops statisticsc                 v    [        U 5      U:  a)  [        U 5      U-
  nXS  n [        U 5      S:  a  SU SS  -   n U $ )Nr  ...r  )r   src_column_widthoffsets      r#   	trim_path_build_table.<locals>.trim_path  sG    t9''Y!11F=D4y1}tABx'r%   r  r  z--z8.3fzSelf CPU time total: z time total: )rm  )2r6   anyr   RuntimeErrorr
   rW   r  r  rm   r\   upperr>   r(  rv   rU   r	   rV   r   r  rW  rX  rY  r   r   r7   rH   r   r)  rT   r  r*  r   r  r  r  r+  r  r  r  r   r$  r<  r%  rA  r  rJ   r   r   r   rn  )3r_   r   r   r   r   r   r   r   r   r   has_device_timehas_device_memr   has_input_shapeshas_overload_namesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  r]   stacks	has_stackheadersr   append_node_idr  r  r   	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthr\   sum_self_cpu_time_totalsum_self_device_time_totalr  event_limitr8   
row_valuesr   	src_fieldempty_headersr   r  r  r  r  r  s3    `                                            @@@@@r#   r   r   6  s    6{aOOOOPPPN%%J /VWW  
   
  	 ")!
  ;F;;a?( 1IKFKKaO*!"57NO-##csyy	SYYRSAS			V   FaIGGG!K 	  +"#35IJhG'  G )3(>*""$FK}%}B'-v&-y)		
 	
 .NN"m4(K=- NN< =f==Ny! LTNTN$}oO5 5W  !$%Q++-.'( / ~&&'()#c2*0B&3CIIMYSYY&	By>Q*:3y>*J'[,NNVL>23)*J"J"J!!$KJ F  !"3#:#::??jnn,&#*D*DD&OO&& *** '#*D*DD& $ s[ !vs[ !BC
:
:g&'
: K)# S^^%?1Kww ,T>SVW>W1W50146>D1##%< 

 << s113JK 	 V
--M%1&*?!*CC -.K1F1J Lu T=/)J  ''!!""
 	

 '9**,F(C$ 22,,--''  #3#7#78"3#<#<=	 n!! 's'>'>?&s'C'CD	 	II	
 ckk*c#"2"234H5HIJyyA~!!$'!!SYY%<T$BDI399~!%ciil4DE	i(z  *-.9DCL1$45M12%%'9U<L+M*NN '   $:$$m45E H :
"<0G#H"IJK**@J$$&fM N'(BCDF	
 776?M~ Cs   4a$	a$$a$a)6a))F)	NNr   r   r   r   FFF)$r  rX   r  collectionsr   r   operatorr   typingr   r   typing_extensionsr   r  torch.autogradr	   __all__listr
   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r   r%   r#   <module>r     s       /    (  %	[ [|
5"D   <	% 	% 
H<	=L
' L
^N
* N
b+ 6 6,	P  * && Fr%   