import gc
import typing

import torch

from ._utils import _dummy_type


if not hasattr(torch._C, "_CudaStreamBase"):
    # Define dummy types so this module can be imported on builds without CUDA support.
    torch._C.__dict__["_CUDAGraph"] = _dummy_type("_CUDAGraph")
    torch._C.__dict__["_graph_pool_handle"] = _dummy_type("_graph_pool_handle")
    torch._C.__dict__["_cuda_isCurrentStreamCapturing"] = _dummy_type(
        "_cuda_isCurrentStreamCapturing"
    )

from torch._C import (  # noqa: F401
    _cuda_isCurrentStreamCapturing,
    _CUDAGraph,
    _graph_pool_handle,
)


def is_current_stream_capturing():
    r"""Return True if CUDA graph capture is underway on the current CUDA stream, False otherwise.

    If a CUDA context does not exist on the current device, returns False without initializing the context.
    """
    return _cuda_isCurrentStreamCapturing()


def graph_pool_handle():
    r"""Return an opaque token representing the id of a graph memory pool.

    See :ref:`Graph memory management<graph-memory-management>`.

    .. warning::
        This API is in beta and may change in future releases.
    """
    return _graph_pool_handle()
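

# --- Illustrative usage sketch (not part of the original module) --------------
# A minimal sketch, assuming two captures that should share one memory pool via
# ``graph_pool_handle()``. Tensor shapes and the warmup are arbitrary example
# choices, not anything prescribed by this file.
def _example_shared_memory_pool():
    pool = graph_pool_handle()
    x = torch.randn(8, device="cuda")

    # A brief warmup on a side stream is generally recommended before capturing.
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        (x + 1) * 2
    torch.cuda.current_stream().wait_stream(s)

    g1, g2 = torch.cuda.CUDAGraph(), torch.cuda.CUDAGraph()
    with torch.cuda.graph(g1, pool=pool):
        y = x + 1
    with torch.cuda.graph(g2, pool=pool):
        z = y * 2

    # With a shared pool, replay in the same order the graphs were captured.
    g1.replay()
    g2.replay()
    return z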


# Thin Python wrapper over the C++ graph object, so the methods can carry docstrings.
class CUDAGraph(torch._C._CUDAGraph):
    r"""Wrapper around a CUDA graph.

    .. warning::
        This API is in beta and may change in future releases.
    """

    def __new__(cls):
        return super().__new__(cls)

    def capture_begin(self, pool=None, capture_error_mode="global"):
        r"""Begin capturing CUDA work on the current stream.

        Typically, you shouldn't call ``capture_begin`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_begin`` internally.

        Arguments:
            pool (optional): Token (returned by :func:`~torch.cuda.graph_pool_handle` or
                :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) that hints this graph may share memory
                with the indicated pool.  See :ref:`Graph memory management<graph-memory-management>`.
            capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
                Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
                may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
                actions in the current thread, and "relaxed" will not error on these actions. Do NOT change this setting
                unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_
        """
        super().capture_begin(pool=pool, capture_error_mode=capture_error_mode)

    def capture_end(self):
        r"""End CUDA graph capture on the current stream.

        After ``capture_end``, ``replay`` may be called on this instance.

        Typically, you shouldn't call ``capture_end`` yourself.
        Use :class:`~torch.cuda.graph` or :func:`~torch.cuda.make_graphed_callables`,
        which call ``capture_end`` internally.
        """
        super().capture_end()

    def replay(self):
        r"""Replay the CUDA work captured by this graph."""
        super().replay()

    def reset(self):
        r"""Delete the graph currently held by this instance."""
        super().reset()

    def pool(self):
        r"""Return an opaque token representing the id of this graph's memory pool.

        This id can optionally be passed to another graph's ``capture_begin``,
        which hints the other graph may share the same memory pool.
        """
        return super().pool()

    def enable_debug_mode(self):
        r"""Enable debugging mode for CUDAGraph.debug_dump."""
        return super().enable_debug_mode()

    def debug_dump(self, debug_path):
        r"""
        Arguments:
            debug_path (required): Path to dump the graph to.

        Calls a debugging function to dump the graph if the debugging is
        enabled via CUDAGraph.enable_debug_mode()
        """
        return super().debug_dump(debug_path)
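

# --- Illustrative usage sketch (not part of the original module) --------------
# Low-level use of the ``CUDAGraph`` wrapper above. As its docstrings note, the
# ``graph`` context manager below (or ``make_graphed_callables``) is the usual
# entry point; this sketch only shows the underlying capture/replay calls.
# Shapes and the warmup count are arbitrary example choices.
def _example_raw_capture_and_replay():
    static_x = torch.randn(16, device="cuda")
    g = torch.cuda.CUDAGraph()

    # Warm up and capture on a non-default (side) stream.
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        for _ in range(3):
            static_y = static_x * 2.0
        g.capture_begin()
        static_y = static_x * 2.0
        g.capture_end()
    torch.cuda.current_stream().wait_stream(s)

    # Refill the static input, then replay the captured kernels.
    static_x.fill_(3.0)
    g.replay()
    return static_y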


class graph:
    r"""Context-manager that captures CUDA work into a :class:`torch.cuda.CUDAGraph` object for later replay.

    See :ref:`CUDA Graphs <cuda-graph-semantics>` for a general introduction,
    detailed use, and constraints.

    Arguments:
        cuda_graph (torch.cuda.CUDAGraph): Graph object used for capture.
        pool (optional): Opaque token (returned by a call to :func:`~torch.cuda.graph_pool_handle()` or
            :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) hinting this graph's capture
            may share memory from the specified pool. See :ref:`Graph memory management<graph-memory-management>`.
        stream (torch.cuda.Stream, optional): If supplied, will be set as the current stream in the context.
            If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.
        capture_error_mode (str, optional): specifies the cudaStreamCaptureMode for the graph capture stream.
            Can be "global", "thread_local" or "relaxed". During cuda graph capture, some actions, such as cudaMalloc,
            may be unsafe. "global" will error on actions in other threads, "thread_local" will only error for
            actions in the current thread, and "relaxed" will not error on actions. Do NOT change this setting
            unless you're familiar with `cudaStreamCaptureMode <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85>`_

    .. note::
        For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
        used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

    .. warning::
        This API is in beta and may change in future releases.

    .. _cudaStreamCaptureMode:
        https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g9d0535d93a214cbf126835257b16ba85
    """

    default_capture_stream: typing.Optional["torch.cuda.Stream"] = None

    def __init__(
        self,
        cuda_graph,
        pool=None,
        stream=None,
        capture_error_mode: str = "global",
    ):
        # Lazy-init of default_capture_stream helps avoid circular-import errors.
        # Not thread safe, but graphs already have the documented restriction that
        # only one capture may be underway at a time in the process.
        if self.__class__.default_capture_stream is None:
            self.__class__.default_capture_stream = torch.cuda.Stream()

        self.pool = () if pool is None else (pool,)
        self.capture_stream = (
            stream if stream is not None else self.__class__.default_capture_stream
        )
        assert self.capture_stream is not None
        self.stream_ctx = torch.cuda.stream(self.capture_stream)
        self.cuda_graph = cuda_graph
        self.capture_error_mode = capture_error_mode

    def __enter__(self):
        # Free as much memory as possible before capture.
        torch.cuda.synchronize()
        gc.collect()
        torch.cuda.empty_cache()

        # Make the chosen capture stream current for the duration of the context.
        self.stream_ctx.__enter__()

        self.cuda_graph.capture_begin(
            *self.pool, capture_error_mode=self.capture_error_mode
        )

    def __exit__(self, exc_type, exc_value, traceback):
        self.cuda_graph.capture_end()
        self.stream_ctx.__exit__(exc_type, exc_value, traceback)
        # Returning None propagates exceptions from capture_end or stream_ctx.__exit__().


def make_graphed_callables(
    callables, sample_args, num_warmup_iters=3, allow_unused_input=False, pool=None
):
    r"""Accept callables (functions or :class:`nn.Module<torch.nn.Module>`\ s) and returns graphed versions.

    Each graphed callable's forward pass runs its source callable's
    forward CUDA work as a CUDA graph inside a single autograd node.

    The graphed callable's forward pass also appends
    a backward node to the autograd graph. During backward, this node runs the
    callable's backward work as a CUDA graph.

    Therefore, each graphed callable should be a drop-in replacement for its source callable
    in an autograd-enabled training loop.

    See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

    If you pass a tuple of several callables, their captures will use the same memory pool.
    See :ref:`Graph memory management<graph-memory-management>` for when this is appropriate.

    Arguments:
        callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
            See :ref:`Graph memory management<graph-memory-management>` for when passing a tuple of callables
            is appropriate.  If you pass a tuple of callables, their order in the tuple must be the same order
            they'll run in the live workload.
        sample_args (tuple of Tensors, or tuple of tuples of Tensors): Sample args for each callable.
            If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
            If a tuple of callables was passed, ``sample_args`` must be a tuple of tuples of argument Tensors.
        num_warmup_iters (int): The number of warmup iterations. Currently, ``DistributedDataParallel`` needs
            11 iterations for warm up. Default: ``3``.
        allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
            (and therefore their grad is always zero) is an error. Defaults to False.
        pool (optional): Token (returned by :func:`~torch.cuda.graph_pool_handle` or
            :meth:`other_Graph_instance.pool()<torch.cuda.CUDAGraph.pool>`) that hints this graph may share memory
            with the indicated pool.  See :ref:`Graph memory management<graph-memory-management>`.

    .. note::
        The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
        that's expected for the corresponding real input in the training loop.

    .. warning::
        This API is in beta and may change in future releases.

    .. warning::
        ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

    .. warning::
        Returned callables do not support higher order differentiation (e.g., double backward).

    .. warning::
        In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
        may be trainable. Buffers must have ``requires_grad=False``.

    .. warning::
        After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
        you may not add or remove any of that Module's parameters or buffers.

    .. warning::
        :class:`torch.nn.Module`\s passed to :func:`~torch.cuda.make_graphed_callables` must not have module hooks
        registered on them at the time they are passed. However, registering hooks on modules *after* passing them
        through :func:`~torch.cuda.make_graphed_callables` is allowed.

    .. warning::
        When running a graphed callable, you must pass its arguments in the same order and format
        they appeared in that callable's ``sample_args``.

    .. warning::
        Automatic mixed precision is supported in :func:`~torch.cuda.make_graphed_callables` only with disabled
        caching. The context manager `torch.cuda.amp.autocast()` must have `cache_enabled=False`.
    """
    if torch.is_autocast_enabled() and torch.is_autocast_cache_enabled():
        raise RuntimeError(
            "make_graphed_callables does not support the autocast caching. "
            "Please set `cache_enabled=False`."
        )

    just_one_callable = False

    if not isinstance(callables, tuple):
        just_one_callable = True
        callables = (callables,)
        sample_args = (sample_args,)

    flatten_sample_args = []

    for c, args in zip(callables, sample_args):
        if isinstance(c, torch.nn.Module):
            assert (
                len(c._backward_hooks) == 0
                and len(c._forward_hooks) == 0
                and len(c._forward_pre_hooks) == 0
            ), (
                "Modules must not have hooks registered at the time they are passed. "
                "However, registering hooks on modules after passing them "
                "through make_graphed_callables is allowed."
            )
            assert all(b.requires_grad is False for b in c.buffers()), (
                "In any :class:`~torch.nn.Module` passed to "
                ":func:`~make_graphed_callables`, only parameters may be trainable. "
                "All buffers must have ``requires_grad=False``."
            )
        flatten_arg = torch.utils._pytree.arg_tree_leaves(*args)
        flatten_sample_args.append(tuple(flatten_arg))
        assert all(isinstance(arg, torch.Tensor) for arg in flatten_arg), (
            "In the beta API, sample_args "
            "for each callable must contain only Tensors. Other types are not allowed."
        )

    # If a callable is an nn.Module, its graph's full input surface is the args the
    # user explicitly passes to forward (i.e., its sample_args) AND the module's
    # parameter attributes.
    per_callable_len_user_args = [len(args) for args in flatten_sample_args]
    per_callable_module_params = [
        tuple(c.parameters()) if isinstance(c, torch.nn.Module) else ()
        for c in callables
    ]
    per_callable_static_input_surfaces = [
        flatten_sample_args[i] + per_callable_module_params[i]
        for i in range(len(callables))
    ]

    fwd_graphs = [torch.cuda.CUDAGraph() for _ in range(len(callables))]
    bwd_graphs = [torch.cuda.CUDAGraph() for _ in range(len(callables))]

    mempool = graph_pool_handle() if pool is None else pool

    # Warmup on a side stream, so lazy-initialization CUDA work (e.g. cudnn
    # benchmarking) doesn't end up in any capture.
    torch.cuda.synchronize()
    with torch.cuda.stream(torch.cuda.Stream()):
        for func, args, static_input_surface in zip(
            callables, sample_args, per_callable_static_input_surfaces
        ):
            grad_inputs, outputs, outputs_grad = None, None, None
            for _ in range(num_warmup_iters):
                outputs = torch.utils._pytree.tree_leaves(func(*args))
                outputs_grad = tuple(o for o in outputs if o.requires_grad)
                if len(outputs_grad) > 0:
                    grad_inputs = torch.autograd.grad(
                        outputs=outputs_grad,
                        inputs=tuple(
                            i for i in static_input_surface if i.requires_grad
                        ),
                        grad_outputs=tuple(
                            torch.empty_like(o) for o in outputs if o.requires_grad
                        ),
                        only_inputs=True,
                        allow_unused=allow_unused_input,
                    )
            for v in [outputs, outputs_grad, grad_inputs]:
                del v
    torch.cuda.synchronize()

    # All captures share a mempool. To avoid replays corrupting each other's memory,
    # capture all passes in the same order they'll run:
    # fwd 1, fwd 2, ..., fwd N, then bwd N, bwd N-1, ..., bwd 1.

    # Capture forward graphs.
    per_callable_static_outputs = []
    per_callable_output_unflatten_spec = []
    for func, args, fwd_graph in zip(callables, sample_args, fwd_graphs):
        with torch.cuda.graph(fwd_graph, pool=mempool):
            outputs = func(*args)

        flatten_outputs, spec = torch.utils._pytree.tree_flatten(outputs)
        per_callable_static_outputs.append(tuple(flatten_outputs))
        per_callable_output_unflatten_spec.append(spec)

    # Capture backward graphs in reverse order.
    per_callable_static_grad_outputs = []
    per_callable_static_grad_inputs = []
    for static_input_surface, static_outputs, bwd_graph in zip(
        reversed(per_callable_static_input_surfaces),
        reversed(per_callable_static_outputs),
        reversed(bwd_graphs),
    ):
        static_grad_outputs = tuple(
            torch.empty_like(o) if o.requires_grad else None for o in static_outputs
        )

        outputs_grad = tuple(o for o in static_outputs if o.requires_grad)
        grad_inputs = None
        if len(outputs_grad) > 0:
            with torch.cuda.graph(bwd_graph, pool=mempool):
                grad_inputs = torch.autograd.grad(
                    outputs=outputs_grad,
                    inputs=tuple(i for i in static_input_surface if i.requires_grad),
                    grad_outputs=tuple(o for o in static_grad_outputs if o is not None),
                    only_inputs=True,
                    allow_unused=allow_unused_input,
                )

        # Construct a tuple suitable for returning from Graphed's backward:
        # pad the actually-needed grads with Nones in gradient slots for inputs
        # that don't require grad.
        static_grad_inputs = []
        grad_idx = 0
        for arg in static_input_surface:
            if arg.requires_grad and grad_inputs is not None:
                static_grad_inputs.append(grad_inputs[grad_idx])
                grad_idx += 1
            else:
                static_grad_inputs.append(None)
        static_grad_inputs = tuple(static_grad_inputs)

        per_callable_static_grad_outputs.append(static_grad_outputs)
        per_callable_static_grad_inputs.append(static_grad_inputs)

    # Reverse the two lists built above so per_callable_*[i] holds the data for the
    # ith callable.
    per_callable_static_grad_outputs.reverse()
    per_callable_static_grad_inputs.reverse()

    def make_graphed_autograd_function(
        fwd_graph,
        bwd_graph,
        module_params,
        len_user_args,
        output_unflatten_spec,
        static_input_surface,
        static_outputs,
        static_grad_outputs,
        static_grad_inputs,
    ):
        class Graphed(torch.autograd.Function):
            @staticmethod
            def forward(ctx, *inputs):
                # At this stage, only the user args may (potentially) be new tensors.
                for i in range(len_user_args):
                    if static_input_surface[i].data_ptr() != inputs[i].data_ptr():
                        static_input_surface[i].copy_(inputs[i])
                fwd_graph.replay()
                assert isinstance(static_outputs, tuple)
                return tuple(o.detach() for o in static_outputs)

            @staticmethod
            @torch.autograd.function.once_differentiable
            def backward(ctx, *grads):
                assert len(grads) == len(static_grad_outputs)
                for g, grad in zip(static_grad_outputs, grads):
                    if g is not None:
                        # Don't copy if the incoming grad already lives in the
                        # static buffer.
                        if g.data_ptr() != grad.data_ptr():
                            g.copy_(grad)
                bwd_graph.replay()

                # Input args that didn't require grad expect a None gradient.
                assert isinstance(static_grad_inputs, tuple)
                return tuple(
                    b.detach() if b is not None else b for b in static_grad_inputs
                )

        def functionalized(*user_args):
            # Run the autograd function with inputs == all inputs to the graph that
            # might require grad (explicit user args + module parameters).
            # Assumes module params didn't change since capture.
            flatten_user_args = torch.utils._pytree.arg_tree_leaves(*user_args)
            out = Graphed.apply(*(tuple(flatten_user_args) + module_params))
            return torch.utils._pytree.tree_unflatten(out, output_unflatten_spec)

        return functionalized

    # Put together the final graphed callables.
    ret = []
    for i, func in enumerate(callables):
        graphed = make_graphed_autograd_function(
            fwd_graphs[i],
            bwd_graphs[i],
            per_callable_module_params[i],
            per_callable_len_user_args[i],
            per_callable_output_unflatten_spec[i],
            per_callable_static_input_surfaces[i],
            per_callable_static_outputs[i],
            per_callable_static_grad_outputs[i],
            per_callable_static_grad_inputs[i],
        )

        if isinstance(func, torch.nn.Module):

            def make_graphed_forward(func, graph_training_state, graphed, orig_fwd):
                def new_fwd(*user_args):
                    # If the module's training-or-eval state matches what was graphed,
                    # run the graph; otherwise run the original (eager) forward pass.
                    if func.training == graph_training_state:
                        return graphed(*user_args)
                    else:
                        return orig_fwd(*user_args)

                return new_fwd

            func.forward = make_graphed_forward(
                func, func.training, graphed, func.forward
            )
            ret.append(func)
        else:
            ret.append(graphed)

    if just_one_callable:
        return ret[0]

    return tuple(ret)
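

# --- Illustrative usage sketch (not part of the original module) --------------
# Graphing a small module with ``make_graphed_callables`` and using the result as
# a drop-in replacement inside an autograd-enabled loop. The module, shapes,
# optimizer, and iteration count are placeholder choices for the example.
def _example_make_graphed_callables():
    module = torch.nn.Linear(32, 32).cuda()
    opt = torch.optim.SGD(module.parameters(), lr=0.1)

    # sample_args must be Tensors whose requires_grad state matches the real inputs.
    sample_input = torch.randn(4, 32, device="cuda", requires_grad=True)
    graphed_module = make_graphed_callables(module, (sample_input,))

    for _ in range(10):
        opt.zero_grad(set_to_none=True)
        real_input = torch.randn(4, 32, device="cuda", requires_grad=True)
        out = graphed_module(real_input)  # forward runs as a replayed CUDA graph
        out.sum().backward()              # backward replays the captured bwd graph
        opt.step()
    return module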