
    [Th<?                        S SK r S SKJrJr  S SKrS SKJr  S SKJrJ	r	  / SQr
\ R                  " \5      r Sr " S S5      r " S	 S
\5      r\" \R"                  " S5      S 5      rS r " S S5      r " S S5      rS r  SS\\S4   S\\\\4      S\S\\\S4      S\\\\4      S\\\   \\   4   4S jjrS\\   4S jrg)    N)AnyOptionalmap_aggregate)tree_flattentree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       \ rS rSrSrS rSrg)_CustomReducer   a   
Custom reducer class that can be used to specify a custom operation that
reduces losses of multiple microbatches into one value.

Example:
>>> # xdoctest: +SKIP
>>> sum_reducer = _CustomReducer(
>>>     torch.tensor(0.0),
>>>     lambda a, b: a + b
>>> )
c                     Xl         X l        g N
init_value	reduce_fn)selfr   r   s      _/var/www/auris/envauris/lib/python3.13/site-packages/torch/distributed/pipelining/microbatch.py__init___CustomReducer.__init__(   s    $"    r   N)__name__
__module____qualname____firstlineno____doc__r   __static_attributes__ r   r   r   r      s    
#r   r   c                       \ rS rSrSrg)_LossReducer-   r   Nr   r   r   r   r   r   r   r   r!   r!   -       r   r!   g        c                 
    X-   $ r   r   )abs     r   <lambda>r(   1   s    15r   c                   z    \ rS rSr% SrS r\\S'   S rS r	\
S\\S4   4S	 j5       r\
S\\\4   4S
 j5       rSrg)r	   8   z*
Class used to specify chunking of inputs
c                     Xl         g r   	split_dim)r   r-   s     r   r   TensorChunkSpec.__init__=   s    "r   r-   c                 |    U R                   R                   SU R                   R                   SU R                   S3$ )N.())	__class__r   r   r-   r   s    r   __repr__TensorChunkSpec.__repr__B   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    SU R                    S3$ )NzTensorChunkSpec(r2   r,   r4   s    r   __str__TensorChunkSpec.__str__G   s    !$..!133r   
chunk_dims.c                      [        U S 5      nU$ )aJ  
A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
dimensions (int's).
Example:
    >>> # xdoctest: +SKIP
    >>> # There are three positional arguments to the model, and
    >>> # we are chunking them along dimension 0, 0 and 1, respectively
    >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
c                     [        U 5      $ r   r	   dims    r   r(   ,TensorChunkSpec.from_tuple.<locals>.<lambda>Y   	    ,r   r   )r:   args_chunk_specs     r   
from_tupleTensorChunkSpec.from_tupleJ   s     (,
 r   c                      [        U S 5      nU$ )a$  
A helper for creating a dictionary of `TensorChunkSpec` from a
dictionary of chunk dimensions (int's).
Example:
    >>> # xdoctest: +SKIP
    >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
    >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
c                     [        U 5      $ r   r=   r>   s    r   r(   +TensorChunkSpec.from_dict.<locals>.<lambda>k   rA   r   r   )r:   kwargs_chunk_specs     r   	from_dictTensorChunkSpec.from_dict]   s     *,
 ! r   r,   N)r   r   r   r   r   r   int__annotations__r5   r8   staticmethodtuplerC   dictstrrI   r   r   r   r   r	   r	   8   se    # N

4 #s(O $ !cN! !r   r	   c                       \ rS rSrSrg)
_Replicateq   r   Nr#   r   r   r   rR   rR   q   r$   r   rR   c                 L   0 n/ nUnSn[        U 5      [        U5      :X  d;   S[        U R                  5       5       S[        UR                  5       5       35       eU R                  5        GHe  u  px[	        U5      u  pUR                  U
5        X   nUc   e[	        U5      u  p[        U	5      [        U5      :w  a  [        SU SU 35      e/ n[        X5       GH  u  nnU[        L d  [        U[        R                  5      (       d  UR                  U/U-  5        MF  [        U[        5      (       Ga  [        U[        R                  5      (       d
   U S35       eUR                  UR                  5      nUU:  a>  U(       a"  [        R!                  SU S	U S
U S35        UnO[#        SU SU SU S35      e[        R$                  " XUR                  5      n[&        (       a  / nSnU H  n[        R(                  " U5      nUUR                  UR                  5      -   n[+        SSS5      /UR,                  -  n[+        UU5      UUR                  '   UUU'   UR                  U5        UUR                  UR                  5      -  nM     UR                  U5        OUR                  U5        SnGM  [/        SU 35      e   XU'   GMh     / n[1        U5       HJ  n0 nUR                  5        H   u  nnU Vs/ s H  nUU   PM
     nnUUU'   M"     UR                  U5        ML     / nU Hf  n 0 n![        U5      [        U 5      :X  d   e[        U R                  5       U5       H  u  u  nnn"[3        UU"5      U!U'   M     UR                  U!5        Mh     U$ s  snf )a3  
Given a dictionary of args, and a dictionary of chunking specs, shard the
args according to the chunking specs.

Args:
    args_dict: Dictionary of args
    args_chunk_spec: Dictionary of chunking specs
    num_chunks: Number of chunks to shard the args into

Returns:
    args_split: List of sharded args
Tzargs_dict.keys() = z args_chunk_spec.keys() = NzArgument value z9 did not have the same number of values as as chunk spec z is not a tensorz%Tensor size on chunking dimension is z', downsizing the number of chunks from z to r0   zArg z% on chunking dimension has a size of z$, smaller than the number of chunks z. PiPPy cannot reduce the number of chunks because other arguments have bigger chunk-dimension sizes. Please adjust your num_chunks setting.r   FzUnrecognized chunk spec: )lenlistkeysitemsr   append
ValueErrorziprR   
isinstancetorchTensorr	   sizer-   loggerwarningRuntimeErrortensor_split_debug_mask_minibatches
zeros_likeslicendim	TypeErrorranger   )#	args_dictrB   
num_chunksargs_sharded_replicated	arg_specsreal_num_chunksfirst_tensorarg_keyargflatspec
chunk_specchunk_spec_flat_sharded_arg_flatvchunk_vv_split_dim_sizechunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indiceschunks_flat	chunk_idx
chunk_argskeyv_flatarg_single_chunk
args_splitchunkper_chunk_argsarg_specs#                                      r   _shard_dict_of_argsr   u   s   ( !I OLy>S11 
d9>>#3455OPTUdUiUiUkPlOmn1 ")!#&
$-
%%%)*5t9O,,!# '++5,8 
 d4JAw*$Jq%,,,G,G ''o(=>G_55 "!U\\22Jqc9I4JJ2#$66'*;*;#< #o5# CDTCU VDDN<tTdSeefh +;*"7)+PQaPb cAAK MEE  !& 2 2(9(9! +*&(O$%M(5"'"2"21"5$1L4E4EgFWFW4X$X	).tT4)@(AGLL(P;@)9<g&7&78 2>.'..w7%):):7;L;L)MM )6 %++O<$++M:$";G9 EFFq 5t ,<(S *X K?+	
/557HC@CDfy 1D.JsO 8 	:& , J9~U+++$'y$A JS#"0h"?N3 %B.)    Es   N!args.kwargschunksrB   rH   returnc                   ^ Uc  0 nUc  [        [        5      4[        U 5      -  nUc#  [        R	                  U[        [        5      5      n[        [        [        U 5      5      [        [        U5      5      U5      n[        U5      n[        UUU5      n[        U5      U:  a<  [        U5      n[        [        [        U 5      5      [        [        U5      5      U5      n[        U5      [        U5      :w  a#  [        S[        U5       S[        U5       35      eU V^s/ s H*  m[        U4S j[        [        T5      5       5       5      PM,     n	nX4$ s  snf )a  
Given a sequence of args and kwargs, split them into a number of chunks
according to  their respective chunking specs.

Args:
    args: Tuple of args
    kwargs: Dict of kwargs
    chunks: Number of chunks to split the args and kwargs into
    args_chunk_spec: chunking specs for args, in same shape as args
    kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

Returns:
    args_split: List of sharded args
    kwargs_split: List of sharded kwargs
z;args and kwargs are split into different number of chunks: z, c              3   .   >#    U  H
  nTU   v   M     g 7fr   r   ).0ir   s     r   	<genexpr>0split_args_kwargs_into_chunks.<locals>.<genexpr>V  s     <%;jm%;s   )
r	   DEFAULT_CHUNK_DIMrU   rO   fromkeysr   	enumeraterb   rN   ri   )
r   r   r   rB   rH   args_split_dictrn   kwargs_splitr   r   s
           ` r   r
   r
      sX   p ~ *+<=?#d)K  MM&/BS2TU)Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	
 *)J 	<U3z?%;<<)  
 ##s    1Ec                    Ub  [        U5      u  p#O,[        U S   5      u  pC[        [        5      /[        U5      -  n/ nU  HJ  n[        U5      u  px[        U5      [        U5      :w  a  [	        SU SU 35      eUR                  U5        ML     / n	[        U5       GH6  u  p[        U[        5      (       Gaq  [        [        U5      5       Vs/ s H
  nX\   U
   PM     nn[        (       Ga	  US   R                  nUSS  H  nUR                  U:X  a  M   e   [        R                  " [        R                  " USS06[        U5      UR                  S9n/ nSn[        U5      [        U5      :X  d   e[        UU5       Hp  u  nnUUR!                  UR                  5      -   n[#        SSS5      /UR$                  -  n[#        UU5      UUR                  '   UU   nUR                  U5        UnMr     OUnU	R                  [        R&                  " UUR                  S	95        GM  [        U[(        5      (       aR  UR*                  n[        [        U5      5       H  nUR-                  UX\   U
   5      nM     U	R                  U5        GM  US   U
   n[        S[        U5      5       H  nX\   U
   U:X  a  M   e   U	R                  U5        GM9     [/        X5      $ s  snf )
z
Given a list of chunks, merge them into a single value according to
the chunk spec.

Args:
    chunks: list of chunks
    chunk_spec: Chunking spec for the chunks

Returns:
    value: Merged value
Nr   zChunk z did not match chunk spec    devicemeta)sectionsr?   r>   )r   r	   r   rU   rZ   rY   r   r\   ri   rd   shaper]   rc   emptyr-   r[   r_   rf   rg   catr   r   r   r   )r   rt   spec_flattenedflatten_specchunk0_flatchunks_flattenedr   chunk_flattenedrv   args_flattenedarg_idxrq   r   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxr   slicedreduced_valvalues                             r   r   r   ]  s   Z '3J'?$ %1$;!)*;<=K@PP )%03~#66veW,FzlSTT0  N!.1c?++ "'s+;'<!=!=I !+G4!=  
 '& .q 1 7 7)!"-C99555 .#00KK>v> 0 !#"#>*c+.>>>>14^[1Q-M:$3joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O 2R !/!!%))Ms}}"MN^,,..K"3'7#89	!mm!1!<W!E :
 !!+.$Q'0E"1c*:&;<	'27;uDDD =!!%(a 2f .77cs   K#)NN)loggingtypingr   r   r]   torch.fx.noder   torch.utils._pytreer   r   __all__	getLoggerr   r`   rd   r   r!   tensorsum_reducerr   r	   rR   r   rN   rO   rP   rK   rV   r
   r   r   r   r   <module>r      s4       ' < 
		8	$
   # #$	> 	 5<<,.@A  5! 5!r	 	|F >B>Bf$
S/f$T#s(^$f$ f$ eOS$89:	f$
  S/%9 :;f$ 4;T
"#f$Rw8Iw8r   