
    [Tho;                        S SK r S SKJr  S SKJr  S SKJrJr  \R                  4S jr	S \R                  4S jr
S \R                  4S jr\R                  \R                  4S jr\R                  \R                  4S jr\R                  4S	 jr\R                  4S
 jr\R                  4S jrSS\R                  4S jr\R                  \R                  4S jr " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S  S!\5      rg)"    N)Function)groupReduceOpc                 .    [         R                  XU 5      $ )a  
Broadcasts the tensor to the whole group.

``tensor`` must have the same number of elements in all processes
participating in the collective.

Arguments:
    tensor (Tensor): Data to be sent if ``src`` is the rank of current
        process.
    src (int): Source rank.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    Tensor: Received tensor from the broadcast op.

)
_Broadcastapply)tensorsrcr   s      W/var/www/auris/envauris/lib/python3.13/site-packages/torch/distributed/nn/functional.py	broadcastr      s    " C//    c                 .    [         R                  XU 5      $ )a4  
Gathers a list of tensors in a single process.

Arguments:
    tensor (Tensor): Input tensor.
    dst (int, optional): Destination rank (default is 0).
    group (ProcessGroup, optional): The process group to work on.

Returns:
    tuple[Tensor]: List of appropriately-sized tensors with the gathered data.
)_Gatherr   )r	   dstr   s      r   gatherr       s     ==V,,r   c                 0    [         R                  " X/U Q76 $ )a  
Scatters a list of tensors to all processes in a group.

Each process will receive exactly one tensor and store its data in the
``tensor`` argument.

Arguments:
    tensors (list[Tensor]): List of tensors to scatter on the source rank.
        Receivers must pass ``None`.
    src (int, optional): Source rank (default is 0).
    group (ProcessGroup, optional): The process group to work on.

Returns:
    Tensor: Output tensor from the scatter operation.

)_Scatterr   )tensorsr
   r   s      r   scatterr   /   s    " >>#/w//r   c                 .    [         R                  XX05      $ )a  
Reduces the tensor data across all machines.

Only the process with rank ``dst`` is going to receive the final result.

Arguments:
    tensor (Tensor): Input of the collective.
    dst (int): Destination rank.
    op (optional): One of the values from
        ``torch.distributed.ReduceOp``
        enum.  Specifies an operation used for element-wise reductions.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    Tensor: Output of the collective.

)_Reducer   )r	   r   opr   s       r   reducer   C   s    $ ==%00r   c                 2    [         R                  " X#U /UQ76 $ )a  
Reduces, then scatters a list of tensors to all processes in a group.

Arguments:
    output (Tensor): Output tensor.
    input_list (list[Tensor]): List of tensors to reduce and scatter.
    op (optional): One of the values from
        ``torch.distributed.ReduceOp``
        enum.  Specifies an operation used for element-wise reductions.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    Tensor: Output of the collective.

)_Reduce_Scatterr   )output
input_listr   r   s       r   reduce_scatterr   X   s        F@Z@@r   c                 ,    [         R                  X5      $ )z
Gathers tensors from the whole group in a list.

Arguments:
    tensor (Tensor): Tensor to be broadcast from current process.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    tuple([Tensor]): Output of the collective.

)
_AllGatherr   )r	   r   s     r   
all_gatherr!   k   s     E**r   c                 .    [         R                  XU5      $ )av  
Single tensor all gather. Gathers a single tensor from all ranks, and puts them in a single output tensor.

Args:
    output_tensor (Tensor): Output tensor. It should contain
        correctly-sized tensors to be used for output of the collective.
    input_tensor (Tensor): Tensor to be broadcast from current process.
    group (ProcessGroup, optional): The process group to work on. If None,
        the default process group will be used.

Examples:
    >>> # All tensors below are of torch.int64 dtype.
    >>> # We have 2 process groups, 2 ranks.
    >>> # xdoctest: +SKIP("incorrect want text")
    >>> output_tensor = torch.zeros(2, dtype=torch.int64)
    >>> output_tensor
    [tensor([0, 0])] # Rank 0 and 1
    >>> tensor = torch.arange(1, dtype=torch.int64) + 1 + rank
    >>> tensor
    tensor([1]) # Rank 0
    tensor([2]) # Rank 1
    >>> dist.all_gather_base(output_tensor, tensor)
    >>> output_tensor
    tensor([1,2]) # Rank 0
    tensor([1,2]) # Rank 1

.. warning::
    `_all_gather_base` is experimental and subject to change.
    It is the caller's responsibility to ensure the output_tensor
    is correctly sized.

)_AllGatherBaser   )output_tensorinput_tensorr   s      r   _all_gather_baser&   z   s    B UCCr   c                 0    [         R                  " X /UQ76 $ )a  
Each process scatters list of input tensors to all processes in a group and return gathered list of tensors in output list.

Arguments:
    output_tensor_list (list[Tensor]): list of tensors to gather one per rank.
    input_tensor_list (list[Tensor]): List of tensors to scatter one per rank.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    tuple([Tensor]): Output of the collective.

)	_AlltoAllr   )output_tensor_listinput_tensor_listr   s      r   
all_to_allr+      s     ??5I7HIIr   c                 0    [         R                  X@X#U5      $ )a  
Each process splits input tensor and then scatters the split list to all processes in a group.

Then concatenate the received tensors from all the processes in the group and return single output tensor.

Arguments:
    output (Tensor): Gathered concatenated output tensor.
    input (Tensor): Input tensor to scatter.
    output_split_sizes: (list[Int], optional): Output split sizes for dim 0
        if specified None or empty, dim 0 of ``output`` tensor must divide
        equally by ``world_size``.
    input_split_sizes: (list[Int], optional): Input split sizes for dim 0
        if specified None or empty, dim 0 of ``input`` tensor must divide
        equally by ``world_size``.

Returns:
    Tensor: Output of the collective.

)_AlltoAllSingler   )r   inputoutput_split_sizesinput_split_sizesr   s        r   all_to_all_singler1      s    4   )e r   c                 .    [         R                  XU 5      $ )a  
Reduces the tensor data across all machines in such a way that all get the final result.

After the call the returned tensor is going to be bitwise
identical in all processes.

Arguments:
    tensor (Tensor): Input of the collective.
    op (optional): One of the values from
        ``torch.distributed.ReduceOp``
        enum.  Specifies an operation used for element-wise reductions.
    group (ProcessGroup, optional): The process group to work on.

Returns:
    Tensor: Output of the collective

)
_AllReducer   )r	   r   r   s      r   
all_reducer4      s    $ Bv..r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r      c                     Xl         X l        [        R                  " US9U l        UR                  5       n[        R                  " X1US9  U$ Nr   )r
   r   distget_rankrankcloner   )ctxr
   r   r	   s       r   forward_Broadcast.forward   s<    	==u- v%0r   c                     [         R                  U R                  [        R                  U R
                  U5      nU R                  U R                  :w  a  UR                  5         S S U4$ N)r   r   r
   r   SUMr   r<   zero_)r>   grad_outputgxs      r   backward_Broadcast.backward   sH    ]]377HLL#))[I77chhHHJdBr    N__name__
__module____qualname____firstlineno__staticmethodr?   rG   __static_attributes__rI   r   r   r   r      s(         r   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r      c                 d   Xl         X l        [        [        R                  " US95       Vs/ s H  n[
        R                  " U5      PM     nnUR                  5       n[        R                  " US9U:X  a  [        R                  " X5XS9  O[        R                  " US XS9  [        U5      $ s  snf r8   )r   r   ranger:   get_world_sizetorch
zeros_like
contiguousr;   r   tuple)r>   r   r   r	   itensor_lists         r   r?   _Gather.forward   s    	 /4D4G4Ge4T.U
.UEV$.U 	 
 ""$==u%,KKS>KKc7[!!
s    B-c                 b    S[         R                  " U R                  U R                  /UQ76 4-   $ NNN)r   r   r   r   )r>   grad_outputss     r   rG   _Gather.backward
  s(    x~~cggsyyP<PRRRr   rI   NrJ   rI   r   r   r   r      s*    " "$ S Sr   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r   i  c                 (  ^ Xl         X l        [        U4S jT 5       5      (       d   e[        R                  " TS   5      n[
        R                  " US9U:X  a!  [
        R                  " U[        T5      XS9  U$ [
        R                  " US XS9  U$ )Nc              3   j   >#    U  H(  oR                  5       TS    R                  5       :H  v   M*     g7f)r   N)size).0tr   s     r   	<genexpr>#_Scatter.forward.<locals>.<genexpr>  s%     B'Q668wqz00's   03r   r9   )	r
   r   allrV   rW   r:   r;   r   list)r>   r
   r   r   r   s      ` r   r?   _Scatter.forward  s{    	B'BBBBB!!'!*-==u%,LLgA  LLs8r   c                 ^    S[         R                  U R                  U R                  U5      -   $ r^   )r   r   r
   r   r>   rE   s     r   rG   _Scatter.backward  s"    gmmCGGSYYLLLr   rI   NrJ   rI   r   r   r   r     s*    	 	 M Mr   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r   i!  c                 h    Xl         X0l        UR                  5       n[        R                  " XAX#S9  U$ )Nr   r   )r
   r   r=   r:   r   )r>   r
   r   r   r	   s        r   r?   _Reduce.forward"  s*    	FB4r   c                 `    S[         R                  U R                  U R                  U5      4-   $ N)NNN)r   r   r
   r   rn   s     r   rG   _Reduce.backward*  s'    !Z%5%5cggsyy+%V$XXXr   rI   NrJ   rI   r   r   r   r   !  s*      Y Yr   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r   i/  c                     X l         UR                  5       n[        S U 5       5      n[        R                  " U[        U5      XS9  U$ )Nc              3   @   #    U  H  oR                  5       v   M     g 7frB   rX   rf   rg   s     r   rh   *_Reduce_Scatter.forward.<locals>.<genexpr>5  s     !L:KQ,,..:K   rr   )r   rX   rY   r:   r   rk   )r>   r   r   r	   r*   s        r   r?   _Reduce_Scatter.forward0  sD    	""$!!L:K!LLFD):$;Pr   c                 H    S[         R                  U R                  U5      -   $ ru   )r    r   r   rn   s     r   rG   _Reduce_Scatter.backward9  s    !J$4$4SYY$LLLr   rI   NrJ   rI   r   r   r   r   /  s*      M Mr   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r    i>  c                     UR                  5       nXl        [        [        R                  " US95       Vs/ s H  n[
        R                  " U5      PM     nn[        R                  " XBUS9  [        U5      $ s  snf r8   )	rX   r   rT   r:   rU   rV   
empty_liker!   rY   )r>   r   r	   _out_tensor_lists        r   r?   _AllGather.forward?  so     ""$	.3D4G4Ge4T.U
.UEV$.U 	 
 	u=_%%
s    A8c                 >   [         R                  " U R                  S9[         R                  R                  L ak  [         R
                  " U R                  S9n[        R                  " X   5      n[        R                  " [        R                  U R                  U/UQ76 nS U4$ U Vs/ s H  n[        R                  " U5      PM     nn[        R                  " U R                  U/UQ76 n[        R                  " [        R                  " U5      SS9nS U4$ s  snf )Nr9   r   )dim)r:   get_backendr   BackendNCCLr;   rV   r   r   r   r   rC   r(   sumstack)r>   r`   r<   rF   r	   r[   gxss          r   rG   _AllGather.backwardL  s    #)),0A0AA==syy1D!!,"45B &&x||SYYR\RB bz COO,5++F3,KO//#))[H<HC5;;s+3Bbz Ps   ) DrI   NrJ   rI   r   r   r    r    >  s(    
& 
&  r   r    c                   4    \ rS rSr\S 5       r\S 5       rSrg)r#   i[  c                 X    X0l         [        R                  " XR                  5       US9  U$ r8   )r   r:   r&   rX   )r>   r$   r%   r   s       r   r?   _AllGatherBase.forward\  s&    	m-D-D-FeTr   c                 >   [         R                  " U R                  S9[         R                  R                  L a  [         R
                  " U R                  S9n[        UR                  5       5      nUS   U-  S:w  a  [        SU SU 35      eUS   [         R
                  " U R                  S9-  US'   [        R                  " X1R                  UR                  S9n[         R                  " XA[        R                  U R                  5        O[        S5      eS US 4$ )Nr9   r   zTensor with dimensions: z8 does not have first dimension divisible by world_size: devicedtypezBackend not supported!)r:   r   r   r   r   rU   rk   re   RuntimeErrorrV   emptyr   r   _reduce_scatter_baser   rC   )r>   rE   
world_sizeout_sizerF   s        r   rG   _AllGatherBase.backwardb  s    #)),0A0AA,,399=JK,,./H{Z'1,".xj 9IISV  #1+)<)<399)MMHQK!3!3;;L;LB %%bx||SYYO788b$r   rI   NrJ   rI   r   r   r#   r#   [  s(     
    r   r#   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r(   iv  c                 :   Xl         [        [        R                  " US95       Vs/ s H  oCU   R	                  5       PM     snU l        [        R                  " US9n[        S U 5       5      n[        R                  " US9[        R                  R                  L aO  [        [        R                  " US95       H-  nS nXE:X  a  [        U5      n[        R                  " X$   XdUS9  M/     O[        R                  " U[        U5      US9  [        U5      $ s  snf )Nr9   c              3   @   #    U  H  oR                  5       v   M     g 7frB   rz   r{   s     r   rh   $_AlltoAll.forward.<locals>.<genexpr>~  s     81r}   )r   rT   r:   rU   re   input_tensor_size_listr;   rY   r   r   GLOOrk   r   r+   )r>   r   r   r   rZ   my_rankto_sends          r   r?   _AlltoAll.forwardw  s    	',T-@-@u-M'N&
'N!AJOO'N&
" --e,888%(DLL,=,==4..U;<<"7mG_/5I	 = OOW
 _%%%&
s   Dc           	          U R                    Vs/ s H2  n[        R                  " X!S   R                  US   R                  S9PM4     nnS[
        R                  " U R                  U/UQ76 -   $ s  snf )Nr   r   r_   )r   rV   r   r   r   r(   r   r   )r>   r`   re   r[   s       r   rG   _AlltoAll.backward  sv     22	
 3 KK!_33<?;P;P 3	 	 
 ioociiT|TTT
s   9A/rI   NrJ   rI   r   r   r(   r(   v  s*    & &, U Ur   r(   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r-   i  c                     Xl         UR                  5       U l        X@l        X0l        [
        R                  " UUUUUS9  U$ )N)r/   r0   r   )r   re   
input_sizer/   r0   r:   r1   )r>   r   r   r/   r0   r.   s         r   r?   _AlltoAllSingle.forward  sD    	!2 21/	
 r   c           	          [         R                  " U R                  UR                  UR                  S9nS[
        R                  U R                  UU R                  U R                  UR                  5       5      4-   $ )Nr   )NNNN)rV   r   r   r   r   r-   r   r   r/   r0   rX   )r>   rE   r	   s      r   rG   _AlltoAllSingle.backward  sq    NN;#5#5[=N=N
 (!!		&&%%&&(+
 
 	
r   rI   NrJ   rI   r   r   r-   r-     s(      
 
r   r-   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r3   i  c                     X l         Xl        UR                  [        R                  S9n[
        R                  " X1US9  U$ )N)memory_formatrr   )r   r   r=   rV   contiguous_formatr:   r4   )r>   r   r   r	   s       r   r?   _AllReduce.forward  s5    	E,C,CDU3r   c                 `    S[         R                  U R                  U R                  U5      4-   $ r^   )r3   r   r   r   rn   s     r   rG   _AllReduce.backward  s'    z//		;OQQQr   rI   NrJ   rI   r   r   r3   r3     s*      R Rr   r3   )rV   torch.distributeddistributedr:   torch.autogradr   r   r   WORLDr   r   r   rC   r   r   r!   r&   r+   r1   r4   r   r   r   r   r   r    r#   r(   r-   r3   rI   r   r   <module>r      sF      #
 . "' 0(  - %++ 0( $<<u{{ 1* +3,,ekk A& #[[ + 9> !DH =BKK J& 
++> #,,ekk /*   (Sh S2Mx M$Yh YMh M : X  6 U  UF
h 
@R Rr   