
from contextlib import contextmanager
from typing import Optional

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed import distributed_c10d
from torch.distributed._shard.sharded_tensor import ShardedTensor

from .sharder import Sharder
from .sharding_plan import ShardingPlan
from .sharding_spec import ChunkShardingSpec, ShardingSpec


def _shard_tensor(
    tensor: torch.Tensor, sharding_spec: ShardingSpec, src_rank=0, process_group=None
) -> ShardedTensor:
    """
    Given a :class:`torch.Tensor`, it shards that tensor according to the provided
    ``sharding_spec``. ``src_rank`` denotes the source rank which would be
    used as the ground truth of the data which would be scattered as shards
    across the rest of the ranks.

    Args:
        tensor (:class:`torch.Tensor`): The tensor that needs to be sharded.
        sharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`): The specification
            describing how to shard the Tensor.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the parameter that would be sharded and scattered
            across the rest of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.

    Returns:
        A :class:`ShardedTensor` sharded from the given tensor.

    .. warning::
        Only :class:`torch.distributed._shard.sharding_spec.ChunkShardingSpec` is
        currently supported as the ``sharding_spec``.
    """
    if not tensor.is_contiguous():
        raise ValueError("input tensor is not a contiguous Tensor")

    pg = (
        process_group
        if process_group is not None
        else distributed_c10d._get_default_group()
    )
    world_size = dist.get_world_size(pg)
    current_rank = dist.get_rank(pg)

    # Validate that src_rank and sharding_spec are identical across all ranks.
    gathered_list = [None] * world_size
    dist.all_gather_object(gathered_list, (src_rank, sharding_spec), group=pg)

    for idx, entry in enumerate(gathered_list):
        if src_rank != entry[0]:
            raise ValueError(
                f"src_rank={src_rank} on rank: {current_rank} does not "
                f"match with src_rank={entry[0]} on rank: {idx}"
            )
        if sharding_spec != entry[1]:
            raise ValueError(
                f"sharding_spec={sharding_spec} on rank: {current_rank} does not "
                f"match with sharding_spec={entry[1]} on rank: {idx}"
            )

    st = sharding_spec.shard(tensor, src_rank=src_rank, process_group=process_group)

    return st
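# ---------------------------------------------------------------------------
# NOTE: Illustrative sketch only -- not part of the original module. It shows
# how ``_shard_tensor`` is typically driven with a ``ChunkShardingSpec``; the
# 4-rank CUDA placements and the tensor shape below are assumptions.
def _example_shard_tensor_usage() -> ShardedTensor:
    # Chunk dimension 0 of the tensor across four hypothetical CUDA ranks.
    spec = ChunkShardingSpec(
        dim=0,
        placements=[
            "rank:0/cuda:0",
            "rank:1/cuda:1",
            "rank:2/cuda:2",
            "rank:3/cuda:3",
        ],
    )
    # Every rank must call this collectively with the same spec and src_rank;
    # rank 0's data is treated as the ground truth and scattered as shards.
    tensor = torch.rand(16, 32)
    return _shard_tensor(tensor, spec, src_rank=0)
# ---------------------------------------------------------------------------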
def shard_parameter(
    module: torch.nn.Module,
    param_name: str,
    sharding_spec: ShardingSpec,
    src_rank=0,
    process_group=None,
):
    """
    Given a :class:`torch.nn.Module`, a ``param_name`` for a parameter in that
    module, it shards that parameter according to the provided
    ``sharding_spec``. ``src_rank`` denotes the source rank which would be
    used as the ground truth of the data which would be scattered as shards
    across the rest of the ranks.

    This method replaces ``module.param_name`` with a
    :class:`torch.distributed._sharded_tensor.ShardedTensor`

    Args:
        module (:class:`torch.nn.Module`): Module whose parameter needs to be sharded.
        param_name (str): Name of the parameter of ``module`` that needs to be sharded.
        sharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`): The specification
            describing how to shard the Tensor.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the parameter that would be sharded and scattered
            across the rest of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.

    .. warning::
        Only :class:`torch.distributed._shard.sharding_spec.ChunkShardingSpec` is
        currently supported as the ``sharding_spec``.
    """
    # Perform some validation first.
    if not hasattr(module, param_name):
        raise AttributeError(f"{module._get_name()} has no attribute `{param_name}`")

    tensor = getattr(module, param_name)
    if not isinstance(tensor, torch.Tensor):
        raise ValueError(
            f"Expected {type(module).__name__}.{param_name} to be a Tensor, "
            f"but found {type(tensor).__name__}"
        )

    if not tensor.is_contiguous():
        raise ValueError(f"param: {param_name} is not a contiguous Tensor")

    st = _shard_tensor(tensor, sharding_spec, src_rank, process_group)

    # Replace the original dense parameter with the resulting ShardedTensor.
    module.register_parameter(param_name, nn.Parameter(st))
# Tracks the process group set by ``load_with_process_group``.
_CURRENT_PROCESS_GROUP: Optional[dist.ProcessGroup] = None


@contextmanager
def load_with_process_group(process_group):
    """
    Context manager to set the process group with which to load a ShardedTensor.
    """
    global _CURRENT_PROCESS_GROUP
    if _CURRENT_PROCESS_GROUP is not None:
        raise RuntimeError(
            'ProcessGroup already set by previous "load_with_process_group" '
            "context manager"
        )
    _CURRENT_PROCESS_GROUP = process_group
    try:
        yield process_group
    finally:
        _CURRENT_PROCESS_GROUP = None


def _get_current_process_group():
    """
    Retrieves the current process group set by ``load_with_process_group``.
    If not set, it just returns the default group.
    """
    global _CURRENT_PROCESS_GROUP
    if _CURRENT_PROCESS_GROUP is None:
        return distributed_c10d._get_default_group()
    else:
        return _CURRENT_PROCESS_GROUP
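# ---------------------------------------------------------------------------
# NOTE: Illustrative sketch only -- not part of the original module. It shows
# the typical call pattern for ``shard_parameter`` defined above: sharding a
# Linear layer's ``weight`` across an assumed 2-rank GPU process group.
def _example_shard_parameter_usage() -> torch.nn.Module:
    fc = nn.Linear(1024, 1024)
    colwise_spec = ChunkShardingSpec(
        dim=0,
        placements=["rank:0/cuda:0", "rank:1/cuda:1"],
    )
    # After this call ``fc.weight`` is an ``nn.Parameter`` wrapping a
    # ShardedTensor instead of a plain dense tensor.
    shard_parameter(fc, "weight", colwise_spec, src_rank=0)
    return fc
# ---------------------------------------------------------------------------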
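# ---------------------------------------------------------------------------
# NOTE: Illustrative sketch only -- not part of the original module. It shows
# the intended shape of the ``load_with_process_group`` context manager: any
# ShardedTensor deserialized inside the block is attached to ``pg`` instead of
# the default process group. ``checkpoint.pt`` is a hypothetical file name.
def _example_load_with_process_group_usage(pg: dist.ProcessGroup):
    with load_with_process_group(pg):
        state_dict = torch.load("checkpoint.pt")
    return state_dict
# ---------------------------------------------------------------------------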
def _reshard_output(
    module: torch.nn.Module, resharding_spec: ShardingSpec
) -> torch.nn.Module:
    """
    Hook a module with output resharding in the forward pass according
    to the given ``resharding_spec``.

    Args:
        module (:class:`torch.nn.Module`): Module whose output needs to be resharded.
        resharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`):
            The specification describing how the output of the module will be resharded.

    Returns:
        A :class:`torch.nn.Module` object with reshard API hooked.
    """

    def hook_func(_module, _input, output):
        # Only ShardedTensor outputs are resharded; anything else passes through.
        if isinstance(output, ShardedTensor):
            return output.reshard(resharding_spec)
        return output

    module.register_forward_hook(hook_func)
    return module
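# ---------------------------------------------------------------------------
# NOTE: Illustrative sketch only -- not part of the original module. It shows
# how the two forward hooks are typically chained: ``_reshard_output`` above
# reshards a ShardedTensor output, and ``_collect_local_shard`` (defined just
# below; the name is resolved at call time) converts it back to a plain local
# tensor. The 2-rank placements are assumptions.
def _example_output_hooks_usage(sharded_module: torch.nn.Module) -> torch.nn.Module:
    output_spec = ChunkShardingSpec(
        dim=0,
        placements=["rank:0/cuda:0", "rank:1/cuda:1"],
    )
    # After hooking, ``sharded_module(inp)`` returns a torch.Tensor local shard,
    # e.g. a [4, 16] ShardedTensor across 4 ranks comes back as a [16] tensor
    # on each rank (the size-1 sharding dim is squeezed away).
    sharded_module = _reshard_output(sharded_module, output_spec)
    sharded_module = _collect_local_shard(sharded_module)
    return sharded_module
# ---------------------------------------------------------------------------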
def _collect_local_shard(module: torch.nn.Module) -> torch.nn.Module:
    """
    Hook a module with local shards collection in the forward pass.

    This API is typically used to convert a sharded representation back to data parallel
    representation. In particular, it returns the local tensor for this Shard. If the
    size along the sharding dimension for the local tensor is 1, this dimension is removed
    from the final result. For example a [4, 16] ShardedTensor across 4 ranks is typically
    a local Tensor of size [16] across each rank and not [1, 16] across each rank.

    Args:
        module (:class:`torch.nn.Module`): Module whose output is ShardedTensor and the
            local tensor value needs to be returned.

    Returns:
        A :class:`torch.nn.Module` object with collection API hooked.
    """

    def hook_func(_module, _input, output):
        if isinstance(output, ShardedTensor):
            local_tensor = output.local_tensor()
            # Squeeze the additional dimension (added by the sharding) if the
            # local tensor has a size of 1 along the sharding dim.
            sharding_spec = output._sharding_spec
            if (
                isinstance(sharding_spec, ChunkShardingSpec)
                and local_tensor.size(sharding_spec.dim) == 1
            ):
                local_tensor = local_tensor.squeeze(output._sharding_spec.dim)
            return local_tensor

    module.register_forward_hook(hook_func)
    return module
def shard_module(module: nn.Module, plan: ShardingPlan, src_rank=0, process_group=None):
    """
    Shards a given module according to the provided sharding `plan`. This method
    first shards all the parameters according to the given sharding `plan`. Then if
    `output_plan` and `return_local_tensor` are specified in the sharding `plan`, it
    will tag the output of modules according to `output_plan` and convert the module's
    output back to data parallel according to `return_local_tensor`.

    Needs to be called on all ranks in an SPMD fashion.

    Args:
        module (:class:`torch.nn.Module`): The module to apply sharding to.
        plan (:class:`torch.distributed._shard.sharding_plan.ShardingPlan`):
            The ShardingPlan which specifies the param name to ShardingSpec mapping
            to apply to each parameter.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the module that would be sharded and scattered across the rest
            of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.
    """
    # Record Sharder paths for a sanity check on the plan, to ensure that parameter
    # entries do not conflict with the submodule tree a Sharder is working with.
    sharder_paths = []
    for name, spec in plan.plan.items():
        if isinstance(spec, Sharder):
            sharder_paths.append(name)

    # Shard the parameters according to the ShardingPlan.
    for name, spec in plan.plan.items():
        if isinstance(spec, ShardingSpec):
            # Found a sharding spec, try to shard the parameter.
            module_path, _, param_name = name.rpartition(".")

            for sharder_path in sharder_paths:
                if module_path.startswith(sharder_path):
                    raise RuntimeError(
                        f"ShardingPlan is invalid, trying to shard a parameter: {name},"
                        f" but there's already a Sharder entry for module {sharder_path},"
                        f" parameter sharding should not conflict with the submodule tree"
                        f" that a Sharder is working with!"
                    )

            mod = module.get_submodule(module_path)
            shard_parameter(
                mod, param_name, spec, src_rank=src_rank, process_group=process_group
            )
        elif isinstance(spec, Sharder):
            parent_mod_path, _, mod_name = name.rpartition(".")
            if name == "":
                raise KeyError("Module path must not be empty for custom sharder!")
            mod = module.get_submodule(name)
            parent_mod = module.get_submodule(parent_mod_path)
            sharded_mod = spec.shard(mod)
            # Swap this submodule with the sharded module.
            setattr(parent_mod, mod_name, sharded_mod)
        else:
            raise TypeError(
                f"Only `ShardingSpec` and `Sharder` are supported to shard '{name}'"
            )

    # Reshard the output of each module listed in ``output_plan``.
    if plan.output_plan is not None:
        for module_path, output_spec in plan.output_plan.items():
            if isinstance(output_spec, ShardingSpec):
                mod = module.get_submodule(module_path)
                _reshard_output(mod, output_spec)
            else:
                raise TypeError(
                    f"Only `ShardingSpec` is supported as output_plan for '{module_path}'"
                )
    # Convert the output back to data parallel for the modules listed in
    # ``return_local_tensor`` by collecting the local shard of each output.
    if plan.return_local_tensor is not None:
        for module_path in plan.return_local_tensor:
            mod = module.get_submodule(module_path)
            _collect_local_shard(mod)
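

# ---------------------------------------------------------------------------
# NOTE: Illustrative sketch only -- not part of the original module. It pulls
# the pieces above together: a hypothetical two-layer MLP is sharded with a
# ShardingPlan that also reshards the root output and hands back a local
# tensor. Names such as ``_ExampleMLP`` and the 2-rank placements are
# assumptions, in the spirit of the ShardingPlan documentation.
class _ExampleMLP(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.fc1 = nn.Linear(1024, 4096)
        self.fc2 = nn.Linear(4096, 1024)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.fc2(torch.relu(self.fc1(x)))


def _example_shard_module_usage() -> nn.Module:
    colwise_spec = ChunkShardingSpec(dim=0, placements=["rank:0/cuda:0", "rank:1/cuda:1"])
    rowwise_spec = ChunkShardingSpec(dim=1, placements=["rank:0/cuda:0", "rank:1/cuda:1"])
    plan = ShardingPlan(
        # Shard fc1.weight along dim 0 and fc2.weight along dim 1.
        plan={"fc1.weight": colwise_spec, "fc2.weight": rowwise_spec},
        # Reshard the root module's output, then collect a local tensor from it.
        output_plan={"": colwise_spec},
        return_local_tensor=[""],
    )
    mlp = _ExampleMLP()
    # Must be called on every rank (SPMD); the default process group is used.
    shard_module(mlp, plan)
    return mlp
# ---------------------------------------------------------------------------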