
import logging
import warnings
from collections.abc import Collection, Mapping
from copy import deepcopy
from typing import Any, Callable, Optional, overload, Union

import torch
import torch.nn as nn
from torch import optim
from torch.distributed._shard.sharded_tensor import ShardedTensor
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP


__all__: list[str] = []

logger = logging.getLogger(__name__)


class _NamedOptimizer(optim.Optimizer):
    """
    ``_NamedOptimizer`` takes a dict of parameters and exposes ``state_dict`` by parameter key.

    We replace the original key (an index) used by an optim with the
    fully qualified name (FQN) string. Users can initialize the optim as they
    initialize a PyTorch optim; the only difference is that they also need to
    pass in the FQN of each parameter.

    Args:
        named_parameters (Mapping[str, Union[torch.Tensor, ShardedTensor]]):
            Mapping from FQN to parameter.
        optimizer_class (optim.Optimizer):
            The class of optimizer to instantiate.
        param_groups (Collection[Mapping[str, Any]]):
            ``param_groups`` to pass to the optimizer if specified.
            The keys of the inner map need to be FQNs.
            Default: None
        module (nn.Module): the module whose parameters are to be updated
            by the optimizer.
        args: arguments to pass to the optimizer constructor.
        kwargs: arguments to pass to the optimizer constructor.

    Example::
        >>> # xdoctest: +SKIP("distributed")
        >>> from torch import optim
        >>> from torch.distributed.optim import _NamedOptimizer
        >>>
        >>> # Define the named optimizer.
        >>> m = Model(...)
        >>> named_optim = _NamedOptimizer(m.named_parameters(), optim.SGD)
        >>> # Forward pass + backward pass.
        >>> named_optim.step()
        >>> ...
        >>> # Calling state_dict on the named optimizer returns an FQN-keyed state_dict.
        >>> named_optim.state_dict()

    Warning: This API is still in development and subject to change.

    TODO: Add tutorial for _NamedOptimizer.
    TODO: Add documentation in the docstring for the public attributes
          like self.param_groups and self.named_parameters.
Nnamed_parametersoptimizer_classparam_groupsmodulereturnc                    [         R                  R                  S5        X0l        U R	                  5         [        U5      U l        Uc  U R                  R                  5       OUnU" U/UQ70 UD6U l        X@l	        Uc)  [        U R                  R                  5       5      U l        O[        R                  " S5        U R                  R                  5        VV	s0 s H  u  pX_M	     n
nn	/ nU H6  nUS    H*  n	X;  a  [!        SU	 S35      eUR#                  X   5        M,     M8     Xl        U R                  R                  U l        g s  sn	nf )Nz'torch.distributed.optim._NamedOptimizerzvSince we pass in param_groups, we will use param_groups to initialize the optimizer, not all parameters of the module.paramszExpect param name z% found in param group but is missing.)torch_C_log_api_usage_oncer   _param_groups_checkdictr   values
_optimizerr   listkeysordered_param_keyswarningswarnitems
ValueErrorappend)selfr   r   r   r   argskwargsparams_for_optimizerkeyparamparam_to_keyr"   groups                _/var/www/auris/envauris/lib/python3.13/site-packages/torch/distributed/optim/named_optimizer.py__init___NamedOptimizer.__init__@   sO    	$$%NO;G  " $%5 6.:.BD!!((* 	 * 

 

 &*4+@+@+E+E+G&HD#MMN :>9N9N9T9T9VW9V:3EJ9VLW!#%"8_E0(07\]  '--l.AB - & '9# OO88 Xs   Ec                    U R                   b  U R                    H  n[        U[        5      (       d   S5       eSU;   d   S5       eUS   n[        U[        R                  5      (       a  U/n[        U5      nU HD  n[        U[        R                  5      (       a  M$  [        S[        R                  " U5      -   5      e   X!S'   M     g g )Nparam group must be a dictr   z#param group must contain key paramsz>optimizer can only optimize Tensors, but one of the params is )r   
isinstancer   r   Tensorr    	TypeErrortypename)r(   param_groupr   r-   s       r0   r   #_NamedOptimizer._param_groups_checkj   s    (#00!+t44R6RR4;.U0UU.$X.fell33$XFf#E%eU\\::'8:?..:OP  $ )/H%  1 )    c                    U R                   R                  5       nUS   nUS   R                  5        VVs0 s H  u  p4U R                  U   U_M     nnn/ nU Hp  nUS    Vs/ s H  oR                  U   PM     n	nS[	        U	5      0n
UR                  5        H  u  pUS:w  d  M  [        U5      X'   M     UR                  U
5        Mr     U R                  XVS.5      $ s  snnf s  snf )z
Return the ``state_dict`` of the optimizer.

Instead of using number to index
parameters, we will use module fully qualified name (FQN) as the key.
r   stater   )r=   r   )r   
state_dictr%   r"   sortedr   r'   _post_state_dict)r(   r>   r   st_key	state_val	ret_state
ret_groupsr/   r-   
param_keys	ret_groupkvs                r0   r>   _NamedOptimizer.state_dict{   s     __//1
!.1 &0%8%>%>%@
%@! ##F+Y6%@ 	 

 
!EFKHoVoU11%8oJV!6*#56I=#+A;IL & i( " $$y%UVV
 Ws   C #C&closurec                     g N r(   rJ   s     r0   step_NamedOptimizer.step   s    14r;   c                     g rL   rM   rN   s     r0   rO   rP      s    ;>r;   c                 4    U R                   R                  US9$ )zr
Perform a single optimization step.

This will call :meth:`torch.optim.Optimizer.step` on the wrapped
optimizer.
rJ   )r   rO   rN   s     r0   rO   rP      s     ##G#44r;   c                 .    U R                   R                  $ rL   )r   r=   )r(   s    r0   r=   _NamedOptimizer.state   s    $$$r;   r>   c                    U R                   R                  5       nU R                  U5      nUS   nUS   n[        U5      S:X  a  [	        S5      e[        U R                  5       GH  u  pVXcR                  5       ;  a  M  [        X6   5      [        XE   5      :w  a*  [	        S[        XE   5       SU S[        X6   5       35      eXE   R                  5        GHz  u  pxXsU   ;  a  [	        SU SU S35      eX6   U   n	[        U[        5      (       a  [        U	[        5      (       d   e[        UR                  5       5      n
[        U	R                  5       5      nX:w  a  [	        S	U S
U
 SU SU 35      e[        UR                  5       U	R                  5       5       H8  u  pUR                  R                  5       R                  UR                  5        M:     GM  [        U[         R"                  5      (       aC  [        U	[         R"                  5      (       d   eUR                  5       R                  U	5        GMj  [%        U	5      XE   U'   GM}     GM     US   nUS   n0 nU H  n['        US   5      nUU[)        U5      '   M!     0 nU H=  n/ nUS    H!  nUR+                  U R                  U   5        M#     UU[)        U5      '   M?     UR                  5        H  u  nnUU;  a  M  UU   n[        U5      [        U5      :w  a'  [	        S[        U5       SU S
[        U5       S35      eU H4  nUU;  a  [	        SU SU S35      eUS:w  d  M#  [%        UU   5      UU'   M6     M     U R                   R-                  U5        g)a@  
        Define the default behavior to load a state_dict for ``_NamedOptimizer``.

        Sample Code
        ```
            my_model = MyModule()
            optimizer = _NamedOptimizer(my_model.named_parameters(), Adagrad)
            ...

            optim_state_dict = optimizer.state_dict()
            ...
            ...

            optimizer.load_state_dict(optim_state_dict)
            ...
        ```
        Args:
            state_dict (Dict[str, Any]): A ``state_dict`` to load into the optimizer.
                Note that this state dict update is performed in place.

        .. note:: PyTorch uses lazy init to initialize the optim states, so it
            is possible that there is no optim state when the user calls
            ``load_state_dict``. For ``_NamedOptimizer`` we are stricter and only
            allow ``load_state_dict`` to be called after the state is initialized.
            By doing this, we can validate the optim ``state_dict`` to be loaded.
        """
        new_state_dict = self._optimizer.state_dict()
        state_dict = self._pre_load_state_dict(state_dict)
        state = state_dict["state"]
        new_state = new_state_dict["state"]
        if len(new_state) == 0:
            raise ValueError(
                "Expects the optim to be initialized before load but found not initialized."
            )

        for idx, param_key in enumerate(self.ordered_param_keys):
            # With conditional training, not all parameters have optimizer state.
            if param_key not in state.keys():
                continue
            if len(state[param_key]) != len(new_state[idx]):
                raise ValueError(
                    f"Expects equal length as {len(new_state[idx])} for parameter "
                    f"{param_key} but found: {len(state[param_key])}"
                )
            # Iterate through all optimizer states for this parameter.
            for state_key, state_val in new_state[idx].items():
                if state_key not in state[param_key]:
                    raise ValueError(
                        f"Expects state {state_key} for parameter {param_key} but not found."
                    )

                src_state_val = state[param_key][state_key]
                if isinstance(state_val, ShardedTensor):
                    assert isinstance(src_state_val, ShardedTensor)
                    num_shards = len(state_val.local_shards())
                    num_new_shards = len(src_state_val.local_shards())
                    if num_shards != num_new_shards:
                        raise ValueError(
                            f"Expects equal number of shards as {num_new_shards} "
                            f"but found {num_shards} for {param_key}/{state_key}"
                        )
                    for shard, src_shard in zip(
                        state_val.local_shards(), src_state_val.local_shards()
                    ):
                        shard.tensor.detach().copy_(src_shard.tensor)
                elif isinstance(state_val, torch.Tensor):
                    assert isinstance(src_state_val, torch.Tensor)
                    state_val.detach().copy_(src_state_val)
                else:
                    new_state[idx][state_key] = deepcopy(src_state_val)

        # Load the ``param_groups`` of the state_dict.
        src_param_groups = state_dict["param_groups"]
        new_param_groups = new_state_dict["param_groups"]

        src_group_map = {}
        for group in src_param_groups:
            param_keys = list(group["params"])
            src_group_map[_gen_param_group_key(param_keys)] = group
        new_group_map = {}
        for new_group in new_param_groups:
            param_keys = []
            for param_key in new_group["params"]:
                param_keys.append(self.ordered_param_keys[param_key])
            new_group_map[_gen_param_group_key(param_keys)] = new_group
        for group_key, new_group in new_group_map.items():
            # Skip groups whose parameters are all unused or that were added during training.
            if group_key not in src_group_map:
                continue
            src_group = src_group_map[group_key]
            if len(src_group) != len(new_group):
                raise ValueError(
                    f"Expects equal param_group size as {len(new_group)} for group "
                    f"{group_key} but found {len(src_group)}."
                )
            for k in src_group:
                if k not in new_group:
                    raise ValueError(
                        f"Expects group key {k} to be in group {group_key} "
                        f"in `state_dict` but is missing."
                    )
                if k != "params":
                    new_group[k] = deepcopy(src_group[k])

        self._optimizer.load_state_dict(new_state_dict)

    def add_param_group(self, param_group: Mapping[str, Any]) -> None:
        """
        Add a param group to the :class:`_NamedOptimizer`'s ``param_groups``.

        Warning: This API is still in development and subject to change.
        """
        assert isinstance(param_group, dict), "param group must be a dict"

        params = param_group["params"]
        if isinstance(params, torch.Tensor):
            param_group["params"] = [params]
        else:
            param_group["params"] = list(params)

        param_to_key = {param: key for key, param in self.named_parameters.items()}
        for param in param_group["params"]:
            if param not in param_to_key:
                raise ValueError("some parameters are not in the module")
            self.ordered_param_keys.append(param_to_key[param])

        self._optimizer.add_param_group(param_group)
        # Keep ``param_groups`` in sync with the wrapped optimizer.
        self.param_groups = self._optimizer.param_groups

    def init_state(self) -> None:
        """
        Run a dummy optimizer step, which allows initializing the optimizer state
        because we do lazy init for most optimizers.

        This allows doing in-place loading of optimizer state from a checkpoint.
        """
        for param in self.named_parameters.values():
            if param.requires_grad:
                t = torch.zeros_like(param)
                param.grad = torch.autograd.Variable(t)
        # Calling ``step`` populates the initial optimizer state for all parameters.
        self.step(closure=None)

    def _pre_load_state_dict(self, state_dict) -> dict[str, Any]:
        # Convert an FSDP-flavored optim state_dict into one the wrapped optimizer
        # can consume before loading it.
        if isinstance(self.module, FSDP):
            return FSDP.optim_state_dict_to_load(
                self.module, self._optimizer, state_dict, is_named_optimizer=True
            )
        return state_dict

    def _post_state_dict(self, state_dict) -> dict[str, Any]:
        # Let FSDP post-process the optim state_dict returned to the caller.
        if isinstance(self.module, FSDP):
            FSDP.optim_state_dict(self.module, self._optimizer, state_dict)
        return state_dict


def _gen_param_group_key(param_keys: list[str]) -> str:
    """Concatenate all param keys as a unique identifier for one param group."""
    return "/".join(sorted(param_keys))
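
# A minimal usage sketch (illustration only, assuming a toy ``nn.Linear`` model;
# ``_NamedOptimizer`` is a private API and subject to change). It shows the
# FQN-keyed ``state_dict`` round trip that this wrapper exists to provide:
#
#     model = nn.Linear(4, 2)
#     named_optim = _NamedOptimizer(dict(model.named_parameters()), optim.Adam, lr=1e-3)
#     model(torch.randn(8, 4)).sum().backward()
#     named_optim.step()
#     sd = named_optim.state_dict()       # state keyed by FQNs such as "weight", "bias"
#     named_optim.load_state_dict(sd)     # round-trips by FQN rather than by index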