
    [ThWX                        S SK r S SKrS SKJrJr  S SKJrJrJr  S SK	J
r
JrJrJrJr  S SKJr  / SQrS\R$                  S\\R$                  /\\R$                     4   4S jrS\R$                  S	\\R$                  \\\
4   4   S
\S\\R$                  /\\R$                     4   4S jrS\R$                  S\\\R$                        S\\R$                     S\\\
4   S	\\R$                  \\\
4   4   4
S jrS\4S jr " S S\5      rS\R$                  S\S\S\\\R$                        S\4
S jr " S S\5      r " S S\5      r S\R$                  S\S\S\S\4
S jr!S\R$                  S\S\S\\\R$                        S\4
S jr"S\R$                  S\\   S\4S  jr#S\R$                  S\S\S\4S! jr$\" S"5      SS4S\R$                  S\S\S#\S$\\\\R$                           S%\\\\R$                           S\4S& jjr%\RL                  \RN                  1\%l(        \RR                  1\%l*        \ RV                  S'\
S(\
S\S)   4S* j5       r,S\R$                  S+\
S\R$                  4S, jr-S\R$                  S'\S\R$                  4S- jr. S5S\R$                  S.\S'\S\\R$                     S/\\R^                     S0\S1\
S\0\R$                  \4   4S2 jjr1 " S3 S45      r2g)6    N)ABCabstractmethod)	GeneratorIterableSequence)AnyCallablecastOptionalUnion)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                    ^^^ U 1mS[         R                  S[        S[        [         R                     4UUU4S jjmT" U SS5        g)a9  
This applies ``fn`` to every module in the module tree of ``root_module``
following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
then this replaces the original module with the newly returned one in the
tree. Otherwise, ``fn`` should return ``None``, in which case the module is
not changed.
modulemodule_nameparent_modulec                 x  > U R                  5        H'  u  p4UT;  d  M  TR                  U5        T" XCU 5        M)     T" U 5      nUbs  [        U[        R                  5      (       d   SU SU  35       eU(       d
   SU  35       e[        U[        R                  5      (       d
   SU 35       e[        X!U5        g g )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModulesetattr)	r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_moduless	         S/var/www/auris/envauris/lib/python3.13/site-packages/torch/distributed/fsdp/wrap.pyr%   2_post_order_apply.<locals>._post_order_apply_inner-   s    
 06/D/D/F+?2##L1'P 0G V*&mRYY77 O /vh07  ,,285; oryy99 @@QR9 M@ '     N)r   r    strr   )r   r   r%   r&   s    `@@r'   _post_order_applyr,      sP     (3mOA		AA  		*A A0 KT2r)   target_module_to_kwargsfsdp_fnreturnc                 n   ^ ^^ S[         R                  S[        [         R                     4UU U4S jjnU$ )z
This constructs the "wrap" function to pass to :func:`_post_order_apply`
based on ``target_module_to_kwargs``, which should be constructed from the
wrapping policy.
r   r/   c                 8   > U T;   a  U TLa  TU    nT" U 40 UD6$ g N )r   kwargsr.   r   r-   s     r'   r   _construct_wrap_fn.<locals>.fnS   s2     ,,{1J,V4F6,V,,r)   )r   r    r   )r   r-   r.   r   s   ``` r'   _construct_wrap_fnr6   H   s/    299 "))!4   Ir)   module_classesignored_modulesroot_kwargsc                     [        [        U5      5      nU R                  5        H,  nXb;   a  M
  [        Xe5      (       d  M  Xd;  a  X4U'   S XF   S'   M.     U$ )Nmixed_precision)tuplesetmodulesr   )r   r7   r8   r9   r-   module_classes_tupler   s          r'   $_run_mixed_precision_override_policyr@   ^   s`     !^!45%%'$5542=/AE#+,=> ( #"r)   c                      g)z
A simple recursive wrap policy that always returns ``True``. This means
that every submodule is wrapped by the wrapper class in
:func:`_recursive_wrap`.
Tr3   )argsr4   s     r'   r   r   r   s     r)   c                       \ rS rSrSr\S\R                  S\\R                     S\	\
\4   S\	\R                  \	\
\4   4   4S j5       rSrg	)
_Policy{   z_
This defines an abstract base class that represents a policy for applying
a module-level API.
r   r8   r9   r/   c                     g)zp
This should return a dict ``target_module_to_kwargs`` that maps from
each target module to wrap to its kwargs.
Nr3   )selfr   r8   r9   s       r'   _run_policy_Policy._run_policy   s     	r)   r3   N)__name__
__module____qualname____firstlineno____doc__r   r   r    r=   dictr+   r   rH   __static_attributes__r3   r)   r'   rD   rD   {   sk    
 
YY
 RYY
 #s(^	

 
biic3h'	(
 
r)   rD   r   recursenonwrapped_numelc                 <    U(       a  g[        U [        U5      5      $ )a  
This auto wrap policy wraps every module that is an instance of any type in
``module_classes`` as its own FSDP instance. The root module given by
``module`` is always wrapped as an FSDP instance regardless. Since the
wrapping proceeds bottom up, each FSDP instance manages the parameters in
its subtree excluding any already managed by a child FSDP instance.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.
    module_classes (Set[Type[nn.Module]]): Set of module classes that are
        wrapped as FSDP instances.

Returns:
    ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
    if ``recurse=False``.
Tr   r<   )r   rQ   rR   r7   s       r'   _module_wrap_policyrU      s    6 feN344r)   c                      ^  \ rS rSrSrS\\\R                        4S jr	S\R                  S\
\R                     S\\\4   S\\R                  \\\4   4   4S	 jrS
 rS\4U 4S jjrSrU =r$ )r      zo
This policy applies to every module of the specified module classes,
passing in the kwargs given to the root.
r7   c                 F    [        U5      nX l        [        U5      U l        g r2   )r=   _module_classesr+   _module_classes_str)rG   r7   module_classes_sets      r'   __init__ModuleWrapPolicy.__init__   s!     01#&'9#: r)   r   r8   r9   r/   c                     [        U R                  5      n0 nUR                  5        H4  nXb;   a  M
  [        Xd5      (       d  M  [        R                  " U5      XV'   M6     U$ r2   )r<   rY   r>   r   copy)rG   r   r8   r9   r7   r-   r   s          r'   rH   ModuleWrapPolicy._run_policy   s[     t334CE!))+F(F3326))K2H'/ , '&r)   c                 ,    [        XSU R                  S9$ )N)rR   r7   )rU   rY   )rG   r   rQ   rB   r4   s        r'   __call__ModuleWrapPolicy.__call__   s    "bAUAU
 	
r)   c                 B   > [         TU ]  5       SU R                   S3-   $ )N())super__repr__rZ   )rG   	__class__s    r'   ri   ModuleWrapPolicy.__repr__   s&    w!a(@(@'A$CCCr)   )rY   rZ   )rJ   rK   rL   rM   rN   r   typer   r    r\   r=   rO   r+   r   rH   rc   ri   rP   __classcell__)rj   s   @r'   r   r      s    
;xRYY'@ ;
'YY' RYY' #s(^	'
 
biic3h'	(' 
D# D Dr)   r   c                       \ rS rSrSrS\\R                  /\\	\
\\4   4   4   4S jrS\R                  S\\R                     S\
\\4   S\
\R                  \
\\4   4   4S	 jrS
rg)r      a  
This policy takes in a lambda function that maps a given ``nn.Module`` to
either ``False``, ``True``, or a kwarg dictionary.
- If the function returns ``False`` or an empty dictionary, then the module
  does not have the API applied.
- If the function returns ``True``, then the module has the API applied
  with the root's kwargs.
- If the function returns a non-empty dictionary, then the module has the
  API applied, and the dictionary overrides the root's kwargs.

Example::

    >>> # xdoctest: +SKIP("undefined variables")
    >>> model = init_transformer_model(...)
    >>> def lambda_fn(module: nn.Module):
    >>>     if module is model.lm_head:
    >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
    >>>     elif isinstance(module, TransformerBlock):
    >>>         return True
    >>>     return False
    >>> policy = CustomPolicy(lambda_fn)
    >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
	lambda_fnc                     Xl         g r2   
_lambda_fn)rG   rp   s     r'   r\   CustomPolicy.__init__   s    #r)   r   r8   r9   r/   c                 L   0 nUR                  5        H  nXR;   a  M
  U R                  U5      n[        U[        [        45      (       d  [        SU 35      eU(       d  MM  [        R                  " U5      n[        U[        5      (       a  UR                  U5        XtU'   M     U$ )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r>   rs   r   rO   bool
ValueErrorr_   update)rG   r   r8   r9   r-   r   resr4   s           r'   rH   CustomPolicy._run_policy   s     DF!))+F(//&)CcD$<00 CCF%I  YY{+F#t$$ c".4F+! ," '&r)   rr   N)rJ   rK   rL   rM   rN   r	   r   r    r   rv   rO   r+   r   r\   r=   rH   rP   r3   r)   r'   r   r      s    0$(BII;dDcN>R8S+S"T $'YY' RYY' #s(^	'
 
biic3h'	('r)   r   rp   c                 "    U(       a  gU" U 5      $ )a  
A convenient auto wrap policy to wrap submodules based on an arbitrary user
function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
a `wrapper_cls` unit.

Return if a module should be wrapped during auto wrapping.

The first three parameters are required by :func:`_recursive_wrap`.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
        this module will be wrapped.
Tr3   )r   rQ   rR   rp   s       r'   r   r     s    . Vr)   transformer_layer_clsc                     [        XX#5      $ )a  
See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
same as ``module_classes``. Note that shared parameters must be wrapped in
the same FSDP instance, so this auto wrap policy can help wrap shared
embeddings into the same FSDP instance for transformer models.
)rU   )r   rQ   rR   r|   s       r'   r   r   '  s     v0@XXr)   c                 <    U(       a  g[        U [        U5      5      $ )NTrT   )r   r7   rQ   rB   r4   s        r'   _wrap_module_cls_individuallyr   6  s      &%"788r)   c                 6   ^ ^^ [        U UU4S jU 5       5      $ )zj
A policy that wraps ``module`` if any policy in the passed in iterable of
``policies`` returns ``True``.
c              3   4   >#    U  H  nU" TTTS 9v   M     g7f)r   rQ   rR   Nr3   ).0policyr   rR   rQ   s     r'   	<genexpr>_or_policy.<locals>.<genexpr>L  s"      F 	fg@PQs   )any)r   rQ   rR   policiess   ``` r'   
_or_policyr   B  s        r)   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                     Uc  [         R                  OUnUc  [         R                  OUnUnX&:  nU(       a#  U=(       a    [        U [	        U5      5      (       + $ U=(       a    [        U [	        U5      5      (       + $ )a{  
A size-based auto wrap policy.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    min_num_params (int): Customizable policy input that controls the size
        threshold over which a module is ready to be wrapped. This is in
        units of numel.
    force_leaf_modules (Optional[set[type[nn.Module]]]): Set of module types to keep
        as leaves, i.e. their children will never be wrapped.
    exclude_wrap_modules (Optional[set[type[nn.Module]]]): Set of module types to be
        excluded in wrapping.

Returns:
    Whether ``module`` should be wrapped.
)r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr   r<   )r   rQ   rR   r   r   r   min_nonwrapped_numelis_larges           r'   r   r   R  s    B % 	$66   ' 	$88!  *7HM
659K3L MMM O
659M3N OOOr)   wrapper_clswrapper_kwargs)NNNc              +   j   #    SU 0UEn[        S0 UD6   Sv   SSS5        g! , (       d  f       g= f7f)a  
Context manager to wrap modules using a wrapper.

Useful for when you'd like to apply the same configuration arguments to all
child modules that you wrap. A particularly important use case is wrapping
large layers so that they get sharded (in-place) during initialization, to
avoid running out of system memory. Large layers can indicate that they
should be sharded via the ``wrap`` annotation and this context manager can
provide the exact configuration for these nested instances.

Usage::

    with enable_wrap(wrapper_cls, **params):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    wrapper_cls:
        Class that `wrap` annotation will `wrap` modules with, such as
        `FullyShardedDataParallel`.
    **wrapper_kwargs:
        Configuration settings that will be passed to all ``wrap``
        instances inside the context
r   Nr3   )_ConfigAutoWrap)r   r   r4   s      r'   r   r     s6     : 	{
F 
	"6	" 
#	"	"s   3"	3
03wrap_overridesc                     [         R                  (       aB  [         R                  c   e0 [         R                  EUEn[	        U [         R                  40 UD6$ U $ )a  
Annotate that a module should be wrapped. Annotated modules will only be
wrapped if inside of an :func:`enable_wrap` context manager. This allows
a module to be initialized both with and without a wrapper without code
change.

The class that this function wraps the passed in ``nn.Module`` with is the
passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
the ``wrapper_cls`` instance. In the case of duplicate kwargs in
``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
respected.

Usage::

    with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
    **wrap_overrides: configuration overrides that will take priority over
        the values provided by the :func:`enable_wrap` context
)r   in_autowrap_contextr   r4   _wrap)r   r   s     r'   r   r     s^    2 ****666EO22EnE''
 
 	

 Mr)   c                 r    Uc   e[        U S5      (       a  0 UEU R                  EnU" U 40 UD6$ U" U 40 UD6$ )N_wrap_overrides)hasattrr   )r   r   r4   	overridess       r'   r   r     sS    """v())
 9v8!7!78	6/Y//v(((r)   auto_wrap_policyignored_paramsonly_wrap_childrenr4   c           
         ^ Uc   S5       eUc   S5       eU R                  5        H.  u  pxX;   a  M   [        U[        [        U5      5      (       a   eM0     [        U4S jU R                  5        5       5      n	Uc   eU" U SU	S9(       an  Sn
U R                  5        H/  u  pX;   a  M  [        S	UUUUTS.UD6u  p[        XU5        X-  n
M1     X-
  nU(       d  U" U SUS9(       a  [        X40 UD6U	4$ X
4$ U S4$ ! [         a     M  f = f)
a  
Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
``True`` with ``wrapper_cls``.

Args:
    module (nn.Module): Module to recursively wrap.
    auto_wrap_policy (Callable): A callable representing a policy that
        determines which modules to recursively wrap with ``wrapper_cls``.
    ignored_modules (set[torch.nn.Module]): Modules to ignore when
        wrapping.
    ignored_params (set[torch.nn.Parameter]): Parameters to ignore when
        wrapping; these should be the parameters contained in the modules
        in ``ignored_modules``.
Returns:
    (nn.Module, int):
        ``module`` after wrapping and the numel recursively wrapped.
zMust specify auto_wrap_policy.zMust specify wrapper_clsc              3   R   >#    U  H  oT;  d  M
  UR                  5       v   M     g 7fr2   )numel)r   pr   s     r'   r   "_recursive_wrap.<locals>.<genexpr>
  s$      .a>2I			.s   	''Tr   r   )r   r   r   r8   r   Fr3   )named_modulesr   r
   rl   	TypeErrorsum
parametersr   _recursive_wrapr!   r   )r   r   r   r8   r   r   r4   _childrR   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainders       `          r'   r   r     sg   4 'I)II'">$>>"((*#	!%dK)@AAAAA	 +  !,,.  '''vtFVW!002KD'0? 1!1' /-1 1-M F-05 3  %:	!&659'
 779III..19K  		s   !C??
DDc                       \ rS rSr% SrSr\\S'   Sr\	\
   \S'   0 r\\\4   \S'   S\\\4   4S jr\S\S	S4S
 j5       r\SS j5       rSS jrS\S\S\S	S4S jrSrg)r   i-  z
Helper class to wrap modules based on default config args via a context manager.
See :func:`enable_wrap` for more information.
Fr   Nr   r4   c                     Xl         g r2   r4   )rG   r4   s     r'   r\   _ConfigAutoWrap.__init__7  s    r)   r/   c                     [         R                  (       a  [        S5      eS[         l        SU R                  5       ;   d   S5       e[	        [
        U S   5      [         l        U S	 U [         l        g )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tr   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)r   r   NotImplementedErrorkeysr
   r	   r   r4   r   s    r'   enable_autowrap_context'_ConfigAutoWrap.enable_autowrap_context:  si    ..%o  /3+- 	
G	
- '+8VM5J&K#=!!'r)   c                  F    S[         l        S [         l        0 [         l        g )NF)r   r   r   r4   r3   r)   r'   disable_autowrap_context(_ConfigAutoWrap.disable_autowrap_contextJ  s    .3+&*#!#r)   c                 :    U R                  U R                  5        g r2   )r   r4   )rG   s    r'   	__enter___ConfigAutoWrap.__enter__P  s    $$T[[1r)   exc_typeexc_valexc_tbc                 $    U R                  5         g r2   )r   )rG   r   r   r   s       r'   __exit___ConfigAutoWrap.__exit__S  s    %%'r)   r   )r/   N)rJ   rK   rL   rM   rN   r   rv   __annotations__r   r   r	   r4   rO   r+   r   r\   staticmethodr   r   r   r   rP   r3   r)   r'   r   r   -  s    
 !&%&*K(#*FDcNc3h  ( ( ( ( $ $
2( (s (C (D (r)   r   )F)3
contextlibr_   abcr   r   collections.abcr   r   r   typingr   r	   r
   r   r   torch.nnr   __all__r    r,   rO   r+   r6   rl   r=   r@   rv   r   rD   intrU   r   r   r   r   r   r   r   
ModuleList
ModuleDictr   MultiheadAttentionr   contextmanagerr   r   r   	Parameterr<   r   r   r3   r)   r'   <module>r      s      # 9 9 7 7 	&3&3"))hryy112&3R!"))T#s(^";<  ryyk8BII../	,##T"))_-# ^# c3h	#
 ""))T#s(^";<#(4 c (5II55 5 RYY(	5
 
5@"Dw "DJ4'7 4'nII $8;HP	8YIIYY Y tBII/	Y
 
Y	9II	9'/~	9@D	9II 
 
* c(9=;?3PII3P3P 3P
 3P !T"))_!563P #3tBII#783P 
3Pn 57MM2==3Q  0242G2G1H  .   +.     F" "c "bii "J
)")) 
)( 
) 
)&  %GIIGG G ^	G
 %G G G 299c>GT'( '(r)   