
    JThSf              &       H   S r SSKJrJrJr  SSKrSSKJr  SSKJrJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJr  SS/r " S	 S\5      rS
S\ S\ S\ S\ S\
 S3-   \l         S\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\S\4"S  jrS\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S\S\S\4"S! jr\" \S"9      S%S\\   S\\   S\\   S\\   S\\   S\\   S\S#\\   S\S\S\S\S\S\S\S\S\S\4$S$ jj5       rg)&z'Implementation for the NAdam algorithm.    )castOptionalUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_params_doc_stack_if_compiling_use_grad_for_differentiable_view_as_real	OptimizerParamsTNAdamnadamc                      ^  \ rS rSr      SSSSSS.S\S\\\4   S\\\4   S\S	\S
\S\	S\
\	   S\	S\	S\	4U 4S jjjjrU 4S jrS r\SS j5       rSrU =r$ )r      FN)foreachmaximize
capturabledifferentiableparamslrbetasepsweight_decaymomentum_decaydecoupled_weight_decayr   r   r   r   c                  > [        U[        5      (       a  UR                  5       S:w  a  [        S5      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSUS   s=::  a  S:  d  O  [        SUS    35      eSUS   s=::  a  S:  d  O  [        S	US    35      eSU::  d  [        S
U 35      eSU::  d  [        SU 35      e[	        UUUUUUU	UU
US9
n[
        TU ]  X5        g )Nr   zTensor lr must be 1-element        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0: z#Invalid beta parameter at index 1: zInvalid weight_decay value: zInvalid momentum_decay value: )
r    r!   r"   r#   r$   r%   r   r   r   r   )
isinstancer   numel
ValueErrordictsuper__init__)selfr   r    r!   r"   r#   r$   r%   r   r   r   r   defaults	__class__s                I/var/www/auris/envauris/lib/python3.13/site-packages/torch/optim/nadam.pyr.   NAdam.__init__   s    b&!!bhhjAo:;;by6rd;<<cz6se<==eAh$$B58*MNNeAh$$B58*MNNl";L>JKKn$=n=MNOO%)#9!)
 	*    c                 b  > [         TU ]  U5        U R                   GH  nUR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS5        US    GH&  nU R                  R                  U/ 5      n[        U5      S:w  d  M1  [        R                  " US	   5      (       d`  [        US	   5      nUS   (       a(  [        R                  " U[        5       UR                  S
9O[        R                  " U[        5       S9US	'   [        R                  " US   5      (       a  M  US   nUS   (       a(  [        R                  " U[        5       UR                  S
9O[        R                  " U[        5       S9US'   GM)     GM     g )Nr   Fr   r   r   r%   r   r   stepdtypedevicer8   
mu_product)r-   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r9   )r/   r?   grouppp_statestep_valmu_prod_valr1   s          r2   r<   NAdam.__setstate__J   sj   U#&&EZ/Y-\51-u55u=8_**..B/w<1$ ??76?;;#(#9
  %\2 "LL (0A0CAHH "'h>O>Q!R   !??7<+@AA&-l&;
  %\2 "LL +3D3Fqxx "'kARAT!U  - % 'r4   c                    SnUS    GH  n	U	R                   c  M  U[        R                  " U	5      -  nUR                  U	5        U	R                   R                  (       a  [        S5      eUR                  U	R                   5        U R                  U	   n
[        U
5      S:X  a  US   (       a(  [        R                  " S[        5       U	R                  S9O[        R                  " S[        5       S	9U
S
'   US   (       a(  [        R                  " S[        5       U	R                  S9O[        R                  " S[        5       S	9U
S'   [        R                  " U	[        R                  S9U
S'   [        R                  " U	[        R                  S9U
S'   UR                  U
S   5        UR                  U
S   5        UR                  U
S   5        UR                  U
S
   5        GM     U$ )NFr   z'NAdam does not support sparse gradientsr   r    r7   r'   r:   r6   r(   r;   )memory_formatexp_avg
exp_avg_sq)gradrB   
is_complexappend	is_sparseRuntimeErrorr?   rA   zerosr   r9   rE   ones
zeros_likepreserve_format)r/   rF   params_with_gradgradsexp_avgsexp_avg_sqsmu_productsstate_stepshas_complexrG   r?   s              r2   _init_groupNAdam._init_grouph   s    xAvv!u//22 ''*66##&'PQQQVV$

1u:? !. B.?.A!((S"\\#5F5HI &M !. 

2->-@R"\\#5F5HI ,' (-'7'7)>)>(E)$ +0*:*:)>)>+E,' i 01""5#67""5#67""5=1I !J r4   c                    U R                  5         SnUb%  [        R                  " 5          U" 5       nSSS5        U R                   H}  n/ n/ n/ n/ n/ n/ n	[	        [
        [        [        4   US   5      u  pU R                  UUUUUUU	5      n[        UUUUUU	U
UUS   US   US   US   US   US   US	   US
   US   US9  M     U$ ! , (       d  f       N= f)zPerform a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr!   r    r#   r$   r"   r   r%   r   r   r   )beta1beta2r    r#   r$   r"   r   r%   r   r   r   r`   )	 _cuda_graph_capture_health_checkrB   enable_gradr=   r   tuplerD   ra   r   )r/   closurelossrF   rZ   r[   r\   r]   r^   r_   rd   re   r`   s                r2   r6   
NAdam.step   s$    	--/""$y % &&E-/"$E%'H(*K(*K(*KeUl 3U7^DLE** K  ;">2$%56%Lz*',-E'Fi( .$%56'%' 'P W %$s   C


CrM   )gMb`?)g?g+?g:0yE>r   gMbp?FN)__name__
__module____qualname____firstlineno__r   r   rD   r   rh   boolr   r.   r<   ra   r   r6   __static_attributes____classcell__)r1   s   @r2   r   r      s     $(%1 $',)+ #' $)+)+ %- )+ UE\"	)+
 )+ )+ )+ !%)+ $)+ )+ )+ )+ )+V<0d "6 "6r4   a  Implements NAdam algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma_t \text{ (lr)}, \: \beta_1,\beta_2 \text{ (betas)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm} \: \lambda \text{ (weight decay)}, \:\psi \text{ (momentum decay)}    \\
            &\hspace{13mm} \: \textit{decoupled\_weight\_decay}, \:\textit{maximize}             \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
                v_0 \leftarrow 0 \text{ ( second moment)}                                 \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}\textbf{if} \: \textit{maximize}:                                       \\
            &\hspace{10mm}g_t           \leftarrow   -\nabla_{\theta} f_t (\theta_{t-1})         \\
            &\hspace{5mm}\textbf{else}                                                           \\
            &\hspace{10mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})          \\
            &\hspace{5mm} \theta_t \leftarrow \theta_{t-1}                                       \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm}\textbf{if} \: \textit{decoupled\_weight\_decay}                       \\
            &\hspace{15mm} \theta_t \leftarrow \theta_{t-1} - \gamma \lambda \theta_{t-1}                    \\
            &\hspace{10mm}\textbf{else}                                                          \\
            &\hspace{15mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm} \mu_t \leftarrow \beta_1 \big(1 - \frac{1}{2}  0.96^{t \psi} \big)     \\
            &\hspace{5mm} \mu_{t+1} \leftarrow \beta_1 \big(1 - \frac{1}{2} 0.96^{(t+1)\psi}\big)\\
            &\hspace{5mm}m_t           \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t          \\
            &\hspace{5mm}v_t           \leftarrow   \beta_2 v_{t-1} + (1-\beta_2) g^2_t          \\
            &\hspace{5mm}\widehat{m_t} \leftarrow \mu_{t+1} m_t/(1-\prod_{i=1}^{t+1}\mu_i)\\[-1.ex]
            & \hspace{11mm} + (1-\mu_t) g_t /(1-\prod_{i=1}^{t} \mu_{i})                         \\
            &\hspace{5mm}\widehat{v_t} \leftarrow   v_t/\big(1-\beta_2^t \big)                   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_t - \gamma \widehat{m_t}/
                \big(\sqrt{\widehat{v_t}} + \epsilon \big)                                       \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Incorporating Nesterov Momentum into Adam`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        momentum_decay (float, optional): momentum momentum_decay (default: 4e-3)
        decoupled_weight_decay (bool, optional): whether to decouple the weight
            decay as in AdamW to obtain NAdamW. If True, the algorithm does not
            accumulate weight decay in the momentum nor variance. (default: False)
        z	
        z

    .. _Incorporating Nesterov Momentum into Adam:
        https://openreview.net/forum?id=OM0jvwB8jIp57ZJjtNEZ
    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101

    r   r[   r\   r]   r^   r_   rd   re   r    r#   r$   r"   r%   r   r   r   r`   c          
         [        U 5       GH  u  nnU(       d  UU   OUU   * nUU   nUU   nUU   nUU   n[        R                  " U5      (       aX  [        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  R                  5       (       d  U(       a{  [        5       nUR                  R                  UR                  R                  s=:X  a  UR                  R                  :X  a  O  OUR                  R                  U;   d   SU S35       eUS-  nU(       a  UnO[        U5      nSUU-  -
  nU	S:w  a.  U(       a  UR                  SX-  -
  5        OUR                  UU	S9nUSSSUU
-  -  -  -
  -  nUSSSUS-   U
-  -  -  -
  -  nUU-  nUR                  USU-
  5        UR                  U5      R                  UUSU-
  S	9  UR                  U5      R                  5       nU(       d  U(       a^  UR                  U5      nUU-  nUU* SU-
  -  SU-
  -  -  nUU* U-  SU-
  -  -  nUR!                  UU5        UR!                  UU5        GM  [        U5      U-  nUR#                  U5        UR!                  UUU* SU-
  -  S[        U5      -
  -  S	9  UR!                  UUU* U-  SU-
  -  S	9  GM     g )
NzVIf capturable=True, params, mu_products and state_steps must be on supported devices: .r   r   alphar(         ?Q?)value)	enumeraterB   rR   view_as_realcompileris_compilingr   r9   typer   mul_addlerp_addcmul_divsqrtaddcdiv_add_)r   r[   r\   r]   r^   r_   rd   re   r    r#   r$   r"   r%   r   r   r   r`   iparamrQ   rO   rP   r;   step_tcapturable_supported_devicesr6   bias_correction2mumu_nextdenommu_product_nexts                                  r2   _single_tensor_nadamr     s   ( f%5'uQxeAhY1+ ^
 ^
QE""&&u-E%%d+D((1G++J7J ~~**,,+L+N(!!Z%6%6%;%;Qv}}?Q?QQLL%%)EE))E(FaI	F 	!Df%Dud{?1%

1r001xx\x: cC4D>,A#BCCD3$(n1L(M!NNO 	b
 	dAI&''d!e)'D/0557ZIIcNE )72OB3#(+sZ/?@AD"w#2G!HIGNN4'NN7E*(4w>OJJsONNeRC38$4j>T8T$U   NNsW}9N&O  K &r4   c                  ^, [        U 5      S:X  a  g U(       a   S5       e[        R                  R                  5       (       d?  U(       a8  [	        SS9m,[        U,4S j[        XU5       5       5      (       d   ST, S35       e[        R                  " XX#XE/5      nUR                  5        GH  u  u  nnnnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      nU(       a  [        UUUU5        U(       a  [        R                  " U5      n[        R                  R                  5       (       d>  US   R                  (       a*  [        R                   " U[        R"                  " SS	S
9SS9  O[        R                   " US5        U	S:w  aX  U(       a  [        R$                  " USX-  -
  5        O4U(       a  [        R                   " UUU	S9  O[        R&                  " UUU	S9n[        R(                  " UUSU-
  5        [        R$                  " UU5        [        R*                  " UUUSU-
  5        [        R,                  " U5      nU(       GaB  [        R.                  " UU
5      n [        R0                  " SU 5      n![        R$                  " U!S5        [        R                   " U!S5        [        R$                  " U!U5        [        R                   " U U
5        [        R0                  " SU 5      n"[        R$                  " U"S5        [        R                   " U"S5        [        R$                  " U"U5        A [        R0                  " UU5      n#[        R2                  " U#S5        [        R4                  " U#5        [        R6                  " U#5        O{U V$s/ s H  n$SU[9        U$5      -  -
  S-  PM     n#n$U V$s/ s H  n$USSS[9        U$5      U
-  -  -  -
  -  PM     n!n$U V$s/ s H   n$USSS[9        U$5      S-   U
-  -  -  -
  -  PM"     n"n$[        R$                  " UU!5        [        R:                  " UU#5        [        R                   " UU5        A#U(       Ga  [        R2                  " U!S5        [        R$                  " U!U5        [        R<                  " US5      n%[        R4                  " U%5        [        R:                  " U!U%5        U!n&A%[        R.                  " UU"5      n%[        R$                  " U"U5        [        R2                  " U%S5        [        R:                  " U"U%5        U"n'A%[        R.                  " U&U5      n([        R*                  " U(U'U5        [        R>                  " UU(U5        GM5  [A        [        UU!5       V)V*s/ s H)  u  n)n*[9        U5      SU*-
  -  S[9        U)5      -
  -  S-  PM+     sn*n)5      n&[A        [        UU"5       V)V+s/ s H)  u  n)n+[9        U5      U+-  S[9        U)5      U+-  -
  -  S-  PM+     sn+n)5      n'[        R>                  " UUUU&5        [        R>                  " UUUU'5        GM     g s  sn$f s  sn$f s  sn$f s  sn*n)f s  sn+n)f )Nr   z#_foreach ops don't support autogradF)supports_xlac              3     >#    U  Hy  u  pnUR                   R                  UR                   R                  s=:H  =(       a    UR                   R                  :H  Os  =(       a    UR                   R                  T;   v   M{     g 7frl   )r9   r   ).0rG   mpr6   r   s       r2   	<genexpr>&_multi_tensor_nadam.<locals>.<genexpr>  sa      
  Et HHMMRYY^^??t{{/?/?? >!==>Ds   BBzWIf capturable=True, params, mu_products, and state_steps must be on supported devices: ru   r(   cpu)r9   rv   r   ry   g      rx   )!rA   rB   r}   r~   r   allzipr   "_group_tensors_by_device_and_dtypevaluesr   listr   r   _foreach_negis_cpu_foreach_add_rE   _foreach_mul__foreach_add_foreach_lerp__foreach_addcmul__foreach_sqrt_foreach_mul_foreach_pow_foreach_sub__foreach_neg__foreach_sqrt_r   _foreach_div__foreach_sub_foreach_addcdiv_r   )-r   r[   r\   r]   r^   r_   rd   re   r    r#   r$   r"   r%   r   r   r   r`   grouped_tensorsgrouped_params_grouped_grads_grouped_exp_avgs_grouped_exp_avg_sqs_grouped_mu_products_grouped_state_steps__grouped_paramsgrouped_gradsgrouped_exp_avgsgrouped_exp_avg_sqsgrouped_mu_productsgrouped_state_stepsexp_avg_sq_sqrtexponentmusmu_nextsbias_correction_sqrtr6   r   step_size_gradsstep_size_expavg	numeratorr;   r   r   r   s-                                               @r2   _multi_tensor_nadamr   u  sm   ( 6{aDDD >>&&((Z'H(
$  
  #6D
 
 
 	E e  fB  eC  CD  E		E 
  BB	{HO ""$		 	d6lO<T&\>:V.?@"4<1EF"4<1EF"4<1EF /?AT !..}=M ~~**,,1DQ1G1N1N#U\\#e%DC  3Q71%##NA8I4IJ ''%~\ %*$6$6%~\%M
 	-}a%iH/7q5y	
  --.AB
 ))*=~NH$$T84CT*S)U+ .9))$9H$/#.%0 #(#5#5e=P#Q  4c: 45  !56 DW$CV4Uj...36CV ! $
 0/D sdz$/?./P&QRRS/   0 0D *T*:Q*>.)P QRRT/   	/5O-ABOS1 !S)R(&&':C@E&U+!O &&':HEE"- s+%0' **?MJI##I/?AQR ##NIO1 +..A3*G*G
B  ^sRx0C*Z:P4PQUWW*GO  3 033F/Q 0R+
G #2!"J!7'!AAC  0R
  ## ## 0/CSy %`$bs   =Z0!$Z5'Z:40Z? 0[)single_tensor_fnr   c                   [        S U 5       5      (       d  [        S5      e[        S U 5       5      (       d  [        S5      eUc  [        X	SS9u  nnU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUUUUUUUUUUUUU	U
S	9  g)
zhFunctional API that performs NAdam algorithm computation.

See :class:`~torch.optim.NAdam` for details.
c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7frl   r)   rB   r   r   ts     r2   r   nadam.<locals>.<genexpr>`       @Kqz!U\\**K   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsc              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7frl   r   r   s     r2   r   r   e  r   r   zPAPI has changed, `mu_products` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)rd   re   r    r#   r$   r   r%   r"   r   r   r`   )r   rU   r	   rB   jitis_scriptingr   r   )r   r[   r\   r]   r^   r_   r%   r   r   r   r`   r   rd   re   r    r#   r$   r"   r   funcs                       r2   r   r   D  s    8 @K@@@^
 	
 @K@@@^
 	
 1e

7 599))++STTuyy--//"#!%5%#r4   )FNFFFF)__doc__typingr   r   r   rB   r   	optimizerr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   r   rD   rq   r   r   r   rM   r4   r2   <module>r      sW   . ( (      & G
sI sn&N		 	 
 		 		 		 !O> F[L[<[ 6l[ f	[
 f[ f[ [ [ 	[ [ [ 
[ ![ [  ![" #[$ %[|LLL<L 6lL f	L
 fL fL L L 	L L L 
L !L L  !L" #L$ %L^  1EF $)" DLD<D 6lD f	D
 fD fD !D d^D D D D D  !D" #D$ 	%D& 'D( )D* 
+D GDr4   