
    JTh@                        S SK JrJrJrJr  S SKrS SKJr  SSKJrJ	r	J
r
JrJrJrJrJrJrJrJrJrJr  SS/r " S S\5      rS	S
\ S\ S\ S\ S\
 S3-   \l        S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\4S jrS\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\4S jr\" \S9    S S\\   S\\   S\\   S\\   S\\   S\S\\   S\S\S\S\S\S\S\4S jj5       rg)!    )AnycastOptionalUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype_maximize_doc_params_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTAdadeltaadadeltac                      ^  \ rS rSr     SSSSS.S\S\\\4   S\S\S\S	\\	   S
\	S\	S\	4U 4S jjjjr
U 4S jrS\\\4   S\\   S\\   S\\   S\\   S\\   4S jr\SS j5       rSrU =r$ )r      F)
capturablemaximizedifferentiableparamslrrhoepsweight_decayforeachr   r   r   c                ^  > [        U[        5      (       a  UR                  5       S:w  a  [        S5      eSU::  d  [        SU 35      eSUs=::  a  S::  d  O  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      e[	        UUUUUUUU	S	9n
[
        TU ]  X5        g )
Nr   zTensor lr must be 1-elementg        zInvalid learning rate:       ?zInvalid rho value: zInvalid epsilon value: zInvalid weight_decay value: )r   r   r    r!   r   r   r"   r   )
isinstancer   numel
ValueErrordictsuper__init__)selfr   r   r   r    r!   r"   r   r   r   defaults	__class__s              L/var/www/auris/envauris/lib/python3.13/site-packages/torch/optim/adadelta.pyr*   Adadelta.__init__   s     b&!!bhhjAo:;;by6rd;<<c S 23%899cz6se<==l";L>JKK%!)	
 	*    c                 P  > [         TU ]  U5        U R                   GH  nUR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS5        US    H  nU R                  R                  U/ 5      n[        U5      S:w  d  M0  [        R                  " US   5      (       a  MP  [        US   5      nUS   (       a(  [        R                  " U[        5       UR                  S	9O[        R                  " U[        5       S
9US'   M     GM     g )Nr"   r   Fr   r   r   r   stepdtypedevicer4   )r)   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r5   )r+   r:   grouppp_statestep_valr-   s         r.   r7   Adadelta.__setstate__@   s    U#&&EY-Z/-u5\518_**..B/w<1$U__WV_-M-M$WV_5H
 !. $,=,? #\\(:K:MN FO	 % 'r0   rA   params_with_gradgradssquare_avgs
acc_deltasstate_stepsc                 
   SnUS    GHv  nUR                   c  M  U[        R                  " U5      -  nUR                  U5        UR                   R                  (       a  [        S5      eUR                  UR                   5        U R                  U   n	[        U	5      S:X  a  US   (       a(  [        R                  " S[        5       UR                  S9O[        R                  " S[        5       S9U	S	'   [        R                  " U[        R                  S
9U	S'   [        R                  " U[        R                  S
9U	S'   UR                  U	S   5        UR                  U	S   5        UR                  U	S	   5        GMy     U$ )NFr   z*Adadelta does not support sparse gradientsr   r    r3   r6   r2   )memory_format
square_avg	acc_delta)gradr=   
is_complexappend	is_sparseRuntimeErrorr:   r<   zerosr   r5   
zeros_likepreserve_format)
r+   rA   rF   rG   rH   rI   rJ   has_complexrB   r:   s
             r.   _init_groupAdadelta._init_groupS   sN    xAvv~5++A..K##A&vv"#OPPLL JJqME 5zQ \* KK*;*=ahhOR/@/BC f ',&6&6U%:%:'l# &+%5%5U%:%:&k" u\23eK01uV}-9 !< r0   c                 ~   U R                  5         SnUb%  [        R                  " 5          U" 5       nSSS5        U R                   Ha  n/ n/ n/ n/ n/ nUS   US   US   US   US   US   US   US	   4u  n	n
nnnnnnU R	                  X4XVXx5      n[        UUUUUU	U
UUUUUUUS
9  Mc     U$ ! , (       d  f       N= f)zPerform a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r    r!   r"   r   r   r   )	r   r   r    r!   r"   r   r   r   rX   ) _cuda_graph_capture_health_checkr=   enable_gradr8   rY   r   )r+   closurelossrA   rF   rG   rH   rI   rJ   r   r   r    r!   r"   r   r   r   rX   s                     r.   r2   Adadelta.step~   s     	--/""$y % &&E-/"$E(*K')J(*K deen%i j!&'l#		 **ZK  )!-%'= '^ e %$s   B..
B<rL   )r$   g?gư>r   NN)__name__
__module____qualname____firstlineno__r   r   r?   r   r   boolr*   r7   r(   strr   listrY   r   r2   __static_attributes____classcell__)r-   s   @r.   r   r      s	    $'"&"+ !$"+"+ %- "+ 	"+
 "+ "+ $"+ "+ "+ "+ "+H&)CH~) v,) F|	)
 &\) L) &\)V "= "=r0   a  Implements Adadelta algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)},
                \: f(\theta) \text{ (objective)}, \: \rho \text{ (decay)},
                \: \lambda \text{ (weight decay)}                                                \\
            &\textbf{initialize} :  v_0  \leftarrow 0 \: \text{ (square avg)},
                \: u_0 \leftarrow 0 \: \text{ (accumulate variables)}                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm} v_t      \leftarrow v_{t-1} \rho + g^2_t (1 - \rho)                    \\
            &\hspace{5mm}\Delta x_t    \leftarrow   \frac{\sqrt{u_{t-1} +
                \epsilon }}{ \sqrt{v_t + \epsilon}  }g_t \hspace{21mm}                           \\
            &\hspace{5mm} u_t  \leftarrow   u_{t-1}  \rho +
                 \Delta x^2_t  (1 - \rho)                                                        \\
            &\hspace{5mm}\theta_t      \leftarrow   \theta_{t-1} - \gamma  \Delta x_t            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `ADADELTA: An Adaptive Learning Rate Method`_.
    z
    Args:
        ar  
        lr (float, Tensor, optional): coefficient that scale delta before it is applied
            to the parameters (default: 1.0)
        rho (float, optional): coefficient used for computing a running average
            of squared gradients (default: 0.9). A higher value of `rho` will
            result in a slower average, which can be helpful for preventing
            oscillations in the learning process.
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-6).
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zd

    .. _ADADELTA\: An Adaptive Learning Rate Method:
        https://arxiv.org/abs/1212.5701

    r   rG   rH   rI   rJ   r   r   r    r!   r   r   r   rX   c                  ^ [         R                  R                  5       (       d>  U(       a7  [        SS9m[	        U4S j[        X5       5       5      (       d   ST S35       e[        XX#U5       GH  u  pnnnUS-  nU	(       d  UOU* nUS:w  a  UR                  XS9n[         R                  " U5      (       aB  [         R                  " U5      n[         R                  " U5      n[         R                  " U5      nUR                  U5      R                  XSU-
  S	9  UR                  U5      R                  5       nUR                  U5      R                  5       nU
(       a  UR                  5       nUR                  U5      R                  U5        UR                  U5      R                  UUSU-
  S	9  [         R                  " U5      (       a  [         R                  " U5      nUR                  UU* S9  GM     g )
NFsupports_xlac              3      >#    U  HT  u  pUR                   R                  UR                   R                  :H  =(       a    UR                   R                  T;   v   MV     g 7fra   r5   type.0rB   r2   capturable_supported_devicess      r.   	<genexpr>*_single_tensor_adadelta.<locals>.<genexpr>	  N      
 4 HHMMT[[--- >!==>3   AAIIf capturable=True, params and state_steps must be on supported devices: .r   r   alphavalue)r=   compileris_compilingr   allzipaddrQ   view_as_realmul_addcmul_sqrt_clonediv_view_as_complexadd_)r   rG   rH   rI   rJ   r   r   r    r!   r   r   r   rX   paramrP   rN   rO   r2   stddeltars   s                       @r.   _single_tensor_adadeltar      s   " >>&&((Z'H(
$  
 v3
 
 
 	w WWsVttuv		w 
 58{50ZD 		#t$188E86DE""++J7J**95I%%d+D%%dC%@nnS!'')c"((*KKME

3T"s$$UES$AE""))%0E

5
$15r0   c          	        ^ U
(       a   S5       e[         R                  R                  5       (       d>  U(       a7  [        SS9m[	        U4S j[        X5       5       5      (       d   ST S35       e[        U 5      S:X  a  g [        R                  " XX#U/5      nUR                  5        GH  u  u  nnnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      nU(       a  [        UUUU5        [         R                  R                  5       (       d>  US   R                  (       a*  [         R                  " U[         R                   " SS	S
9SS9  O[         R                  " US5        U	(       a  [         R"                  " U5      nUS:w  a4  U	(       a  [         R                  " UUUS9  O[         R$                  " UUUS9n[         R&                  " UU5        [         R(                  " UUUSU-
  S9  [         R$                  " UU5      n[         R*                  " U5        [         R$                  " UU5      n[         R*                  " U5        [         R,                  " UU5        [         R&                  " UU5        [         R&                  " UU5        [         R(                  " UUUSU-
  S9  U(       aQ  [/        U[         R                  5      (       a2  [         R&                  " UU* 5        [         R                  " UU5        GM  [         R                  " UUU* S9  GM     g )Nz#_foreach ops don't support autogradFrl   c              3      >#    U  HT  u  pUR                   R                  UR                   R                  :H  =(       a    UR                   R                  T;   v   MV     g 7fra   ro   rq   s      r.   rt   )_multi_tensor_adadelta.<locals>.<genexpr>A  rv   rw   rx   ry   r   r$   cpu)r5   rz   r   r|   )r=   r~   r   r   r   r   r<   r   "_group_tensors_by_device_and_dtypevaluesr   rh   r   r   is_cpu_foreach_add_r@   _foreach_neg_foreach_add_foreach_mul__foreach_addcmul__foreach_sqrt__foreach_div_r%   )r   rG   rH   rI   rJ   r   r   r    r!   r   r   r   rX   grouped_tensorsdevice_params_device_grads_device_square_avgs_device_acc_deltas_device_state_steps__device_paramsdevice_gradsdevice_square_avgsdevice_acc_deltasdevice_state_stepsr   deltasrs   s                              @r.   _multi_tensor_adadeltar   *  s     DDD >>&&((Z'H(
$  
 v3
 
 
 	w WWsVttuv		w 
 6{aBB	=O ""$		 	T&\>:DL-8!$v,0CD f/AB!$v,0CD|-?AR ~~**,,1CA1F1M1M"ELLU$C3  2A6 --l;L1##L-|T$11 -|  	.4l!c'	
   !3S9S!##$5s;V$FC(FL1-s3 166SQ *R66,v6vbSAq %r0   )single_tensor_fnr"   c	                   [         R                  R                  5       (       d"  [        S U 5       5      (       d  [	        S5      eUc  [        XSS9u  pU(       a.  [         R                  R                  5       (       a  [	        S5      eU(       a*  [         R                  R                  5       (       d  [        nO[        nU" U UUUUU	U
UUUUUUS9  g)znFunctional API that performs Adadelta algorithm computation.

See :class:`~torch.optim.Adadelta` for details.
c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7fra   )r%   r=   r   )rr   ts     r.   rt   adadelta.<locals>.<genexpr>  s!      5-8
1ell##[s   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)r   r   r    r!   r   r   r   rX   )
r=   r~   r   r   rT   r
   jitis_scriptingr   r   )r   rG   rH   rI   rJ   r   r"   r   rX   r   r   r    r!   r   r   funcs                   r.   r   r     s    6 >>&&(( 5-85 2 2 ^
 	

 1e

 599))++STTuyy--//%&!%r0   )FNFF)typingr   r   r   r   r=   r   	optimizerr	   r
   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__rh   r?   rf   r   r   r   rL   r0   r.   <module>r      s   - -     " z
"ay aJ8		 
	 
 		 		 		 90 	 j3%L3%<3% f3% V	3%
 f3% 	3% 
3% 
3% 3% 3% 3% 3% 3%laBLaB<aB faB V	aB
 faB 	aB 
aB 
aB aB aB aB aB aBH  1HI " =L=<= f= V	=
 f= = d^= = = 	= 
= 
=  !=" #= J=r0   