
    JThP              !       x   S SK JrJrJr  S SKrS SKJr  SSKJrJrJ	r	J
r
JrJrJrJrJrJrJrJr  SS/r " S S\5      rS	S
\ S\
 S\ S\	 S3	-   \l               S$S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\S\\   S\S\S\S\S\S\S\4 S jjrS rS\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\4S  jrS\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\4S! jrS\\   S\\   S\\   S\\   S\\   S\\   S\S\S\S\S\S\S\S\S"S4S# jrg)%    )castOptionalUnionN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype
_get_value_maximize_doc_params_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTAdagradadagradc                      ^  \ rS rSr      SSSSS.S\S\\\4   S\S\S	\S
\S\\	   S\	S\	S\\	   4U 4S jjjjr
U 4S jrS rS r\SS j5       rSrU =r$ )r      NF)maximizedifferentiablefusedparamslrlr_decayweight_decayinitial_accumulator_valueepsforeachr   r   r   c                z  > [        U[        5      (       a  UR                  5       S:w  a  [        S5      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      e[	        UUUUUUUU	U
S	9	n[
        TU ]  X5        U
(       a+  U	(       a  [        S
5      eU(       a  [        S5      eSU l        U R                   H  nUS    H  nU R                  U   nUS   (       a*  [        R                  " S[        US   S9UR                  S9O[        R                  " S[        5       S9US'   [        R                   " U5      (       a  [#        XU5      OUn[        R$                  " X[        R&                  S9US'   M     M     g )Nr   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid lr_decay value: zInvalid weight_decay value: z)Invalid initial_accumulator_value value: zInvalid epsilon value: )	r   r   r    r   r   r!   r   r   r   z)`fused` does not support `differentiable`z0`fused` and `foreach` cannot be `True` together.Tr   r    is_fused)dtypedevicer&   step)memory_formatsum)
isinstancer   numel
ValueErrordictsuper__init__RuntimeError"_need_device_dtype_check_for_fusedparam_groupsstatetorchzerosr   r'   tensor
is_complexcomplex	full_likepreserve_format)selfr   r   r   r   r   r    r!   r   r   r   defaultsgrouppr5   
init_value	__class__s                   K/var/www/auris/envauris/lib/python3.13/site-packages/torch/optim/adagrad.pyr1   Adagrad.__init__   s    b&!!bhhjAo:;;by6rd;<<h7zBCCl";L>JKK//;<U;VW  cz6se<==%&?)

 	*"#NOO"#UVV6:D3&&E8_

1 W~ KK/wH xx c1B1DE f ''** 5Q2 
  %1F1F e! % '    c                   > [         TU ]  U5        S nU R                   HK  nUR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5      nMM     [	        U R
                  R                  5       5      n[        U5      S:g  =(       a    [        R                  " US   S   5      nU(       d5  U H.  n[        R                  " [        US   5      [        US9S	9US'   M0     g g )
Nr!   r   Fr   r   r   r)   r$   r(   )r0   __setstate__r4   
setdefaultlistr5   valueslenr6   	is_tensorr8   floatr   )r=   r5   r   r?   state_valuesstep_is_tensorsrB   s          rC   rG   Adagrad.__setstate__a   s    U# &&EY-Z/-u5$$Wd3E	 ' DJJ--/0l+q0 
eooOF#7
 !!LL!F)$,=u,M&	 " rE   c                     U R                    H1  nUS    H%  nU R                  U   nUS   R                  5         M'     M3     g )Nr   r+   )r4   r5   share_memory_)r=   r?   r@   r5   s       rC   share_memoryAdagrad.share_memoryv   s=    &&E8_

1e**, % 'rE   c                    Su  pgUS    H  nUR                   c  M  US   (       a#  [        U SS5      (       a  [        USS9  SU l        XhR                   R                  -  nU[
        R                  " U5      -  nUR                  U5        UR                  UR                   5        U R                  U   n	UR                  U	S   5        UR                  U	S	   5        M     Xg4$ )
N)FFr   r   r3   T)cuda_unsupportedFr+   r)   )	gradgetattrr	   r3   	is_sparser6   r9   appendr5   )
r=   r?   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexr@   r5   s
             rC   _init_groupAdagrad._init_group|   s    '3$xAvv!>g8' '
 2!dK>CD;66#3#33u//22 ''*QVV$

1!!%,/""5=1 !" ++rE   c                 h   SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   Hf  n/ n/ n/ n/ nU R                  X4XVU5      u  p[	        UUUUUS   US   US   US   UUS   US   US   U	US	   [        U S
S5      [        U SS5      S9  Mh     U$ ! , (       d  f       N= f)zPerform a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r   r    r!   r   r   r   
grad_scale	found_inf)r   r   r   r    r`   r!   r   r   ra   r   re   rf   )r6   enable_gradr4   rb   r   rY   )
r=   closurelossr?   r\   r]   r^   r_   r`   ra   s
             rC   r)   Adagrad.step   s     ""$y % &&E-/"$E')J(*K+/+;+;K,(O  ;">2z*%L /i(z*$%56'Gn"4t<!$T:! ': A %$s   B##
B1)r3   )g{Gz?r   r   r   g|=NN)__name__
__module____qualname____firstlineno__r   r   rM   r   r   boolr1   rG   rT   rb   r   r)   __static_attributes____classcell__)rB   s   @rC   r   r      s     $(+,"&D $ $DD %- D 	D
 D $)D D $D D D ~D DL*-,* "* "*rE   a[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    r   r]   r^   r_   r   re   rf   r`   r!   r   ra   r   r   r   r    r   c                L   [        S U 5       5      (       d  [        S5      eUc  Uc  [        X	SS9u  nnUc  SnUc  SnU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a*  [        R                  R                  5       (       d  [        nO7U(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUUUUUUUU	U
UUS9  g)	zlFunctional API that performs Adagrad algorithm computation.

See :class:`~torch.optim.Adagrad` for details.
c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7frk   )r,   r6   r   ).0ts     rC   	<genexpr>adagrad.<locals>.<genexpr>  s     @Kqz!U\\**Ks   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizersz4torch.jit.script not supported with fused optimizers
r   r   r   r    r`   r   r   ra   re   rf   )	allr2   r   r6   jitis_scripting_fused_adagrad_multi_tensor_adagrad_single_tensor_adagrad)r   r]   r^   r_   r   re   rf   r`   r!   r   ra   r   r   r   r    r   _funcs                     rC   r   r      s   2 @K@@@^
 	
 }1e

7 }599))++STT''))QRRUYY++--	//11$%!'%rE   c                 P    U R                  5       n[        R                  " XU5      $ rk   )sizer6   sparse_coo_tensor)rX   grad_indicesrJ   r   s       rC   _make_sparser   <  s     99;D""<>>rE   c          
      v   Uc  Ub   e[        XX#5       GH   u  pnnUS-  n[        U5      nU(       d  UOU* nUS:w  a+  UR                  (       a  [        S5      eUR	                  XS9nUSUS-
  U-  -   -  nUR                  (       a  UR                  5       nUR                  5       nUR                  5       nUR                  [        UUUR                  S5      5      5        UR                  U5      nUR                  5       R                  5       R                  U	5      nUR                  [        UUUU-  5      U* S9  GM/  [        R                  " U5      nU(       aB  [        R                  " U5      n[        R                  " U5      n[        R                  " U5      nUR!                  XSS9  U(       a  UR#                  5       U	-   nOUR#                  5       R                  U	5      nUR%                  UUU* S9  U(       d  GM  [        R&                  " U5      n[        R&                  " U5      nGM#     g )Nr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)zipr   rZ   r2   addcoalesce_indices_valuesadd_r   powsparse_masksqrt_r6   r9   view_as_realaddcmul_sqrtaddcdiv_view_as_complex)r   r]   r^   r_   re   rf   r   r   r   r    r`   r   r   ra   paramrX   	state_sumstep_tr)   clrr   grad_valuesstd
std_valuesr9   s                            rC   r   r   A  s   " )"333*-fZ*U&Y!&!#t$1~~"Q  88E86DAX--.>>==?D==?L,,.KNN<lKOOA<NOP''-C,,.33C8JJJT<z1IJSVRV   ))%0J))$/!..y9	**51t3nn&,nn&++C0NN4SDN1z--e4!11)<	Q +VrE   c                B   U(       a   S5       eUc  Ub   e[        U 5      S:X  a  g [        R                  " XX#/5      nUR                  5        GHG  u  u  nnnnn[	        [
        [           U5      n[	        [
        [           U5      n[	        [
        [           U5      n[	        [
        [           U5      nU
=(       a    [        S U 5       5      nU(       a  [        UUUUUUUU	SUUUUUS9  M  U(       a  [        UUU5        U(       a  [        R                  " U5      n[        R                  R                  5       (       d>  US   R                  (       a*  [        R                  " U[        R                   " SSS9SS	9  O[        R                  " US
5        US:w  a4  U(       a  [        R                  " UUUS	9  O[        R"                  " UUUS	9nU Vs/ s H  nU* S
[%        U5      S
-
  U-  -   -  PM     nn[        R&                  " UUUS
S9  [        R(                  " U5      n[        R                  " UU	5        US:w  d  U(       a  [        R*                  " UU5        UnO[        R,                  " UU5      n[        R.                  " UUU5        GMJ     g s  snf )Nz#_foreach ops don't support autogradr   c              3   8   #    U  H  oR                   v   M     g 7frk   )rZ   )ru   rX   s     rC   rw   (_multi_tensor_adagrad.<locals>.<genexpr>  s      9
'3tNN|s   Trz   g      ?cpu)r'   r   r   r   )rK   r   "_group_tensors_by_device_and_dtyperJ   r   rI   r   anyr   r   r6   _foreach_negcompileris_compilingis_cpu_foreach_add_r8   _foreach_addr   _foreach_addcmul__foreach_sqrt_foreach_mul__foreach_mul_foreach_addcdiv_)r   r]   r^   r_   re   rf   r   r   r   r    r`   r   r   ra   grouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_r   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsdevice_has_sparse_gradr)   	minus_clrr   	numerators                                rC   r   r   ~  s   " DDD)"333 6{a#FF	
0  &&(		 	T&\>:DL-8 f/AB!$v,0CD!0 "
S 9
'39
 6
 ""!")! $!-'%#   -7HI --l;L ~~**,,1CA1F1M1M"ELLU$C3  2A61##L-|T$11 -| 
 GY
FXdRC1
4(1,889FX 	 
 	 1<UVW!!"34C%1i8$I**<CIy#>Q )p
s   "Jreturnc                @   U (       d  g U
(       d  U(       a  [        S5      eU(       a  [        S5      eUb  UR                  U0OS nUb  UR                  U0OS n[        R                  " XX#/5      nUR	                  5        GH  u  u  nnu  u  nnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      nSu  nnUb!  Ub  UU;  a  UR                  USS9UU'   UU   nUb   Ub  X_;  a  UR                  USS9UU'   UU   n[        R                  " US5        [        R                  " UUUUUUUU	UUUS9  Uc  M  [        R                  " UU/[        U5      -  5        GM     g )Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=True)NNT)non_blockingr   )r   r   r   r    r   re   rf   )r2   r'   r   r   itemsr   rI   r   tor6   r   _fused_adagrad__foreach_sub_rK   )r   r]   r^   r_   re   rf   r   r   r   r    r`   r   r   ra   grad_scale_dictfound_inf_dictgrouped_tensorsr'   r   r   r   r   r   r   r   r   r   device_grad_scaledevice_found_infs                                rC   r~   r~     s   " +RSSJ
 	

 ,6+A		J't  7@6Ki&&	2QUNBB	
0O 
			 	 	
	T&\>:DL-8 f/AB!$v,0CD.8++!o&A_,*4--T-*R' / 7 ^%?.)2f4)Pv&-f5.2%(&	
 '"%5$6=O9P$P= 
!rE   )NNNFNFF)typingr   r   r   r6   r   	optimizerr   r	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__rI   rp   rM   r   r   r   r   r~   r#   rE   rC   <module>r      s   ( (       i
 bi bL4		 	 
 		 		 5. p !#'"& "" GLG<G VG f	G
 D>G  G G G d^G G G 	G  !G" #G$ 
%G& 'GT?
:=L:=<:= V:= f	:=
  := := 	:= := := 
:= := := := :=zh?Lh?<h? Vh? f	h?
  h? h? 	h? h? h? 
h? h? h? h? h?VKLK<K VK f	K
  K K 	K K K 
K K K K K  
!KrE   