a
    hTQ                     @   s  d dl mZmZmZ d dlZd dlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZ ddgZG dd deZd	d
e de
 de de	 d	 e_dee ee ee ee ee ee ee eee eeeeeeedddZdd Zee ee ee ee ee ee eeeeeeeedddZee ee ee ee ee ee eeeeeeeedddZee ee ee ee ee ee eeeeeeeeddddZdS )    )castOptionalUnionN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype
_get_value_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real	OptimizerParamsTAdagradadagradc                       sz   e Zd Zdddddeeeef eeeeee eeee d
 fdd	Z	 fd
dZ
dd Zdd ZedddZ  ZS )r   {Gz?r   绽|=NF)maximizedifferentiablefused)
paramslrlr_decayweight_decayinitial_accumulator_valueepsforeachr   r   r   c                   sh  t |tr| dkrtdd|ks4td| d|ksJtd| d|ks`td| d|ksvtd| d|kstd| t||||||||	|
d		}t || |
r|	rtd
|rtdd| _| j	D ]}|d D ]x}| j
| }|d rtjdt|d d|jdntjdt d|d< t|rFt||n|}tj||tjd|d< qqd S )Nr   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid lr_decay value: zInvalid weight_decay value: z)Invalid initial_accumulator_value value: zInvalid epsilon value: )	r   r   r    r   r   r!   r   r   r   z)`fused` does not support `differentiable`z0`fused` and `foreach` cannot be `True` together.Tr   r    Zis_fused)dtypedevicer$   step)Zmemory_formatsum)
isinstancer   Znumel
ValueErrordictsuper__init__RuntimeError"_need_device_dtype_check_for_fusedparam_groupsstatetorchzerosr   r%   tensor
is_complexcomplexZ	full_likeZpreserve_format)selfr   r   r   r   r   r    r!   r   r   r   defaultsgrouppr1   Z
init_value	__class__r"   A/var/www/auris/lib/python3.9/site-packages/torch/optim/adagrad.pyr-      sd    

zAdagrad.__init__c                    s   t  | d }| jD ]4}|dd  |dd |dd |dd }qt| j }t|dkovt	|d d }|s|D ]$}tj
t|d t|dd	|d< qd S )
Nr!   r   Fr   r   r   r'   r#   r&   )r,   __setstate__r0   
setdefaultlistr1   valueslenr2   Z	is_tensorr4   floatr   )r7   r1   r   r9   Zstate_valuesZstep_is_tensorsr;   r"   r=   r>   b   s     

zAdagrad.__setstate__c                 C   s4   | j D ](}|d D ]}| j| }|d   qqd S )Nr   r(   )r0   r1   Zshare_memory_)r7   r9   r:   r1   r"   r"   r=   share_memoryw   s    

zAdagrad.share_memoryc           
      C   s   d\}}|d D ]}|j d ur|d rDt| ddrDt|dd d| _||j jO }|t|O }|| ||j  | j| }	||	d  ||	d	  q||fS )
N)FFr   r   r/   T)Zcuda_unsupportedFr(   r'   )	gradgetattrr   r/   	is_sparser2   r5   appendr1   )
r7   r9   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexr:   r1   r"   r"   r=   _init_group}   s$    



zAdagrad._init_groupc           
      C   s   d}|dur:t   | }W d   n1 s00    Y  | jD ]}g }g }g }g }| |||||\}}	t|||||d |d |d |d ||d |d |d |	|d	 t| d
dt| ddd q@|S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r    r!   r   r   r   
grad_scale	found_inf)r   r   r   r    rN   r!   r   r   rO   r   rQ   rR   )r2   Zenable_gradr0   rP   r   rG   )
r7   closureZlossr9   rJ   rK   rL   rM   rN   rO   r"   r"   r=   r'      s>    
$



zAdagrad.step)r   r   r   r   r   N)N)__name__
__module____qualname__r   r   rC   r   r   boolr-   r>   rE   rP   r   r'   __classcell__r"   r"   r;   r=   r      s6         

Fa[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    F)r   rK   rL   rM   r   rQ   rR   rN   r!   r   rO   r   r   r   r    r   c                C   s   t dd |D std|du r<|du r<t| |	dd\}}|du rHd}|du rTd}|rjtj rjtd|rtj rtd|rtj st}n|rtj st}nt}|| ||||||||||	|
||d	 dS )
ztFunctional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    c                 s   s   | ]}t |tjV  qd S N)r)   r2   r   ).0tr"   r"   r=   	<genexpr>      zadagrad.<locals>.<genexpr>zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)Z	use_fusedz6torch.jit.script not supported with foreach optimizersz4torch.jit.script not supported with fused optimizers
r   r   r   r    rN   r   r   rO   rQ   rR   )	allr.   r   r2   jitis_scripting_fused_adagrad_multi_tensor_adagrad_single_tensor_adagrad)r   rK   rL   rM   r   rQ   rR   rN   r!   r   rO   r   r   r   r    r   _funcr"   r"   r=   r      sJ    
c                 C   s   |   }t|||S rY   )sizer2   Zsparse_coo_tensor)rF   grad_indicesrA   rg   r"   r"   r=   _make_sparse=  s    ri   )r   rK   rL   rM   rQ   rR   r   r   r   r    rN   r   r   rO   c             	   C   s  |d u r|d u sJ t j s&t|}t| |||D ]^\}}}}|d7 }t|}|sZ|n| }|dkr|jrvtd|j||d}|d|d |   }|jr
|	 }|
 }| }|t|||d ||}|  |	}|jt|||| | d q4t |}|r8t |}t |}t |}|j||dd |r\| |	 }n| |	}|j||| d |r4t |}t |}q4d S )Nr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)r2   r`   ra   r   zipr   rH   r.   addZcoalesceZ_indicesZ_valuesZadd_ri   powZsparse_maskZsqrt_r5   Zview_as_realZaddcmul_sqrtZaddcdiv_Zview_as_complex)r   rK   rL   rM   rQ   rR   r   r   r   r    rN   r   r   rO   paramrF   Z	state_sumZstep_tr'   Zclrrh   Zgrad_valuesstdZ
std_valuesr5   r"   r"   r=   rd   B  sJ    






rd   c                   s  |rJ d|d u r|d u s J t | dkr0d S t  t| |||g}| D ]\\}}}}}ttt |}ttt |}ttt |}ttt |}|
otdd |D }|rt	|||| ||	d|||||d qR|rt
||| |rt|}tj s.|d jr.tj|tjddd	dd
 nt|d |dkrl|r\tj|||d
 ntj|||d
} fdd|D }tj|||dd t|}t||	 |dks|rt|| |}nt||}t||| qRd S )Nz#_foreach ops don't support autogradr   c                 s   s   | ]}|j V  qd S rY   )rH   )rZ   rF   r"   r"   r=   r\     s   z(_multi_tensor_adagrad.<locals>.<genexpr>Tr^   g      ?cpu)r%   rj   r   c                    s&   g | ]}  d t |d     qS )r   )r   )rZ   r'   r   r   r"   r=   
<listcomp>  s   z)_multi_tensor_adagrad.<locals>.<listcomp>rm   )rB   r   r   "_group_tensors_by_device_and_dtyperA   r   r@   r   anyrd   r   r2   Z_foreach_negcompilerZis_compilingZis_cpu_foreach_add_r4   Z_foreach_addZ_foreach_addcmul_Z_foreach_sqrtZ_foreach_mul_Z_foreach_mulZ_foreach_addcdiv_)r   rK   rL   rM   rQ   rR   r   r   r   r    rN   r   r   rO   Zgrouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_re   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsZdevice_has_sparse_gradZ	minus_clrrt   	numeratorr"   rv   r=   rc     s    




rc   )r   rK   rL   rM   rQ   rR   r   r   r   r    rN   r   r   rO   returnc                C   s  | sd S |
s|rt d|r$t dt|}|d ur>|j|ind }|d urT|j|ind }t| |||g}| D ]\\}}\\}}}}}ttt |}ttt |}ttt |}ttt |}d\}}|d ur|d ur||vr|j	|dd||< || }|d ur6|d ur6||vr.|j	|dd||< || }t
|d t
j||||||||	|||d |d urrt
||gt|  qrd S )Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=True)NNT)Znon_blockingr   )r   r   r   r    r   rQ   rR   )r.   r   r%   r   rx   itemsr   r@   r   tor2   r{   Z_fused_adagrad_Z_foreach_sub_rB   )r   rK   rL   rM   rQ   rR   r   r   r   r    rN   r   r   rO   Zgrad_scale_dictZfound_inf_dictZgrouped_tensorsr%   re   r|   r}   r~   r   r   r   r   r   Zdevice_grad_scaleZdevice_found_infr"   r"   r=   rb     sl    

rb   )NNNFNFF)typingr   r   r   r2   r   Z	optimizerr   r   r	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__r@   rW   rC   r   ri   rd   rc   rb   r"   r"   r"   r=   <module>   s   < '
8       JBn