from typing import Optional

import torch
import torch.optim._functional as F
from torch import Tensor
from torch.distributed.optim._deprecation_warning import (
    _scripted_functional_optimizer_deprecation_warning,
)

__all__: list[str] = []


# A TorchScript-compatible functional AdamW optimizer. Instead of reading
# `param.grad`, the distributed optimizer passes gradients explicitly to
# `step` / `step_param`, so gradients and parameters stay separate and a
# multithreaded trainer can update parameters without racing on `.grad`.
# NOTE: intended for distributed optimizer internals, not for end users.
@torch.jit.script
class _FunctionalAdamW:
    def __init__(
        self,
        params: list[Tensor],
        lr: float = 1e-3,
        betas: tuple[float, float] = (0.9, 0.999),
        eps: float = 1e-8,
        weight_decay: float = 1e-2,
        amsgrad: bool = False,
        maximize: bool = False,
        foreach: bool = False,
        fused: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        _scripted_functional_optimizer_deprecation_warning(stacklevel=2)
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        self.defaults = {
            "lr": lr,
            "eps": eps,
            "beta1": betas[0],
            "beta2": betas[1],
            "weight_decay": weight_decay,
        }
        self.amsgrad = amsgrad
        self.maximize = maximize
        self.foreach = foreach
        self.fused = fused
        self.state = torch.jit.annotate(dict[torch.Tensor, dict[str, torch.Tensor]], {})

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # Only one param_group is supported; additional groups are not a
        # common use case for the functional optimizers.
        self.param_group = {"params": params}

    def step_param(self, param: Tensor, grad: Optional[Tensor]):
        # Apply a single AdamW update to one parameter with an explicit gradient.
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: list[Tensor] = []
        has_complex = torch.is_complex(param)
        if grad is not None:
            params_with_grad.append(param)
            grads.append(grad)
        # Lazy state initialization
        if param not in self.state:
            self.state[param] = {}
            state = self.state[param]
            state["step"] = torch.tensor(0.0)
            state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            if self.amsgrad:
                state["max_exp_avg_sq"] = torch.zeros_like(
                    param, memory_format=torch.preserve_format
                )

        state = self.state[param]
        exp_avgs.append(state["exp_avg"])
        exp_avg_sqs.append(state["exp_avg_sq"])
        if self.amsgrad:
            max_exp_avg_sqs.append(state["max_exp_avg_sq"])
        state_steps.append(state["step"])

        with torch.no_grad():
            F.adamw(
                params_with_grad, grads, exp_avgs, exp_avg_sqs,
                max_exp_avg_sqs, state_steps,
                amsgrad=self.amsgrad, maximize=self.maximize,
                lr=self.defaults["lr"],
                beta1=self.defaults["beta1"], beta2=self.defaults["beta2"],
                weight_decay=self.defaults["weight_decay"], eps=self.defaults["eps"],
                foreach=self.foreach, fused=self.fused,
                grad_scale=None, found_inf=None, has_complex=has_complex,
            )

    def step(self, gradients: list[Optional[Tensor]]):
        # Apply one AdamW update to every parameter, given one gradient per parameter.
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: list[Tensor] = []
        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        has_complex = False
        for param, gradient in zip(self.param_group["params"], gradients):
            if gradient is not None:
                has_complex |= torch.is_complex(param)
                params_with_grad.append(param)
                grads.append(gradient)
                # Lazy state initialization
                if param not in self.state:
                    self.state[param] = {}
                    state = self.state[param]
                    state["step"] = torch.tensor(0.0)
                    # Exponential moving averages of the gradient and its square
                    state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    if self.amsgrad:
                        # Running max of the exp. moving averages of squared gradients
                        state["max_exp_avg_sq"] = torch.zeros_like(
                            param, memory_format=torch.preserve_format
                        )

                state = self.state[param]
                exp_avgs.append(state["exp_avg"])
                exp_avg_sqs.append(state["exp_avg_sq"])
                if self.amsgrad:
                    max_exp_avg_sqs.append(state["max_exp_avg_sq"])
                state_steps.append(state["step"])

        with torch.no_grad():
            F.adamw(
                params_with_grad, grads, exp_avgs, exp_avg_sqs,
                max_exp_avg_sqs, state_steps,
                amsgrad=self.amsgrad, maximize=self.maximize,
                lr=self.defaults["lr"],
                beta1=self.defaults["beta1"], beta2=self.defaults["beta2"],
                weight_decay=self.defaults["weight_decay"], eps=self.defaults["eps"],
                foreach=self.foreach, fused=self.fused,
                grad_scale=None, found_inf=None, has_complex=has_complex,
            )
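

# A minimal, hypothetical usage sketch (not part of the upstream module). It
# assumes a plain CPU setup with made-up tensors and only illustrates how a
# caller might hand explicit gradients to _FunctionalAdamW instead of relying
# on `.grad` accumulation.
if __name__ == "__main__":
    weight = torch.randn(4, 4, requires_grad=True)
    bias = torch.randn(4, requires_grad=True)
    opt = _FunctionalAdamW([weight, bias], lr=1e-3)

    loss = (weight.sum() + bias.sum()) ** 2
    grads = torch.autograd.grad(loss, [weight, bias])

    # Update every parameter at once, passing gradients explicitly...
    opt.step(list(grads))
    # ...or update a single parameter with its own gradient.
    opt.step_param(weight, grads[0])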