a
    h2                     @   sj   U d dl mZ d dlZd dlm  mZ d dlmZ d dlm	Z	 g Z
ee ed< ejjG dd dZdS )    )OptionalN)Tensor)2_scripted_functional_optimizer_deprecation_warning__all__c                   @   s\   e Zd Zdee eeeeeeeeed
ddZeee ddd	Z	eee  d
ddZ
dS )_FunctionalSGD{Gz?        F)
paramslrmomentum	dampeningweight_decaynesterovmaximizeforeachfused_allow_empty_param_listc                 C   s|   t dd ||||d| _|| _|| _|| _|	| _tjt	tj
t	ttj
f f i | _t|dkrn|
sntdd|i| _d S )N   )
stacklevel)r
   r   r   r   r   z%optimizer got an empty parameter listr	   )r   defaultsr   r   r   r   torchjitZannotatedictr   strstatelen
ValueErrorparam_group)selfr	   r
   r   r   r   r   r   r   r   r    r   T/var/www/auris/lib/python3.9/site-packages/torch/distributed/optim/functional_sgd.py__init__   s    
$z_FunctionalSGD.__init__)paramgradc                 C   s  | j d }| j d }| j d }| j d }|g}g }g }	d}
|dur|	| |jrVd}
|| jvrji | j|< | j| }d|vr|d n||d  t > tj||	|||||| j| j	|
| j
| jddd	 W d   n1 s0    Y  | j| }|d
 }|dur||d< dS )z[Similar to self.step, but operates on a single parameter and
        its gradient.
        r   r   r   r
   FNTmomentum_bufferr   r   r
   r   r   r   has_sparse_gradr   r   Z
grad_scaleZ	found_infr   )r   append	is_sparser   r   no_gradFsgdr   r   r   r   )r   r"   r#   r   r   r   r
   r	   momentum_buffer_listgradsr&   r   r$   r   r   r    
step_param;   sN    








$

z_FunctionalSGD.step_param)	gradientsc                 C   s  | j d }g }g }g }| jd }| jd }| jd }| jd }	t|t|krttddt| d d	t|  d
}
t||D ]n\}}|d ur|| || |jrd}
|| jvri | j|< | j| }d|vr|d  q||d  qt	 > t
j|||||||	| j| j|
| j| jd d d W d    n1 s@0    Y  t|D ].\}}| j| }|| }|d urR||d< qRd S )Nr	   r
   r   r   r   zEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: FTr$   r%   )r   r   r   r   zipr'   r(   r   r   r)   r*   r+   r   r   r   r   	enumerate)r   r/   r	   Zparams_with_gradr-   r,   r
   r   r   r   r&   r"   Zgradientr   ipr$   r   r   r    stepm   sf    










&

z_FunctionalSGD.stepN)	r   r   r   r   FFFFF)__name__
__module____qualname__listr   floatboolr!   r   r.   r4   r   r   r   r    r      s.            !2r   )typingr   r   Ztorch.optim._functionalZoptimZ_functionalr*   r   Z,torch.distributed.optim._deprecation_warningr   r   r8   r   __annotations__r   scriptr   r   r   r   r    <module>   s   