a
    h?                     @   s  d dl mZmZmZ d dlZd dlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddgZG dd deZd	e d
e de de	 de de_ee ee ee ee ee ee eeeeeeeeedddZee ee ee ee ee ee eeeeeeeeedddZe
eddee ee ee ee ee ee ee eeeeeeeeedddZdS )    )castOptionalUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real	OptimizerParamsTASGDasgdc                       sd   e Zd Zdeeeef eeeeee eeed
 fd	d
Z	 fddZ
dd ZedddZ  ZS )r   {Gz?-C6?      ?    .Ar   NF)
paramslrlambdalphat0weight_decayforeachmaximizedifferentiable
capturablec                    sv   t |tr| dkrtdd|ks4td| d|ksJtd| t||||||||	|
d	}t || d S )Nr   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid weight_decay value: )	r   r   r   r    r!   r"   r#   r$   r%   )
isinstancer   Znumel
ValueErrordictsuper__init__)selfr   r   r   r   r    r!   r"   r#   r$   r%   defaults	__class__ >/var/www/auris/lib/python3.9/site-packages/torch/optim/asgd.pyr*      s$    zASGD.__init__c                    s   t  | | jD ]}|dd  |dd |dd |dd |d D ]}| j|g }t|dkrNt|d st	|d }tj
|t |jd	|d< t|d
 stj
|d
 t |jd	|d
< t|d sNtj
|d t |jd	|d< qNqd S )Nr"   r#   Fr$   r%   r   r   step)dtypedeviceetamu)r)   __setstate__param_groups
setdefaultstategetlentorchZ	is_tensorfloattensorr   r3   )r+   r9   grouppZp_stateZstep_valr-   r/   r0   r6   ?   s,    



zASGD.__setstate__c                 C   s  d}|d D ]}	|	j d ur|t|	O }||	 |	j jrBtd||	j  | j|	 }
t|
dkrtjd|	j	t
 d|
d< tjt|d |	j	t
 d  |
d	< tjd|	j	t
 d|
d
< tj|	tjd|
d< ||
d
  ||
d  ||
d	  ||
d  q|S )NFr   z&ASGD does not support sparse gradientsr   r/   )r3   r2   r1   r   r4   r5   )Zmemory_formatax)gradr<   
is_complexappendZ	is_sparseRuntimeErrorr9   r;   zerosr3   r   	as_tensorr   clonedetachZonesZ
zeros_likeZpreserve_format)r+   r?   params_with_gradgradsmusaxsetasstate_stepshas_complexr@   r9   r/   r/   r0   _init_groupW   s<    





	


zASGD._init_groupc                 C   s   |    d}|durBt  | }W d   n1 s80    Y  | jD ]~}g }g }g }g }g }g }	| |||||||	}
t||||||	|d |d |d |d |d |d |d |d	 |d
 |
d qH|S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r    r   r!   r"   r#   r$   r%   )
r   r   r    r   r!   r"   r#   r$   r%   rP   )Z _cuda_graph_capture_health_checkr<   Zenable_gradr7   rQ   r   )r+   closureZlossr?   rJ   rK   rL   rM   rN   rO   rP   r/   r/   r0   r1   }   sD    
$
z	ASGD.step)	r   r   r   r   r   NFFF)N)__name__
__module____qualname__r   r   r=   r   r   boolr*   r6   rQ   r   r1   __classcell__r/   r/   r-   r0   r      s2            
!&zImplements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        am  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        z

    .. _Acceleration of stochastic approximation by averaging:
        https://meyn.ece.ufl.edu/wp-content/uploads/sites/77/archive/spm_files/Courses/ECE555-2011/555media/poljud92.pdf

    )r   rK   rM   rL   rN   rO   r   r   r    r   r!   r#   r$   r%   rP   c       	      
   C   s,  t j st|}t| D ]
\}}|| }|s4|n| }|| }|| }|| }|| }t j s|rt }|jj	|jj	  kr|jj	  kr|jj	krn n|jj	|v sJ d| dt 
|rt |}t |}t |}|d7 }|
dkr|j||
d}|r.|d||   |j||dd n*t|}|d||   |j|| d |sl| dkr|||| n
|| |r||d|| |  |	   |dt || t |  qt|}t |d|| |  |	  }|| t dtd||  }|| qd S )NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .r   r   r   value)r<   jitis_scriptingr   	enumeratecompileris_compilingr   r3   typerC   Zview_as_realaddZmul_Zaddcmul_r   Zadd_itemsubZcopy_maximumZ	ones_likerG   max)r   rK   rM   rL   rN   rO   r   r   r    r   r!   r#   r$   r%   rP   iparamrB   r5   rA   r4   Zstep_tcapturable_supported_devicesZ	eta_valuer1   Znew_etaZnew_mur/   r/   r0   _single_tensor_asgd   s`    








"
rl   c       	             s  t | dkrd S |rJ dtj sd|rdtddtfddt| |||D sdJ d dtt	| |||||g}|
 D ](\\}\\}}}}}}}ttt |}ttt |}ttt |}ttt |}ttt |}ttt |}|rt||| |rt|}tj sR|d jrRtj|tjd	d
dd	d nt|d |
dkr|rtj|||
d |}ntj|||
d}tj||d ntj||d}tj|||dd ~t||}t||| ~|rpt|}t|d	 t| t|| ~t|}t| t|d t|  t| t| t|| q fdd|D }fdd|D }t|| t|| qd S )Nr   z#_foreach ops don't support autogradF)Zsupports_xlac                 3   sT   | ]L\}}}}|j j|j j  ko:|j j  ko:|j jkn  oJ|j j v V  qd S )N)r3   rc   ).0r@   r5   r4   r1   )rk   r/   r0   	<genexpr>0  s   
2z%_multi_tensor_asgd.<locals>.<genexpr>rX   rY   g      ?cpur3   rZ   r   r[   r\   c                    s.   g | ]&}t jd  |     dqS r   rp   )r<   rG   rm   r1   )r   r3   r   r   r/   r0   
<listcomp>  s   z&_multi_tensor_asgd.<locals>.<listcomp>c                    s,   g | ]$}t jd td t|   dqS rq   )r<   rG   rh   r   rr   )r3   r    r/   r0   rs     s   )r;   r<   ra   rb   r   allzipr   r   Z"_group_tensors_by_device_and_dtypeitemsr   listr   r   Z_foreach_negZis_cpuZ_foreach_add_r>   Z_foreach_addZ_foreach_addcmul_Z_foreach_subZ_foreach_maximum_Z_foreach_reciprocal_Z_foreach_copy_Z_foreach_mulZ_foreach_mul_Z_foreach_pow_) r   rK   rM   rL   rN   rO   r   r   r    r   r!   r#   r$   r%   rP   Zgrouped_tensors_Zgrouped_params_Zgrouped_grads_Zgrouped_axs_Zgrouped_mus_Zgrouped_etas_Zgrouped_state_steps_Zgrouped_paramsZgrouped_gradsZgrouped_axsZgrouped_musZgrouped_etasZgrouped_state_stepsZintermediateZnew_musZnew_etasr/   )r   rk   r3   r   r   r    r0   _multi_tensor_asgd  s    






ry   )Zsingle_tensor_fnF)r   rK   rM   rL   rN   rO   r"   r#   r$   r%   rP   r   r   r    r   r!   c                C   sr   |du rt | |dd\}}|r0tj r0td|rDtj sDt}nt}|| |||||||||||||	|
d dS )znFunctional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    NF)Z	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r    r   r!   r#   r$   r%   rP   )r   r<   r^   r_   rE   ry   rl   )r   rK   rM   rL   rN   rO   r"   r#   r$   r%   rP   r   r   r    r   r!   rx   funcr/   r/   r0   r     s4    
)NFFFF)typingr   r   r   r<   r   Z	optimizerr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__rw   r=   rV   rl   ry   r   r/   r/   r/   r0   <module>   s   D 
P 
     