o
    wZh>                  "   @   s  d dl mZmZmZ d dlZd dlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZ ddgZG dd deZd	e d
e de de	 de de_dee dee dee dee dee dee dededededededededefddZdee dee dee dee dee dee dededededededededefddZe
ed 		!	!	!	!d$dee dee dee dee dee dee d"ee dededededededededef d#dZdS )%    )castOptionalUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_params_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTASGDasgdc                       s   e Zd Z									dded	eeef d
ededededee dededef fddZ	 fddZ
dd ZedddZ  ZS )r   {Gz?-C6?      ?    .Ar   NFparamslrlambdalphat0weight_decayforeachmaximizedifferentiable
capturablec                    sv   t |tr| dkrtdd|kstd| d|ks%td| t||||||||	|
d	}t || d S )Nr   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid weight_decay value: )	r   r   r   r   r    r!   r"   r#   r$   )
isinstancer   Znumel
ValueErrordictsuper__init__)selfr   r   r   r   r   r    r!   r"   r#   r$   defaults	__class__ ?/var/www/auris/lib/python3.10/site-packages/torch/optim/asgd.pyr)      s$   zASGD.__init__c                    s   t  | | jD ]q}|dd  |dd |dd |dd |d D ]R}| j|g }t|dkryt|d sOt	|d }tj
|t |jd	|d< t|d
 sdtj
|d
 t |jd	|d
< t|d sytj
|d t |jd	|d< q'q	d S )Nr!   r"   Fr#   r$   r   r   step)dtypedeviceetamu)r(   __setstate__param_groups
setdefaultstategetlentorchZ	is_tensorfloattensorr   r2   )r*   r8   grouppZp_stateZstep_valr,   r.   r/   r5   >   s2   




zASGD.__setstate__c                 C   s
  d}|d D ]|}	|	j d ur|t|	O }||	 |	j jr!td||	j  | j|	 }
t|
dkrftjd|	j	t
 d|
d< tj|d |	j	t
 d  |
d	< tjd|	j	t
 d|
d
< tj|	tjd|
d< ||
d
  ||
d  ||
d	  ||
d  q|S )NFr   z&ASGD does not support sparse gradientsr   r.   )r2   r1   r0   r   r3   r4   )Zmemory_formatax)gradr;   
is_complexappendZ	is_sparseRuntimeErrorr8   r:   Zzerosr2   r   	as_tensorclonedetachZonesZ
zeros_likeZpreserve_format)r*   r>   params_with_gradgradsmusaxsetasstate_stepshas_complexr?   r8   r.   r.   r/   _init_groupV   s>   







zASGD._init_groupc                 C   s   |    d}|dur!t  | }W d   n1 sw   Y  | jD ]?}g }g }g }g }g }g }	| |||||||	}
t||||||	|d |d |d |d |d |d |d |d	 |d
 |
d q$|S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r    r!   r"   r#   r$   )
r   r   r   r   r    r!   r"   r#   r$   rN   )Z _cuda_graph_capture_health_checkr;   Zenable_gradr6   rO   r   )r*   closureZlossr>   rH   rI   rJ   rK   rL   rM   rN   r.   r.   r/   r0   z   sF   

z	ASGD.step)	r   r   r   r   r   NFFFN)__name__
__module____qualname__r   r   r<   r   r   boolr)   r5   rO   r   r0   __classcell__r.   r.   r,   r/   r      sF    
	
!$zImplements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        am  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zx

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

    r   rI   rK   rJ   rL   rM   r   r   r   r   r    r"   r#   r$   rN   c       	      
   C   s  t | D ] \}}|| }|s|n| }|| }|| }|| }|| }tj sU|rUt }|jj|jj  krE|jj  krE|jjkrMn n|jj|v sUJ d| dt|rit|}t|}t|}|d7 }|
dkrx|j	||
d}|r|
d||   |j||dd nt|}|
d||   |j|| d |s| dkr|||
| n|| |r||d|| |  |	   |dt|| t|  qt|}t|d|| |  |	  }|| tdtd||  }|| qd S )NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .r   r   r   value)	enumerater;   compileris_compilingr   r2   typerB   Zview_as_realaddZmul_Zaddcmul_r   Zadd_itemsubZcopy_maximumZ	ones_likerE   max)r   rI   rK   rJ   rL   rM   r   r   r   r   r    r"   r#   r$   rN   iparamrA   r4   r@   r3   Zstep_tcapturable_supported_devicesZ	eta_valuer0   Znew_etaZnew_mur.   r.   r/   _single_tensor_asgd   s\   





"
ri   c       	             s  t | dkrd S |rJ dtj s2|r2tddtfddt| |||D s2J d dt| |||||g}|	 D ]\\}\\}}}}}}}t
tt |}t
tt |}t
tt |}t
tt |}t
tt |}t
tt |}|rt||| |rt|}tj s|d jrtj|tjd	d
dd	d nt|d |
dkr|rtj|||
d |}ntj|||
d}tj||d ntj||d}tj|||dd ~t||}t||| ~|r.t|}t|d	 t| t|| ~t|}t| t|d t|  t| t| t|| qA fdd|D }fdd|D }t|| t|| qAd S )Nr   z#_foreach ops don't support autogradF)Zsupports_xlac                 3   sV    | ]&\}}}}|j j|j j  ko|j j  ko|j jkn  o&|j j v V  qd S rQ   )r2   r`   ).0r?   r4   r3   r0   )rh   r.   r/   	<genexpr>*  s    
2

z%_multi_tensor_asgd.<locals>.<genexpr>rW   rX   g      ?cpur2   rY   r   rZ   r[   c                    s.   g | ]}t jd  |     dqS r   rm   )r;   rE   rj   r0   )r   r2   r   r   r.   r/   
<listcomp>  s     z&_multi_tensor_asgd.<locals>.<listcomp>c                    s,   g | ]}t jd td t|   dqS rn   )r;   rE   re   r   ro   )r2   r   r.   r/   rp     s    )r:   r;   r^   r_   r   allzipr   Z"_group_tensors_by_device_and_dtypeitemsr   listr   r   Z_foreach_negZis_cpuZ_foreach_add_r=   Z_foreach_addZ_foreach_addcmul_Z_foreach_subZ_foreach_maximum_Z_foreach_reciprocal_Z_foreach_copy_Z_foreach_mulZ_foreach_mul_Z_foreach_pow_) r   rI   rK   rJ   rL   rM   r   r   r   r   r    r"   r#   r$   rN   Zgrouped_tensors_Zgrouped_params_Zgrouped_grads_Zgrouped_axs_Zgrouped_mus_Zgrouped_etas_Zgrouped_state_steps_Zgrouped_paramsZgrouped_gradsZgrouped_axsZgrouped_musZgrouped_etasZgrouped_state_stepsZintermediateZnew_musZnew_etasr.   )r   rh   r2   r   r   r   r/   _multi_tensor_asgd  s   





rv   )Zsingle_tensor_fnFr!   c                C   sr   |du rt | |dd\}}|rtj rtd|r"tj s"t}nt}|| |||||||||||||	|
d dS )znFunctional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    NF)Z	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r   r   r    r"   r#   r$   rN   )r   r;   ZjitZis_scriptingrD   rv   ri   )r   rI   rK   rJ   rL   rM   r!   r"   r#   r$   rN   r   r   r   r   r    ru   funcr.   r.   r/   r     s4   

)NFFFF)typingr   r   r   r;   r   Z	optimizerr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__rt   r<   rU   ri   rv   r   r.   r.   r.   r/   <module>   s   @ 
	

L	

 
	
