import functools
import types
import typing
import warnings
from typing import cast, Optional, Union

from typing_extensions import deprecated

import torch
from torch import Tensor
from torch.utils._foreach_utils import (
    _device_has_foreach_support,
    _group_tensors_by_device_and_dtype,
    _has_foreach_support,
)


__all__: list[str] = []


_tensor_or_tensors = Union[torch.Tensor, typing.Iterable[torch.Tensor]]


def _no_grad(func):
    """
    This wrapper is needed to avoid a circular import when using @torch.no_grad on the exposed functions
    clip_grad_norm_ and clip_grad_value_ themselves.
    """

    def _no_grad_wrapper(*args, **kwargs):
        with torch.no_grad():
            return func(*args, **kwargs)

    functools.update_wrapper(_no_grad_wrapper, func)
    return _no_grad_wrapper


@_no_grad
def _get_total_norm(
    tensors: _tensor_or_tensors,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Compute the norm of an iterable of tensors.

    The norm is computed over the norms of the individual tensors, as if the norms of
    the individual tensors were concatenated into a single vector.

    Args:
        tensors (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will be normalized
        norm_type (float): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of :attr:`tensors` is ``nan``, ``inf``, or ``-inf``.
            Default: ``False``
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        Total norm of the tensors (viewed as a single vector).
    """
    if isinstance(tensors, torch.Tensor):
        tensors = [tensors]
    else:
        tensors = list(tensors)
    norm_type = float(norm_type)
    if len(tensors) == 0:
        return torch.tensor(0.0)
    first_device = tensors[0].device
    grouped_tensors = _group_tensors_by_device_and_dtype([tensors])

    norms: list[Tensor] = []
    for (device, _), ([device_tensors], _) in grouped_tensors.items():
        if (foreach is None and _has_foreach_support(device_tensors, device)) or (
            foreach and _device_has_foreach_support(device)
        ):
            norms.extend(torch._foreach_norm(device_tensors, norm_type))
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            norms.extend(
                [torch.linalg.vector_norm(g, norm_type) for g in device_tensors]
            )

    # Reduce the per-device norms to a single scalar norm on the first device.
    total_norm = torch.linalg.vector_norm(
        torch.stack([norm.to(first_device) for norm in norms]), norm_type
    )

    if error_if_nonfinite and torch.logical_or(total_norm.isnan(), total_norm.isinf()):
        raise RuntimeError(
            f"The total norm of order {norm_type} for gradients from "
            "`parameters` is non-finite, so it cannot be clipped. To disable "
            "this error and scale the gradients by the non-finite norm anyway, "
            "set `error_if_nonfinite=False`"
        )
    return total_norm

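
# An illustrative sketch, not part of the original module: it shows how the
# total gradient norm of a model can be inspected with ``_get_total_norm``
# before any clipping is applied. The toy ``torch.nn.Linear`` model, the random
# batch, and the helper name ``_example_total_grad_norm`` are assumptions made
# for this example only.
def _example_total_grad_norm() -> Tensor:
    model = torch.nn.Linear(4, 2)
    model(torch.randn(8, 4)).pow(2).sum().backward()
    grads = [p.grad for p in model.parameters() if p.grad is not None]
    # Equivalent to the 2-norm of all gradients concatenated into one vector.
    return _get_total_norm(grads, norm_type=2.0)
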

@_no_grad
def _clip_grads_with_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    total_norm: torch.Tensor,
    foreach: Optional[bool] = None,
) -> None:
    r"""Scale the gradients of an iterable of parameters given a pre-calculated total norm and desired max norm.

    The gradients will be scaled by the following calculation

    .. math::
        grad = grad * \frac{max\_norm}{total\_norm + 1e-6}

    Gradients are modified in-place.

    This function is equivalent to :func:`torch.nn.utils.clip_grad_norm_` with a pre-calculated
    total norm.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float): max norm of the gradients
        total_norm (Tensor): total norm of the gradients to use for clipping
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        None
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    if len(grads) == 0:
        return
    grouped_grads = _group_tensors_by_device_and_dtype([grads])

    clip_coef = max_norm / (total_norm + 1e-6)
    # Multiplying by the clamped coefficient is a no-op when it equals 1, but it
    # avoids a data-dependent `if clip_coef < 1:` branch, which could force a
    # CPU <-> device synchronization when the gradients live on an accelerator.
    clip_coef_clamped = torch.clamp(clip_coef, max=1.0)
    for (device, _), ([device_grads], _) in grouped_grads.items():
        if (foreach is None and _has_foreach_support(device_grads, device)) or (
            foreach and _device_has_foreach_support(device)
        ):
            torch._foreach_mul_(device_grads, clip_coef_clamped.to(device))
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            clip_coef_clamped_device = clip_coef_clamped.to(device)
            for g in device_grads:
                g.mul_(clip_coef_clamped_device)

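
# An illustrative sketch, not part of the original module: the two helpers
# above compose into the same behaviour as ``clip_grad_norm_`` below, which is
# handy when the total norm should also be logged or reused. The toy model, the
# 1.0 max-norm, and the helper name ``_example_manual_clip`` are assumptions
# made for this example only.
def _example_manual_clip() -> Tensor:
    model = torch.nn.Linear(4, 2)
    model(torch.randn(8, 4)).pow(2).sum().backward()
    grads = [p.grad for p in model.parameters() if p.grad is not None]
    total_norm = _get_total_norm(grads, norm_type=2.0)
    # Scales every gradient in-place by min(1, max_norm / (total_norm + 1e-6)).
    _clip_grads_with_norm_(
        list(model.parameters()), max_norm=1.0, total_norm=total_norm
    )
    return total_norm
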

@_no_grad
def clip_grad_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Clip the gradient norm of an iterable of parameters.

    The norm is computed over the norms of the individual gradients of all parameters,
    as if the norms of the individual gradients were concatenated into a single vector.
    Gradients are modified in-place.

    This function is equivalent to :func:`torch.nn.utils.get_total_norm` followed by
    :func:`torch.nn.utils.clip_grads_with_norm_` with the ``total_norm`` returned by ``get_total_norm``.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float): max norm of the gradients
        norm_type (float, optional): type of the used p-norm. Can be ``'inf'`` for
            infinity norm. Default: 2.0
        error_if_nonfinite (bool, optional): if True, an error is thrown if the total
            norm of the gradients from :attr:`parameters` is ``nan``,
            ``inf``, or ``-inf``. Default: False
        foreach (bool, optional): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        Total norm of the parameter gradients (viewed as a single vector).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    else:
        # Materialize generators so the parameters can be traversed twice.
        is_generator = isinstance(parameters, types.GeneratorType)
        parameters = list(parameters)
        if is_generator and len(parameters) == 0:
            warnings.warn(
                "`parameters` is an empty generator, no gradient clipping will occur.",
                stacklevel=3,
            )
    grads = [p.grad for p in parameters if p.grad is not None]
    total_norm = _get_total_norm(grads, norm_type, error_if_nonfinite, foreach)
    _clip_grads_with_norm_(parameters, max_norm, total_norm, foreach)
    return total_norm

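
# An illustrative sketch, not part of the original module: typical use of
# ``clip_grad_norm_`` between the backward pass and the optimizer step. The toy
# model, optimizer, data, and the helper name ``_example_training_step`` are
# assumptions made for this example only.
def _example_training_step() -> None:
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    optimizer.zero_grad()
    model(torch.randn(8, 4)).pow(2).sum().backward()
    # Rescales all gradients in-place so their total 2-norm is at most 1.0.
    clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
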

@deprecated(
    "`torch.nn.utils.clip_grad_norm` is now deprecated "
    "in favor of `torch.nn.utils.clip_grad_norm_`.",
    category=FutureWarning,
)
def clip_grad_norm(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
    foreach: Optional[bool] = None,
) -> torch.Tensor:
    r"""Clip the gradient norm of an iterable of parameters.

    .. warning::
        This method is now deprecated in favor of
        :func:`torch.nn.utils.clip_grad_norm_`.
    """
    return clip_grad_norm_(parameters, max_norm, norm_type, error_if_nonfinite, foreach)


@_no_grad
def clip_grad_value_(
    parameters: _tensor_or_tensors,
    clip_value: float,
    foreach: Optional[bool] = None,
) -> None:
    r"""Clip the gradients of an iterable of parameters at specified value.

    Gradients are modified in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        clip_value (float): maximum allowed value of the gradients.
            The gradients are clipped in the range
            :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
        foreach (bool, optional): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and
            silently fall back to the slow implementation for other device types.
            Default: ``None``
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    clip_value = float(clip_value)

    grads = [p.grad for p in parameters if p.grad is not None]
    grouped_grads = _group_tensors_by_device_and_dtype([grads])

    for (device, _), ([device_grads], _) in grouped_grads.items():
        if (
            foreach is None
            and _has_foreach_support(cast(list[Tensor], device_grads), device=device)
        ) or (foreach and _device_has_foreach_support(device)):
            torch._foreach_clamp_min_(cast(list[Tensor], device_grads), -clip_value)
            torch._foreach_clamp_max_(cast(list[Tensor], device_grads), clip_value)
        elif foreach:
            raise RuntimeError(
                f"foreach=True was passed, but can't use the foreach API on {device.type} tensors"
            )
        else:
            for grad in device_grads:
                cast(Tensor, grad).clamp_(min=-clip_value, max=clip_value)

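
# An illustrative sketch, not part of the original module: element-wise
# clipping with ``clip_grad_value_`` bounds every gradient entry to
# [-clip_value, clip_value] instead of rescaling by the total norm. The toy
# model and the helper name ``_example_value_clip`` are assumptions made for
# this example only.
def _example_value_clip() -> None:
    model = torch.nn.Linear(4, 2)
    model(torch.randn(8, 4)).pow(2).sum().backward()
    clip_grad_value_(model.parameters(), clip_value=0.5)
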

# Report the documented public module path for these helpers.
_get_total_norm.__module__ = "torch.nn.utils"
_clip_grads_with_norm_.__module__ = "torch.nn.utils"
clip_grad_norm_.__module__ = "torch.nn.utils"