a
    hj:                     @   s   d dl Z d dlmZmZ d dlZd dlmZmZ d dlmZ	m
Z
 d dlmZ ddlmZ ddlmZ g d	ZG d
d deZG dd deZeeee ef ZG dd deZG dd deZG dd deZdS )    N)OptionalUnion)SizeTensor)
functionalinit)	Parameter   )CrossMapLRN2d)Module)LocalResponseNormr
   	LayerNorm	GroupNormRMSNormc                       sr   e Zd ZU dZg dZeed< eed< eed< eed< deeeed
d fddZe	e	dddZ
dd Z  ZS )r   a  Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    )sizealphabetakr   r   r   r   -C6?      ?      ?Nr   r   r   r   returnc                    s&   t    || _|| _|| _|| _d S Nsuper__init__r   r   r   r   selfr   r   r   r   	__class__ L/var/www/auris/lib/python3.9/site-packages/torch/nn/modules/normalization.pyr   5   s
    
zLocalResponseNorm.__init__inputr   c                 C   s   t || j| j| j| jS r   )FZlocal_response_normr   r   r   r   r   r$   r!   r!   r"   forward>   s    zLocalResponseNorm.forwardc                 C   s   dj f i | jS Nz){size}, alpha={alpha}, beta={beta}, k={k}format__dict__r   r!   r!   r"   
extra_reprA   s    zLocalResponseNorm.extra_repr)r   r   r   )__name__
__module____qualname____doc____constants__int__annotations__floatr   r   r'   r-   __classcell__r!   r!   r   r"   r      s   
 	r   c                       sl   e Zd ZU eed< eed< eed< eed< deeeedd	 fd
dZeedddZe	dddZ
  ZS )r
   r   r   r   r   r   r   r	   Nr   c                    s&   t    || _|| _|| _|| _d S r   r   r   r   r!   r"   r   K   s
    
zCrossMapLRN2d.__init__r#   c                 C   s   t || j| j| j| jS r   )_cross_map_lrn2dapplyr   r   r   r   r&   r!   r!   r"   r'   T   s    zCrossMapLRN2d.forwardr   c                 C   s   dj f i | jS r(   r)   r,   r!   r!   r"   r-   W   s    zCrossMapLRN2d.extra_repr)r   r   r	   )r.   r/   r0   r3   r4   r5   r   r   r'   strr-   r6   r!   r!   r   r"   r
   E   s   
 	r
   c                       s   e Zd ZU dZg dZeedf ed< eed< e	ed< de
ee	e	d	d
 fddZd	dddZeedddZedddZ  ZS )r   a  Applies Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The variance is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which applies
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias:   the learnable bias of the module of shape
                :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
                The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %

    normalized_shapeepselementwise_affine.r<   r=   r>   h㈵>TN)r<   r=   r>   biasr   c                    s   ||d}t    t|tjr&|f}t|| _|| _|| _| jrt	t
j| jfi || _|r|t	t
j| jfi || _q| dd  n| dd  | dd  |   d S )Ndevicedtyper@   weight)r   r   
isinstancenumbersIntegraltupler<   r=   r>   r   torchemptyrD   r@   register_parameterreset_parameters)r   r<   r=   r>   r@   rB   rC   factory_kwargsr   r!   r"   r      s&    	


zLayerNorm.__init__r9   c                 C   s,   | j r(t| j | jd ur(t| j d S r   )r>   r   ones_rD   r@   zeros_r,   r!   r!   r"   rL      s    
zLayerNorm.reset_parametersr#   c                 C   s   t || j| j| j| jS r   )r%   Z
layer_normr<   rD   r@   r=   r&   r!   r!   r"   r'      s    zLayerNorm.forwardc                 C   s   dj f i | jS )NF{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}r)   r,   r!   r!   r"   r-      s    zLayerNorm.extra_repr)r?   TTNN)r.   r/   r0   r1   r2   rH   r3   r4   r5   bool_shape_tr   rL   r   r'   r:   r-   r6   r!   r!   r   r"   r   ^   s(   
M     !r   c                       s   e Zd ZU dZg dZeed< eed< eed< eed< deeeed	d
 fddZ	d	dddZ
eedddZedddZ  ZS )r   a  Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over the each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The variance is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    )
num_groupsnum_channelsr=   affinerS   rT   r=   rU   r?   TN)rS   rT   r=   rU   r   c                    s   ||d}t    || dkr(td|| _|| _|| _|| _| jrxttj	|fi || _
ttj	|fi || _n| dd  | dd  |   d S )NrA   r   z,num_channels must be divisible by num_groupsrD   r@   )r   r   
ValueErrorrS   rT   r=   rU   r   rI   rJ   rD   r@   rK   rL   )r   rS   rT   r=   rU   rB   rC   rM   r   r!   r"   r     s    	

zGroupNorm.__init__r9   c                 C   s"   | j rt| j t| j d S r   )rU   r   rN   rD   rO   r@   r,   r!   r!   r"   rL   3  s    zGroupNorm.reset_parametersr#   c                 C   s   t || j| j| j| jS r   )r%   Z
group_normrS   rD   r@   r=   r&   r!   r!   r"   r'   8  s    zGroupNorm.forwardc                 C   s   dj f i | jS )Nz8{num_groups}, {num_channels}, eps={eps}, affine={affine}r)   r,   r!   r!   r"   r-   ;  s    zGroupNorm.extra_repr)r?   TNN)r.   r/   r0   r1   r2   r3   r4   r5   rQ   r   rL   r   r'   r:   r-   r6   r!   r!   r   r"   r      s(   
-    r   c                       s   e Zd ZU dZg dZeedf ed< ee	 ed< e
ed< deee	 e
dd	 fd
dZddddZejejdddZedddZ  ZS )r   a  Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>`__

    .. math::
        y_i = \frac{x_i}{\mathrm{RMS}(x)} * \gamma_i, \quad
        \text{where} \quad \text{RMS}(x) = \sqrt{\epsilon + \frac{1}{n} \sum_{i=1}^{n} x_i^2}

    The RMS is taken over the last ``D`` dimensions, where ``D``
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the RMS is computed over
    the last 2 dimensions of the input.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: ``torch.finfo(x.dtype).eps``
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights). Default: ``True``.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> rms_norm = nn.RMSNorm([2, 3])
        >>> input = torch.randn(2, 2, 3)
        >>> rms_norm(input)

    r;   .r<   r=   r>   NT)r<   r=   r>   r   c                    sv   ||d}t    t|tjr&|f}t|| _|| _|| _| jr^t	t
j| jfi || _n| dd  |   d S )NrA   rD   )r   r   rE   rF   rG   rH   r<   r=   r>   r   rI   rJ   rD   rK   rL   )r   r<   r=   r>   rB   rC   rM   r   r!   r"   r   o  s    


zRMSNorm.__init__r9   c                 C   s   | j rt| j dS )zS
        Resets parameters based on their initialization used in __init__.
        N)r>   r   rN   rD   r,   r!   r!   r"   rL     s    zRMSNorm.reset_parameters)xr   c                 C   s   t || j| j| jS )z$
        Runs forward pass.
        )r%   Zrms_normr<   rD   r=   )r   rW   r!   r!   r"   r'     s    zRMSNorm.forwardc                 C   s   dj f i | jS )z5
        Extra information about the module.
        rP   r)   r,   r!   r!   r"   r-     s    zRMSNorm.extra_repr)NTNN)r.   r/   r0   r1   r2   rH   r3   r4   r   r5   rQ   rR   r   rL   rI   r   r'   r:   r-   r6   r!   r!   r   r"   r   A  s$   
(    r   )rF   typingr   r   rI   r   r   Ztorch.nnr   r%   r   Ztorch.nn.parameterr   Z
_functionsr
   r7   moduler   __all__r   r3   listrR   r   r   r   r!   r!   r!   r"   <module>   s   4 ]