a
    hd                  	   @   s$  d Z ddlZddlZddlmZmZmZmZm	Z	 ddl
mZ ddlZddlmZ g dZedZedZed	 Zed
 ZdMeeeeej edddZdNeeeeej edddZdOeeeeeeej edddZeeedddZeedddZdPeee	eef  edddZdQeeeeej eddd ZdReeeeej edd!d"ZdSeeeeeeej edd%d&Zeeedd'd(Zeedd)d*Z eedd+d,Z!eedd-d.Z"dTeeed0d1d2Z#ee$eef dd3d4Z%dUeeeej ed5d6d7Z&dVeeeej ed5d8d9Z'eeed:d;d<Z(dWeeeeeej ed?d@dAZ)dXeeeeeej ed?dBdCZ*dYeeeej ed5dDdEZ+dZeeeeej edGdHdIZ,eeef eeef dJdKdLZ-e-eZ.e-eZ/e-eZ0e-e"Z1e-e#Z2e-e&Z3e-e'Z4e-e)Z5e-e*Z6e-e+Z7e-e,Z8dS )[zHThis file contains utilities for initializing neural network parameters.    N)CallableLiteralOptionalTypeVarUnion)	ParamSpec)Tensor)calculate_gainuniform_normal_trunc_normal_	constant_ones_zeros_eye_dirac_xavier_uniform_xavier_normal_kaiming_uniform_kaiming_normal_orthogonal_sparse_uniformnormalconstanteyediracxavier_uniformxavier_normalkaiming_uniformkaiming_normal
orthogonalsparse_R_P)linearconv1dconv2dconv3dconv_transpose1dconv_transpose2dconv_transpose3dsigmoidtanhrelu
leaky_reluselu)fan_infan_out)tensorab	generatorreturnc                 C   s<   t    | j|||dW  d    S 1 s.0    Y  d S Nr6   )torchno_gradr
   r3   r4   r5   r6    r=   ;/var/www/auris/lib/python3.9/site-packages/torch/nn/init.py_no_grad_uniform_D   s    
r?   )r3   meanstdr6   r7   c                 C   s<   t    | j|||dW  d    S 1 s.0    Y  d S r8   )r:   r;   r   r3   r@   rA   r6   r=   r=   r>   _no_grad_normal_K   s    
rC   )r3   r@   rA   r4   r5   r6   r7   c           	      C   s   t t ddd}||d|  k s0||d|  kr>tjddd t  ||| | }||| | }| jd| d d| d |d |   | |t	d	  | 
| | j||d
 | W  d    S 1 s0    Y  d S )N)xr7   c                 S   s   dt | t d  d S )N      ?       @)matherfsqrt)rD   r=   r=   r>   norm_cdf^   s    z(_no_grad_trunc_normal_.<locals>.norm_cdf   zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.
stacklevel   r9   rF   )minmax)floatwarningswarnr:   r;   r
   Zerfinv_mul_rG   rI   Zadd_Zclamp_)	r3   r@   rA   r4   r5   r6   rJ   lur=   r=   r>   _no_grad_trunc_normal_U   s    	 
 
rW   )r3   valr7   c                 C   s6   t   | |W  d    S 1 s(0    Y  d S N)r:   r;   Zfill_r3   rX   r=   r=   r>   _no_grad_fill_   s    
r[   )r3   r7   c                 C   s4   t   |  W  d    S 1 s&0    Y  d S rY   )r:   r;   zero_r3   r=   r=   r>   _no_grad_zero_   s    
r^   )nonlinearityparamr7   c                 C   s   g d}| |v s| dkrdS | dkr(dS | dkr:t dS | dkr|d	u rPd
}n4t|tsdt|tsnt|trt|}ntd| dt dd|d   S | dkrdS td|  d	S )a  Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_ ,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain(
        ...     "leaky_relu", 0.2
        ... )  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    )r%   r&   r'   r(   r)   r*   r+   r,   rN   r-   g?r.   rF   r/   N{Gz?znegative_slope z not a valid numberrK   r0   g      ?zUnsupported nonlinearity )rG   rI   
isinstanceboolintrQ   
ValueError)r_   r`   Z
linear_fnsZnegative_sloper=   r=   r>   r	      s.    &	
r	           rE   c                 C   s4   t j| r&t jjt| f| |||dS t| |||S )a  Fill the input Tensor with values drawn from the uniform distribution.

    :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    r<   )r:   	overrideshas_torch_function_variadichandle_torch_functionr
   r?   r<   r=   r=   r>   r
      s
    r
   c                 C   s4   t j| r&t jjt| f| |||dS t| |||S )a  Fill the input Tensor with values drawn from the normal distribution.

    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    rB   )r:   rg   rh   ri   r   rC   rB   r=   r=   r>   r      s
    r          rF   c                 C   s   t | |||||dS )a  Fill the input Tensor with values drawn from a truncated normal distribution.

    The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    r9   )rW   )r3   r@   rA   r4   r5   r6   r=   r=   r>   r     s    r   c                 C   s,   t j| r"t jjt| f| |dS t| |S )zFill the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    rZ   )r:   rg   rh   ri   r   r[   rZ   r=   r=   r>   r   *  s
    
r   c                 C   s
   t | dS )zFill the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    rE   )r[   r]   r=   r=   r>   r   <  s    
r   c                 C   s   t | S )zFill the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    )r^   r]   r=   r=   r>   r   I  s    
r   c                 C   sV   |   dkrtdt & tj| j| | jd W d   n1 sH0    Y  | S )a=  Fill the 2-dimensional input `Tensor` with the identity matrix.

    Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    rK   ,Only tensors with 2 dimensions are supported)outrequires_gradN)
ndimensionre   r:   r;   r   shaperm   r]   r=   r=   r>   r   V  s
    
4r   rN   )r3   groupsr7   c                 C   s<  |   }|dvrtd|  }|d | dkr8td|d | }t||d }t  |   t|D ]}t|D ]}|dkrd| || | || dd f< qx|dkrd| || | || dd | dd f< qxd| || | || dd | dd | dd f< qxqlW d	   n1 s.0    Y  | S )
aF  Fill the {3, 4, 5}-dimensional input `Tensor` with the Dirac delta function.

    Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    )         z5Only tensors with 3, 4, or 5 dimensions are supportedr   z!dim 0 must be divisible by groupsrN   rq   rK   rr   N)rn   re   sizerO   r:   r;   r\   range)r3   rp   
dimensionssizesZout_chans_per_grpZmin_dimgdr=   r=   r>   r   k  sB    
"

&r   c                 C   sp   |   }|dk rtd| d}| d}d}|   dkrX| jdd  D ]}||9 }qJ|| }|| }||fS )NrK   zNFan in and fan out can not be computed for tensor with fewer than 2 dimensionsrN   r   )Zdimre   rt   ro   )r3   rv   Znum_input_fmapsZnum_output_fmapsZreceptive_field_sizesr1   r2   r=   r=   r>   _calculate_fan_in_and_fan_out  s    


r{   )r3   gainr6   r7   c                 C   sD   t | \}}|tdt||   }td| }t| | ||S )a  Fill the input `Tensor` with values using a Xavier uniform distribution.

    The method is described in `Understanding the difficulty of training
    deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain("relu"))

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.xavier_uniform_(w.T, ...)``.
    rF         @)r{   rG   rI   rQ   r?   )r3   r|   r6   r1   r2   rA   r4   r=   r=   r>   r     s    "r   c                 C   s4   t | \}}|tdt||   }t| d||S )a  Fill the input `Tensor` with values using a Xavier normal distribution.

    The method is described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor
    will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.xavier_normal_(w.T, ...)``.
    rF   rf   )r{   rG   rI   rQ   rC   )r3   r|   r6   r1   r2   rA   r=   r=   r>   r     s    !r   )r3   moder7   c                 C   sH   |  }ddg}||vr,td| d| t| \}}|dkrD|S |S )Nr1   r2   zMode z" not supported, please use one of )lowerre   r{   )r3   r~   Zvalid_modesr1   r2   r=   r=   r>   _calculate_correct_fan  s    r   r1   r/   )r3   r4   r~   r_   r6   r7   c           	   	   C   s   t j| r(t jjt| f| ||||dS d| jv r@td | S t| |}t	||}|t
| }t
d| }t  " | j| ||dW  d   S 1 s0    Y  dS )a  Fill the input `Tensor` with values using a Kaiming uniform distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode="fan_in", nonlinearity="relu")

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_uniform_(w.T, ...)``.
    )r3   r4   r~   r_   r6   r   ,Initializing zero-element tensors is a no-opr}   r9   N)r:   rg   rh   ri   r   ro   rR   rS   r   r	   rG   rI   r;   r
   )	r3   r4   r~   r_   r6   fanr|   rA   boundr=   r=   r>   r     s&    +





r   c                 C   sv   d| j v rtd | S t| |}t||}|t| }t   | j	d||dW  d   S 1 sh0    Y  dS )a  Fill the input `Tensor` with values using a Kaiming normal distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode="fan_out", nonlinearity="relu")

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_normal_(w.T, ...)``.
    r   r   r9   N)
ro   rR   rS   r   r	   rG   rI   r:   r;   r   )r3   r4   r~   r_   r6   r   r|   rA   r=   r=   r>   r   P  s    +




r   c           
      C   s   |   dk rtd|  dkr$| S | d}|  | }| ||fjdd|d}||k rd|  tj	|\}}t
|d}| }	||	9 }||k r|  t * | || | | W d   n1 s0    Y  | S )a  Fill the input `Tensor` with a (semi) orthogonal matrix.

    Described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    rK   z4Only tensors with 2 or more dimensions are supportedr   rN   r9   N)rn   re   Znumelrt   Z	new_emptyr   Zt_r:   ZlinalgZqrZdiagsignr;   Zview_asZcopy_rT   )
r3   r|   r6   rowscolsZ	flattenedqrry   phr=   r=   r>   r     s&    

(r   ra   )r3   sparsityrA   r6   r7   c           
      C   s   |   dkrtd| j\}}tt|| }t P | jd||d t	|D ]&}t
|}|d| }	d| |	|f< qRW d   n1 s0    Y  | S )a  Fill the 2D input `Tensor` as a sparse matrix.

    The non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    rK   rk   r   r9   N)rn   re   ro   rd   rG   ceilr:   r;   r   ru   Zrandperm)
r3   r   rA   r6   r   r   Z	num_zerosZcol_idxZrow_indicesZzero_indicesr=   r=   r>   r     s    


,r   )methr7   c                    sT    j d d tjtjtd fdd}d d d d|_|_ |S )	N)argskwargsr7   c                     s,   t jd d dtdd  | i |S )Nz	`nn.init.z)` is now deprecated in favor of `nn.init.z`.rK   rL   )rR   rS   FutureWarning)r   r   r   new_nameZold_namer=   r>   deprecated_init  s    z(_make_deprecate.<locals>.deprecated_initz
    z_(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.z"`.

    See :func:`~torch.nn.init.z` for details.)__name__r$   r   r   r#   __doc__)r   r   r=   r   r>   _make_deprecate  s    
r   )N)N)N)N)rf   rE   N)rf   rE   N)rf   rE   rj   rF   N)rN   )rE   N)rE   N)r   r1   r/   N)r   r1   r/   N)rN   N)ra   N)9r   rG   rR   typingr   r   r   Z	_Optionalr   r   Ztyping_extensionsr   r:   r   __all__r#   r$   Z_NonlinearityTypeZ_FanModerQ   	Generatorr?   rC   rW   r[   r^   rd   r	   r
   r   r   r   r   r   r   r   tupler{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r=   r=   r=   r>   <module>   s2     , L           5  +  '    C    7  6  ' 