
"""This file contains utilities for initializing neural network parameters."""

import math
import warnings
from typing import Optional as _Optional

import torch
from torch import Tensor


# These helpers wrap the in-place samplers in `torch.no_grad()` so that
# initialization never records autograd history.
def _no_grad_uniform_(tensor, a, b, generator=None):
    with torch.no_grad():
        return tensor.uniform_(a, b, generator=generator)


def _no_grad_normal_(tensor, mean, std, generator=None):
    with torch.no_grad():
        return tensor.normal_(mean, std, generator=generator)


def _no_grad_trunc_normal_(tensor, mean, std, a, b, generator=None):
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn(
            "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
            "The distribution of values may be incorrect.",
            stacklevel=2,
        )

    with torch.no_grad():
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate
        # to [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1, generator=generator)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.0))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def _no_grad_fill_(tensor, val):
    with torch.no_grad():
        return tensor.fill_(val)


def _no_grad_zero_(tensor):
    with torch.no_grad():
        return tensor.zero_()
def calculate_gain(nonlinearity, param=None):
    r"""Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_ ,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain('leaky_relu', 0.2)  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    """
    linear_fns = [
        "linear",
        "conv1d",
        "conv2d",
        "conv3d",
        "conv_transpose1d",
        "conv_transpose2d",
        "conv_transpose3d",
    ]
    if nonlinearity in linear_fns or nonlinearity == "sigmoid":
        return 1
    elif nonlinearity == "tanh":
        return 5.0 / 3
    elif nonlinearity == "relu":
        return math.sqrt(2.0)
    elif nonlinearity == "leaky_relu":
        if param is None:
            negative_slope = 0.01
        elif (
            not isinstance(param, bool)
            and isinstance(param, int)
            or isinstance(param, float)
        ):
            # True/False are instances of int, hence the check above
            negative_slope = param
        else:
            raise ValueError(f"negative_slope {param} not a valid number")
        return math.sqrt(2.0 / (1 + negative_slope**2))
    elif nonlinearity == "selu":
        return 3.0 / 4
    else:
        raise ValueError(f"Unsupported nonlinearity {nonlinearity}")
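
# --- Illustrative sketch (not part of the original module) -------------------
# A hedged demo of the gain table above; the helper name
# `_demo_calculate_gain` is hypothetical.
def _demo_calculate_gain() -> None:
    assert calculate_gain("linear") == 1
    assert calculate_gain("relu") == math.sqrt(2.0)
    # The default negative_slope for leaky_relu is 0.01.
    assert calculate_gain("leaky_relu") == math.sqrt(2.0 / (1 + 0.01**2))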
def uniform_(
    tensor: Tensor,
    a: float = 0.0,
    b: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input Tensor with values drawn from the uniform distribution.

    :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            uniform_, (tensor,), tensor=tensor, a=a, b=b, generator=generator
        )
    return _no_grad_uniform_(tensor, a, b, generator)


def normal_(
    tensor: Tensor,
    mean: float = 0.0,
    std: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input Tensor with values drawn from the normal distribution.

    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            normal_, (tensor,), tensor=tensor, mean=mean, std=std, generator=generator
        )
    return _no_grad_normal_(tensor, mean, std, generator)
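
# --- Illustrative sketch (not part of the original module) -------------------
# Every sampler in this file accepts a `generator`; seeding two generators the
# same way makes the draws reproducible without touching the global RNG.
# The helper name `_demo_reproducible_init` is hypothetical.
def _demo_reproducible_init() -> None:
    g1 = torch.Generator().manual_seed(0)
    g2 = torch.Generator().manual_seed(0)
    w1 = normal_(torch.empty(3, 5), mean=0.0, std=1.0, generator=g1)
    w2 = normal_(torch.empty(3, 5), mean=0.0, std=1.0, generator=g2)
    assert torch.equal(w1, w2)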
r   )r	   rW   rX   rY   r   r   r   s       r   r   r      sO    ( 226::44fYvc 5 
 	
 F#99r#   c           	          [        XX#XES9$ )a  Fill the input Tensor with values drawn from a truncated normal distribution.

The values are effectively drawn from the
normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
with values outside :math:`[a, b]` redrawn until they are within
the bounds. The method used for generating the random values works
best when :math:`a \leq \text{mean} \leq b`.

Args:
    tensor: an n-dimensional `torch.Tensor`
    mean: the mean of the normal distribution
    std: the standard deviation of the normal distribution
    a: the minimum cutoff value
    b: the maximum cutoff value
    generator: the torch Generator to sample from (default: None)

Examples:
    >>> w = torch.empty(3, 5)
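
# --- Illustrative sketch (not part of the original module) -------------------
# Every value produced by trunc_normal_ lies inside [a, b]; the inverse-CDF
# transform plus the final clamp guarantee it. `_demo_trunc_normal_bounds` is
# a hypothetical helper.
def _demo_trunc_normal_bounds() -> None:
    w = trunc_normal_(torch.empty(10000), mean=0.0, std=1.0, a=-2.0, b=2.0)
    assert w.min().item() >= -2.0 and w.max().item() <= 2.0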
def constant_(tensor: Tensor, val: float) -> Tensor:
    r"""Fill the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            constant_, (tensor,), tensor=tensor, val=val
        )
    return _no_grad_fill_(tensor, val)


def ones_(tensor: Tensor) -> Tensor:
    r"""Fill the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    """
    return _no_grad_fill_(tensor, 1.0)


def zeros_(tensor: Tensor) -> Tensor:
    r"""Fill the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    """
    return _no_grad_zero_(tensor)


def eye_(tensor):
    r"""Fill the 2-dimensional input `Tensor` with the identity matrix.

    Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    with torch.no_grad():
        torch.eye(*tensor.shape, out=tensor, requires_grad=tensor.requires_grad)
    return tensor
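
# --- Illustrative sketch (not part of the original module) -------------------
# With an eye_-initialized square weight, x @ w.T is x itself, which is the
# sense in which eye_ "preserves the identity of the inputs" in Linear layers.
# `_demo_eye_identity` is a hypothetical helper.
def _demo_eye_identity() -> None:
    w = eye_(torch.empty(5, 5))
    x = torch.randn(2, 5)
    assert torch.allclose(x @ w.T, x)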
def dirac_(tensor, groups=1):
    r"""Fill the {3, 4, 5}-dimensional input `Tensor` with the Dirac delta function.

    Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    """
    dimensions = tensor.ndimension()
    if dimensions not in [3, 4, 5]:
        raise ValueError("Only tensors with 3, 4, or 5 dimensions are supported")

    sizes = tensor.size()

    if sizes[0] % groups != 0:
        raise ValueError("dim 0 must be divisible by groups")

    out_chans_per_grp = sizes[0] // groups
    min_dim = min(out_chans_per_grp, sizes[1])

    with torch.no_grad():
        tensor.zero_()

        for g in range(groups):
            for d in range(min_dim):
                if dimensions == 3:  # Temporal convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2] = 1
                elif dimensions == 4:  # Spatial convolution
                    tensor[
                        g * out_chans_per_grp + d,
                        d,
                        tensor.size(2) // 2,
                        tensor.size(3) // 2,
                    ] = 1
                else:  # Volumetric convolution
                    tensor[
                        g * out_chans_per_grp + d,
                        d,
                        tensor.size(2) // 2,
                        tensor.size(3) // 2,
                        tensor.size(4) // 2,
                    ] = 1
    return tensor
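
# --- Illustrative sketch (not part of the original module) -------------------
# A dirac_-initialized kernel has a single 1 at its spatial center per matching
# in/out channel pair, so a padded convolution passes the input through
# unchanged. `_demo_dirac_passthrough` is a hypothetical helper.
def _demo_dirac_passthrough() -> None:
    import torch.nn.functional as F

    w = dirac_(torch.empty(3, 3, 3, 3))  # (out_channels, in_channels, kH, kW)
    x = torch.randn(1, 3, 8, 8)
    assert torch.allclose(F.conv2d(x, w, padding=1), x)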
def _calculate_fan_in_and_fan_out(tensor):
    dimensions = tensor.dim()
    if dimensions < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    num_input_fmaps = tensor.size(1)
    num_output_fmaps = tensor.size(0)
    receptive_field_size = 1
    if tensor.dim() > 2:
        # Accumulate the product of the trailing (kernel) dimensions manually.
        for s in tensor.shape[2:]:
            receptive_field_size *= s
    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out
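
# --- Illustrative sketch (not part of the original module) -------------------
# For a conv weight of shape (out_channels, in_channels, *kernel), fan_in is
# in_channels times the receptive-field size and fan_out is out_channels times
# that size. `_demo_fan_for_conv` is a hypothetical helper.
def _demo_fan_for_conv() -> None:
    w = torch.empty(16, 8, 3, 3)  # 3x3 kernel -> receptive field size 9
    assert _calculate_fan_in_and_fan_out(w) == (8 * 9, 16 * 9)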
def xavier_uniform_(
    tensor: Tensor,
    gain: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input `Tensor` with values using a Xavier uniform distribution.

    The method is described in `Understanding the difficulty of training
    deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.xavier_uniform_(w.T, ...)``.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation

    return _no_grad_uniform_(tensor, -a, a, generator)
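
# --- Illustrative sketch (not part of the original module) -------------------
# Values from xavier_uniform_ never exceed the Glorot bound
# a = gain * sqrt(6 / (fan_in + fan_out)). `_demo_xavier_uniform_bound` is a
# hypothetical helper.
def _demo_xavier_uniform_bound() -> None:
    w = xavier_uniform_(torch.empty(100, 200))  # fan_in=200, fan_out=100
    assert w.abs().max().item() <= math.sqrt(6.0 / (200 + 100))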
def xavier_normal_(
    tensor: Tensor,
    gain: float = 1.0,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fill the input `Tensor` with values using a Xavier normal distribution.

    The method is described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor
    will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.xavier_normal_(w.T, ...)``.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))

    return _no_grad_normal_(tensor, 0.0, std, generator)


def _calculate_correct_fan(tensor, mode):
    mode = mode.lower()
    valid_modes = ["fan_in", "fan_out"]
    if mode not in valid_modes:
        raise ValueError(f"Mode {mode} not supported, please use one of {valid_modes}")

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    return fan_in if mode == "fan_in" else fan_out
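
# --- Illustrative sketch (not part of the original module) -------------------
# For a large matrix the empirical std of xavier_normal_ should land near
# gain * sqrt(2 / (fan_in + fan_out)); the loose tolerance below only hedges
# sampling noise. `_demo_xavier_normal_std` is a hypothetical helper.
def _demo_xavier_normal_std() -> None:
    w = xavier_normal_(torch.empty(500, 500))
    expected = math.sqrt(2.0 / (500 + 500))
    assert abs(w.std().item() - expected) < 0.05 * expected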
def kaiming_uniform_(
    tensor: Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the input `Tensor` with values using a Kaiming uniform distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_uniform_(w.T, ...)``.
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            kaiming_uniform_,
            (tensor,),
            tensor=tensor,
            a=a,
            mode=mode,
            nonlinearity=nonlinearity,
            generator=generator,
        )

    if 0 in tensor.shape:
        warnings.warn("Initializing zero-element tensors is a no-op")
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    with torch.no_grad():
        return tensor.uniform_(-bound, bound, generator=generator)
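
# --- Illustrative sketch (not part of the original module) -------------------
# With mode='fan_in' and the ReLU gain sqrt(2), the pre-activation x @ w.T has
# second moment ~2, and the ReLU halves it back to ~1, so signal magnitude is
# roughly preserved layer to layer. `_demo_kaiming_second_moment` is a
# hypothetical helper.
def _demo_kaiming_second_moment() -> None:
    w = kaiming_uniform_(torch.empty(256, 256), mode="fan_in", nonlinearity="relu")
    x = torch.randn(4096, 256)
    y = torch.relu(x @ w.T)
    assert 0.5 < y.pow(2).mean().item() < 2.0  # E[y**2] ~ 1 up to sampling noise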
def kaiming_normal_(
    tensor: Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the input `Tensor` with values using a Kaiming normal distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_normal_(w.T, ...)``.
    """
    if 0 in tensor.shape:
        warnings.warn("Initializing zero-element tensors is a no-op")
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    with torch.no_grad():
        return tensor.normal_(0, std, generator=generator)
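
# --- Illustrative sketch (not part of the original module) -------------------
# mode='fan_out' scales by the output dimension instead, keeping gradient
# magnitudes stable through w.T in the backward pass; for shape (128, 512)
# fan_out is 128. `_demo_kaiming_fan_out_std` is a hypothetical helper.
def _demo_kaiming_fan_out_std() -> None:
    w = kaiming_normal_(torch.empty(128, 512), mode="fan_out", nonlinearity="relu")
    expected = math.sqrt(2.0 / 128)  # gain / sqrt(fan_out)
    assert abs(w.std().item() - expected) < 0.1 * expected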
def orthogonal_(
    tensor,
    gain=1,
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the input `Tensor` with a (semi) orthogonal matrix.

    Described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    if tensor.numel() == 0:
        # no-op
        return tensor
    rows = tensor.size(0)
    cols = tensor.numel() // rows
    flattened = tensor.new_empty((rows, cols)).normal_(0, 1, generator=generator)

    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.linalg.qr(flattened)
    # Make Q uniform
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
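
# --- Illustrative sketch (not part of the original module) -------------------
# For a tall matrix the columns produced by orthogonal_ are orthonormal (up to
# the gain), i.e. w.T @ w is the identity. `_demo_orthogonal_columns` is a
# hypothetical helper.
def _demo_orthogonal_columns() -> None:
    w = orthogonal_(torch.empty(8, 4))
    assert torch.allclose(w.T @ w, torch.eye(4), atol=1e-6)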
def sparse_(
    tensor,
    sparsity,
    std=0.01,
    generator: _Optional[torch.Generator] = None,
):
    r"""Fill the 2D input `Tensor` as a sparse matrix.

    The non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    rows, cols = tensor.shape
    num_zeros = int(math.ceil(sparsity * rows))

    with torch.no_grad():
        tensor.normal_(0, std, generator=generator)
        for col_idx in range(cols):
            row_indices = torch.randperm(rows)
            zero_indices = row_indices[:num_zeros]
            tensor[zero_indices, col_idx] = 0
    return tensor


def _make_deprecate(meth):
    new_name = meth.__name__
    old_name = new_name[:-1]

    def deprecated_init(*args, **kwargs):
        warnings.warn(
            f"`nn.init.{old_name}` is now deprecated in favor of `nn.init.{new_name}`.",
            FutureWarning,
            stacklevel=2,
        )
        return meth(*args, **kwargs)

    deprecated_init.__doc__ = rf"""
    {old_name}(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.{new_name}`.

    See :func:`~torch.nn.init.{new_name}` for details."""
    deprecated_init.__name__ = old_name
    return deprecated_init


# for backward compatibility
uniform = _make_deprecate(uniform_)
normal = _make_deprecate(normal_)
constant = _make_deprecate(constant_)
eye = _make_deprecate(eye_)
dirac = _make_deprecate(dirac_)
xavier_uniform = _make_deprecate(xavier_uniform_)
xavier_normal = _make_deprecate(xavier_normal_)
kaiming_uniform = _make_deprecate(kaiming_uniform_)
kaiming_normal = _make_deprecate(kaiming_normal_)
orthogonal = _make_deprecate(orthogonal_)
sparse = _make_deprecate(sparse_)