
    JThBd                     P   S SK JrJr  S SKJr  S SKrS SKJs  Jr	  S SKJ
r
  S SKJr  S SKJr  / SQrSS jrS	 r " S
 S\5      r " S S\5      r  S SS.S\S\S\\   S\S\4
S jjjr " S S\5      rS!S\S\S\4S jjr " S S\5      r    S"S\S\S\S\S\\   S\4S jjrg)#    )autoEnum)OptionalN)Tensor)Module)parametrize)
orthogonalspectral_normweight_normc                 :   U R                  S5      U R                  S5      p2[        R                  " X0R                  U R                  S9nSU-  [        R
                  " U R                  5      R                  -  n[        R                  " U R                  U -  XAS9$ )Ndtypedeviceg      $@)atol)	sizetorcheyer   r   finfoepsallclosemH)Qr   nkIds        W/var/www/auris/envauris/lib/python3.13/site-packages/torch/nn/utils/parametrizations.py_is_orthogonalr      sk    66":qvvbzq	1GGAHH	5B
(U[[)--
-C>>!$$(B11    c                     [         R                  " U 5      u  p[         R                  R                  X5      nX1R	                  SSS9R                  5       R                  S5      -  nU$ )z{Assume that A is a tall matrix.

Compute the Q factor s.t. A = QR (A may be complex) and diag(R) is real and non-negative.
r   r   dim1dim2)r   geqrflinalghouseholder_productdiagonalsgn	unsqueeze)AXtaur   s       r   _make_orthogonalr.      sU    
 [[^FA((0A"	%	)	)	+	5	5b	99AHr    c                   >    \ rS rSr\" 5       r\" 5       r\" 5       rSrg)	_OrthMaps#    N)	__name__
__module____qualname____firstlineno__r   
matrix_expcayleyhouseholder__static_attributes__r2   r    r   r0   r0   #   s    JVF&Kr    r0   c                      ^  \ rS rSr% \\S'   SS.S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr	\R                  R                  5       S\R                  S\R                  4S j5       rSrU =r$ )_Orthogonal)   baseTuse_trivializationorthogonal_mapreturnNc                   > [         TU ]  5         UR                  5       (       a  U[        R                  :X  a  [        S5      eUR                  U l        X l        U(       a  U R                  SS 5        g g )NzAThe householder parametrization does not support complex tensors.r>   )	super__init__
is_complexr0   r9   
ValueErrorshaperA   register_buffer)selfweightrA   r@   	__class__s       r   rE   _Orthogonal.__init__,   si     	 >Y5J5J#JS  \\
,  . r    r,   c           	         UR                  S5      UR                  S5      p2X#:  nU(       a  UR                  nX2p2U R                  [        R                  :X  d  U R                  [        R
                  :X  Ga=  UR                  5       nX#:w  aI  [        R                  " XR                  X"U-
  5      R                  " / UR                  S S QSPSP76 /SS9nXR                  -
  nU R                  [        R                  :X  a  [        R                  " U5      nOU R                  [        R
                  :X  ao  [        R                  " X%R                  UR                  S9n[        R                   R#                  [        R$                  " XuSS9[        R$                  " XuSS95      nX#:w  a	  WSS U24   nOtUR                  SS	9nS
SXU-  R'                  SS9-   -  n[        R                   R)                  XX5      nXaR+                  SSS9R-                  5       R/                  S5      -  n[1        U S5      (       a  U R2                  W-  nU(       a  WR                  nW$ )Nr   r   dimr   g      )alphag      ?.)r(   g       @g      ?r"   r>   )r   mTrA   r0   r7   r8   trilr   cat	new_zerosexpandrH   r   r   r   r   r&   solveaddsumr'   r(   intr*   hasattrr>   )	rJ   r,   r   r   
transposedr+   r   r   r-   s	            r   forward_Orthogonal.forwardG   s   vvbz166":1U
Aq 9#7#77""i&6&66 AvIIA1u-44KaggcrlKBKKLRT DDA""i&:&::$$Q'$$	(8(88YYqALL&&IIb40%))B2M vc2A2gJ #A{{r{223C008A JJBRJ0446@@DDA4  		AAAr    r   c                 "   UR                   U R                   :w  a&  [        SU R                    SUR                    S35      eUnUR                  S5      UR                  S5      pCX4:  nU(       a  UR                  nXCpC[	        U S5      (       d  U R
                  [        R                  :X  d  U R
                  [        R                  :X  a  [        S5      e[        R                  " U5      u  pgUR                  SSS9R                  5         UR                  SSS9US	:H  ==   S-  ss'   U(       a  UR                  $ U$ X4:X  a-  [        U5      (       d  [        U5      nOuUR!                  5       nOd[        R"                  " UR                  5       S S X3U-
  4-   UR$                  UR&                  S
.6n[        R(                  " X/SS9n[        U5      nXl        [        R,                  " U5      n	U	R                  SSS9R/                  S5        U	$ )Nz0Expected a matrix or batch of matrices of shape z. Got a tensor of shape .r   r   r>   ztIt is not possible to assign to the matrix exponential or the Cayley parametrizations when use_trivialization=False.r"   g        r   rO   g      )rH   rG   r   rR   r[   rA   r0   r8   r7   NotImplementedErrorr   r%   r(   sign_r   r.   clonerandnr   r   rT   r>   
zeros_likefill_)
rJ   r   Q_initr   r   	transposer+   r-   Nneg_Ids
             r   right_inverse_Orthogonal.right_inversev   s   77djj B4::, O))*	4 
 vvbz166":1E	Aq tV$$ ##y'7'77&&)*>*>>)T  [[^FA JJBRJ(..0 JJBRJ(4:4$144+!+v%a(((+A	A KKffhsmqa%j0 IIqf"-$Q'I %%f-FOO"O-33D9Mr    )r>   rA   rH   )r3   r4   r5   r6   r   __annotations__r0   rE   r   r]   autogradno_gradrk   r:   __classcell__rL   s   @r   r<   r<   )   s    
L HL/&//	/ /6- -%,, -^ ^^Eu|| E E Er    r<   Tr?   modulenamerA   r@   rB   c                   [        XS5      n[        U[        5      (       d  [        SU  SU S35      eUR                  S:  a  [        SUR                   S35      eUc=  UR                  S5      UR                  S	5      :X  d  UR                  5       (       a  S
OSn[        [        US5      nUc  [        SU 35      e[        XEUS9n[        R                  " XUSS9  U $ )a  Apply an orthogonal or unitary parametrization to a matrix or a batch of matrices.

Letting :math:`\mathbb{K}` be :math:`\mathbb{R}` or :math:`\mathbb{C}`, the parametrized
matrix :math:`Q \in \mathbb{K}^{m \times n}` is **orthogonal** as

.. math::

    \begin{align*}
        Q^{\text{H}}Q &= \mathrm{I}_n \mathrlap{\qquad \text{if }m \geq n}\\
        QQ^{\text{H}} &= \mathrm{I}_m \mathrlap{\qquad \text{if }m < n}
    \end{align*}

where :math:`Q^{\text{H}}` is the conjugate transpose when :math:`Q` is complex
and the transpose when :math:`Q` is real-valued, and
:math:`\mathrm{I}_n` is the `n`-dimensional identity matrix.
In plain words, :math:`Q` will have orthonormal columns whenever :math:`m \geq n`
and orthonormal rows otherwise.

If the tensor has more than two dimensions, we consider it as a batch of matrices of shape `(..., m, n)`.

The matrix :math:`Q` may be parametrized via three different ``orthogonal_map`` in terms of the original tensor:

- ``"matrix_exp"``/``"cayley"``:
  the :func:`~torch.matrix_exp` :math:`Q = \exp(A)` and the `Cayley map`_
  :math:`Q = (\mathrm{I}_n + A/2)(\mathrm{I}_n - A/2)^{-1}` are applied to a skew-symmetric
  :math:`A` to give an orthogonal matrix.
- ``"householder"``: computes a product of Householder reflectors
  (:func:`~torch.linalg.householder_product`).

``"matrix_exp"``/``"cayley"`` often make the parametrized weight converge faster than
``"householder"``, but they are slower to compute for very thin or very wide matrices.

If ``use_trivialization=True`` (default), the parametrization implements the "Dynamic Trivialization Framework",
where an extra matrix :math:`B \in \mathbb{K}^{n \times n}` is stored under
``module.parametrizations.weight[0].base``. This helps the
convergence of the parametrized layer at the expense of some extra memory use.
See `Trivializations for Gradient-Based Optimization on Manifolds`_ .

Initial value of :math:`Q`:
If the original tensor is not parametrized and ``use_trivialization=True`` (default), the initial value
of :math:`Q` is that of the original tensor if it is orthogonal (or unitary in the complex case)
and it is orthogonalized via the QR decomposition otherwise (see :func:`torch.linalg.qr`).
Same happens when it is not parametrized and ``orthogonal_map="householder"`` even when ``use_trivialization=False``.
Otherwise, the initial value is the result of the composition of all the registered
parametrizations applied to the original tensor.

.. note::
    This function is implemented using the parametrization functionality
    in :func:`~torch.nn.utils.parametrize.register_parametrization`.


.. _`Cayley map`: https://en.wikipedia.org/wiki/Cayley_transform#Matrix_map
.. _`Trivializations for Gradient-Based Optimization on Manifolds`: https://arxiv.org/abs/1909.09501

Args:
    module (nn.Module): module on which to register the parametrization.
    name (str, optional): name of the tensor to make orthogonal. Default: ``"weight"``.
    orthogonal_map (str, optional): One of the following: ``"matrix_exp"``, ``"cayley"``, ``"householder"``.
        Default: ``"matrix_exp"`` if the matrix is square or complex, ``"householder"`` otherwise.
    use_trivialization (bool, optional): whether to use the dynamic trivialization framework.
        Default: ``True``.

Returns:
    The original module with an orthogonal parametrization registered to the specified
    weight

Example::

    >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
    >>> orth_linear = orthogonal(nn.Linear(20, 40))
    >>> orth_linear
    ParametrizedLinear(
    in_features=20, out_features=40, bias=True
    (parametrizations): ModuleDict(
        (weight): ParametrizationList(
        (0): _Orthogonal()
        )
    )
    )
    >>> # xdoctest: +IGNORE_WANT
    >>> Q = orth_linear.weight
    >>> torch.dist(Q.T @ Q, torch.eye(20))
    tensor(4.9332e-07)
NModule '(' has no parameter or buffer with name ''   z8Expected a matrix or batch of matrices. Got a tensor of z dimensions.r   r   r7   r9   zLorthogonal_map has to be one of "matrix_exp", "cayley", "householder". Got: r?   Tunsafe)getattr
isinstancer   rG   ndimr   rF   r0   r<   r   register_parametrization)rr   rs   rA   r@   rK   	orth_enumorths          r   r	   r	      s   v V4(Fff%%vhFtfAN
 	
 {{Q%{{m<9
 	

  {{2&++b/1V5F5F5H5H  	 	>48I"#%
 	
 v=OPD((tDIMr    c                   L   ^  \ rS rSr S	S\\   SS4U 4S jjjrS rS rSr	U =r
$ )
_WeightNormi:  rP   rB   Nc                 8   > [         TU ]  5         Uc  SnXl        g )Nr   )rD   rE   rP   )rJ   rP   rL   s     r   rE   _WeightNorm.__init__;  s     	;Cr    c                 D    [         R                  " X!U R                  5      $ N)r   _weight_normrP   )rJ   weight_gweight_vs      r   r]   _WeightNorm.forwardD  s    !!(dhh??r    c                 P    [         R                  " USU R                  5      nUnX#4$ )Nrx   )r   norm_except_dimrP   )rJ   rK   r   r   s       r   rk   _WeightNorm.right_inverseG  s(    ((DHH=!!r    rO   )r   )r3   r4   r5   r6   r   rZ   rE   r]   rk   r:   rp   rq   s   @r   r   r   :  s8     c] 
 @" "r    r   rP   c                 z   ^ [        U5      n[        R                  " U TUSS9  U4S jnU R                  U5        U $ )a  Apply weight normalization to a parameter in the given module.

.. math::
     \mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

Weight normalization is a reparameterization that decouples the magnitude
of a weight tensor from its direction. This replaces the parameter specified
by :attr:`name` with two parameters: one specifying the magnitude
and one specifying the direction.

By default, with ``dim=0``, the norm is computed independently per output
channel/plane. To compute a norm over the entire weight tensor, use
``dim=None``.

See https://arxiv.org/abs/1602.07868

Args:
    module (Module): containing module
    name (str, optional): name of weight parameter
    dim (int, optional): dimension over which to compute the norm

Returns:
    The original module with the weight norm hook

Example::

    >>> m = weight_norm(nn.Linear(20, 40), name='weight')
    >>> m
    ParametrizedLinear(
      in_features=20, out_features=40, bias=True
      (parametrizations): ModuleDict(
        (weight): ParametrizationList(
          (0): _WeightNorm()
        )
      )
    )
    >>> m.parametrizations.weight.original0.size()
    torch.Size([40, 1])
    >>> m.parametrizations.weight.original1.size()
    torch.Size([40, 20])

Try   c                    > U T S3nU T S3nXp;   a=  X;   a7  U R                  U5      n	U R                  U5      n
XU ST S3'   XU ST S3'   g g g )N_g_vzparametrizations.z
.original0z
.original1)pop)
state_dictprefixlocal_metadatastrictmissing_keysunexpected_keys
error_msgsg_keyv_key	original0	original1rs   s              r   _weight_norm_compat_hook-weight_norm.<locals>._weight_norm_compat_hook|  s     (4&#(4&#5#6"u-I"u-IGP&!24&
CDGP&!24&
CD	 $7r    )r   r   r~   "_register_load_state_dict_pre_hook)rr   rs   rP   r   r   s    `   r   r   r   N  s@    V s#L((|DQQ" --.FGMr    c                   p  ^  \ rS rSr   SS\R
                  S\S\S\SS4
U 4S jjjrS\R
                  S\R
                  4S	 jr	\R                  R                  5       S
\R
                  S\SS4S j5       rS\R
                  S\R
                  4S jrS\R
                  S\R
                  4S jrSrU =r$ )_SpectralNormi  rK   n_power_iterationsrP   r   rB   Nc           	        > [         TU ]  5         UR                  nX5:  d  X5* :  a  [        SU SUS-
   SU S35      eUS::  a  [	        SU 35      eUS:  a  UOX5-   U l        X@l        US:  a  X l        U R                  U5      nUR                  5       u  pxUR                  U5      R                  SS5      n	UR                  U5      R                  SS5      n
U R                  S[        R                  " U	SU R                  S	95        U R                  S
[        R                  " U
SU R                  S	95        U R                  US5        g g )Nz5Dimension out of range (expected to be in range of [-z,    z
] but got )r   zGExpected n_power_iterations to be positive, but got n_power_iterations=_urP   r   r      )rD   rE   r}   
IndexErrorrG   rP   r   r   _reshape_weight_to_matrixr   	new_emptynormal_rI   F	normalize_power_method)rJ   rK   r   rP   r   r}   
weight_mathwuvrL   s              r   rE   _SpectralNorm.__init__  sO    	{{;#+F"TAXJjQ8 
 "**<)=?  (3
!8&8#77?J??$DA$$Q'//15A$$Q'//15A  q{{1!'JK  q{{1!'JK z2. r    c                    ^  UR                   S:  d   eT R                  S:w  a>  UR                  " T R                  /U 4S j[        UR                  5       5       5       Q76 nUR	                  S5      $ )Nr   r   c              3   J   >#    U  H  oTR                   :w  d  M  Uv   M     g 7fr   rO   ).0drJ   s     r   	<genexpr>:_SpectralNorm._reshape_weight_to_matrix.<locals>.<genexpr>  s     L':!488mAA':s   #	#)r}   rP   permuterangeflatten)rJ   rK   s   ` r   r   '_SpectralNorm._reshape_weight_to_matrix  s\    {{Q88q=^^LuVZZ\':LF ~~a  r    r   c                    UR                   S:  d   e[        U5       H  n[        R                  " [        R
                  " XR                  5      SU R                  U R                  S9U l        [        R                  " [        R
                  " UR                  U R                  5      SU R                  U R                  S9U l        M     g )Nr   r   )rP   r   out)
r}   r   r   r   r   mvr   r   r   H)rJ   r   r   _s       r   r   _SpectralNorm._power_method  s    D """)*A kkWW-HHGG	DG kktww/HHGG	DG +r    c                    UR                   S:X  a   [        R                  " USU R                  S9$ U R	                  U5      nU R
                  (       a  U R                  X R                  5        U R                  R                  [        R                  S9nU R                  R                  [        R                  S9n[        R                  " U[        R                  " X$5      5      nX-  $ )Nr   r   r   )memory_format)r}   r   r   r   r   trainingr   r   r   rc   r   contiguous_formatr   vdotr   )rJ   rK   r   r   r   sigmas         r   r]   _SpectralNorm.forward  s    ;;!;;v1$((;;77?J}}"":/F/FGE,C,CDAE,C,CDA JJq%((:"9:E>!r    valuec                     U$ r   r2   )rJ   r   s     r   rk   _SpectralNorm.right_inverse	  s	     r    )r   r   rP   r   r   )r   r   -q=)r3   r4   r5   r6   r   r   rZ   floatrE   r   rn   ro   r   r]   rk   r:   rp   rq   s   @r   r   r     s     #$#/#/  #/ 	#/
 #/ 
#/ #/J
! 
! 
! ^^2 2# 2RV 2 2h"ell "u|| ""5<< ELL  r    r   r   r   c           
      v   [        XS5      n[        U[        5      (       d  [        SU  SU S35      eUca  [        U [        R
                  R                  [        R
                  R                  [        R
                  R                  45      (       a  SnOSn[        R                  " X[        XRXC5      5        U $ )as  Apply spectral normalization to a parameter in the given module.

.. math::
    \mathbf{W}_{SN} = \dfrac{\mathbf{W}}{\sigma(\mathbf{W})},
    \sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \ne 0} \dfrac{\|\mathbf{W} \mathbf{h}\|_2}{\|\mathbf{h}\|_2}

When applied on a vector, it simplifies to

.. math::
    \mathbf{x}_{SN} = \dfrac{\mathbf{x}}{\|\mathbf{x}\|_2}

Spectral normalization stabilizes the training of discriminators (critics)
in Generative Adversarial Networks (GANs) by reducing the Lipschitz constant
of the model. :math:`\sigma` is approximated performing one iteration of the
`power method`_ every time the weight is accessed. If the dimension of the
weight tensor is greater than 2, it is reshaped to 2D in power iteration
method to get spectral norm.


See `Spectral Normalization for Generative Adversarial Networks`_ .

.. _`power method`: https://en.wikipedia.org/wiki/Power_iteration
.. _`Spectral Normalization for Generative Adversarial Networks`: https://arxiv.org/abs/1802.05957

.. note::
    This function is implemented using the parametrization functionality
    in :func:`~torch.nn.utils.parametrize.register_parametrization`. It is a
    reimplementation of :func:`torch.nn.utils.spectral_norm`.

.. note::
    When this constraint is registered, the singular vectors associated to the largest
    singular value are estimated rather than sampled at random. These are then updated
    performing :attr:`n_power_iterations` of the `power method`_ whenever the tensor
    is accessed with the module on `training` mode.

.. note::
    If the `_SpectralNorm` module, i.e., `module.parametrization.weight[idx]`,
    is in training mode on removal, it will perform another power iteration.
    If you'd like to avoid this iteration, set the module to eval mode
    before its removal.

Args:
    module (nn.Module): containing module
    name (str, optional): name of weight parameter. Default: ``"weight"``.
    n_power_iterations (int, optional): number of power iterations to
        calculate spectral norm. Default: ``1``.
    eps (float, optional): epsilon for numerical stability in
        calculating norms. Default: ``1e-12``.
    dim (int, optional): dimension corresponding to number of outputs.
        Default: ``0``, except for modules that are instances of
        ConvTranspose{1,2,3}d, when it is ``1``

Returns:
    The original module with a new parametrization registered to the specified
    weight

Example::

    >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
    >>> # xdoctest: +IGNORE_WANT("non-deterministic")
    >>> snm = spectral_norm(nn.Linear(20, 40))
    >>> snm
    ParametrizedLinear(
      in_features=20, out_features=40, bias=True
      (parametrizations): ModuleDict(
        (weight): ParametrizationList(
          (0): _SpectralNorm()
        )
      )
    )
    >>> torch.linalg.matrix_norm(snm.weight, 2)
    tensor(1.0081, grad_fn=<AmaxBackward0>)
Nru   rv   rw   r   r   )r{   r|   r   rG   r   nnConvTranspose1dConvTranspose2dConvTranspose3dr   r~   r   )rr   rs   r   r   rP   rK   s         r   r
   r
     s    ` V4(Fff%%vhFtfAN
 	
 {((((((
 
 CC((mFI Mr    r   )rK   N)rK   r   )rK   r   r   N)enumr   r   typingr   r   torch.nn.functionalr   
functionalr   r   torch.nn.modulesr   torch.nn.utilsr   __all__r   r.   r0   r<   strboolr	   r   rZ   r   r   r   r
   r2   r    r   <module>r      s*         # & 92	 S& Sp $(x
  $xx
x SMx
 x xv"& "(@ @c @3 @F{F {@ ee
e e 
	e
 
#e er    