
import math
import numbers
import warnings
import weakref
from typing import Optional, overload

import torch
from torch import _VF, Tensor
from torch.nn import init
from torch.nn.parameter import Parameter
from torch.nn.utils.rnn import PackedSequence
from typing_extensions import deprecated

from .module import Module


__all__ = [
    "RNNBase",
    "RNN",
    "LSTM",
    "GRU",
    "RNNCellBase",
    "RNNCell",
    "LSTMCell",
    "GRUCell",
]

_rnn_impls = {
    "RNN_TANH": _VF.rnn_tanh,
    "RNN_RELU": _VF.rnn_relu,
}


def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
    return tensor.index_select(dim, permutation)


@deprecated(
    "`apply_permutation` is deprecated, "
    "please use `tensor.index_select(dim, permutation)` instead",
    category=FutureWarning,
)
def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
    return _apply_permutation(tensor, permutation, dim)


class RNNBase(Module):
    r"""Base class for RNN modules (RNN, LSTM, GRU).

Implements aspects of RNNs shared by the RNN, LSTM, and GRU classes, such as module initialization
and utility methods for parameter storage management.

.. note::
    The forward method is not implemented by the RNNBase class.

.. note::
    LSTM and GRU classes override some methods implemented by RNNBase.
"""

    __constants__ = [
        "mode",
        "input_size",
        "hidden_size",
        "num_layers",
        "bias",
        "batch_first",
        "dropout",
        "bidirectional",
        "proj_size",
    ]
    __jit_unused_properties__ = ["all_weights"]

    mode: str
    input_size: int
    hidden_size: int
    num_layers: int
    bias: bool
    batch_first: bool
    dropout: float
    bidirectional: bool
    proj_size: int

    def __init__(
        self,
        mode: str,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        bias: bool = True,
        batch_first: bool = False,
        dropout: float = 0.0,
        bidirectional: bool = False,
        proj_size: int = 0,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        self.proj_size = proj_size
        self._flat_weight_refs: list[Optional[weakref.ReferenceType[Parameter]]] = []
        num_directions = 2 if bidirectional else 1

        if (
            not isinstance(dropout, numbers.Number)
            or not 0 <= dropout <= 1
            or isinstance(dropout, bool)
        ):
            raise ValueError(
                "dropout should be a number in range [0, 1] "
                "representing the probability of an element being zeroed"
            )
        if dropout > 0 and num_layers == 1:
            warnings.warn(
                "dropout option adds dropout after all but last "
                "recurrent layer, so non-zero dropout expects "
                f"num_layers greater than 1, but got dropout={dropout} "
                f"and num_layers={num_layers}"
            )
        if not isinstance(hidden_size, int):
            raise TypeError(
                f"hidden_size should be of type int, got: {type(hidden_size).__name__}"
            )
        if hidden_size <= 0:
            raise ValueError("hidden_size must be greater than zero")
        if num_layers <= 0:
            raise ValueError("num_layers must be greater than zero")
        if proj_size < 0:
            raise ValueError(
                "proj_size should be a positive integer or zero to disable projections"
            )
        if proj_size >= hidden_size:
            raise ValueError("proj_size has to be smaller than hidden_size")

        if mode == "LSTM":
            gate_size = 4 * hidden_size
        elif mode == "GRU":
            gate_size = 3 * hidden_size
        elif mode == "RNN_TANH":
            gate_size = hidden_size
        elif mode == "RNN_RELU":
            gate_size = hidden_size
        else:
            raise ValueError("Unrecognized RNN mode: " + mode)

        self._flat_weights_names = []
        self._all_weights = []
        for layer in range(num_layers):
            for direction in range(num_directions):
                real_hidden_size = proj_size if proj_size > 0 else hidden_size
                layer_input_size = (
                    input_size if layer == 0 else real_hidden_size * num_directions
                )

                w_ih = Parameter(
                    torch.empty((gate_size, layer_input_size), **factory_kwargs)
                )
                w_hh = Parameter(
                    torch.empty((gate_size, real_hidden_size), **factory_kwargs)
                )
                b_ih = Parameter(torch.empty(gate_size, **factory_kwargs))
                b_hh = Parameter(torch.empty(gate_size, **factory_kwargs))
                if self.proj_size == 0:
                    layer_params = (w_ih, w_hh, b_ih, b_hh) if bias else (w_ih, w_hh)
                else:
                    w_hr = Parameter(
                        torch.empty((proj_size, hidden_size), **factory_kwargs)
                    )
                    layer_params = (
                        (w_ih, w_hh, b_ih, b_hh, w_hr) if bias else (w_ih, w_hh, w_hr)
                    )

                suffix = "_reverse" if direction == 1 else ""
                param_names = ["weight_ih_l{}{}", "weight_hh_l{}{}"]
                if bias:
                    param_names += ["bias_ih_l{}{}", "bias_hh_l{}{}"]
                if self.proj_size > 0:
                    param_names += ["weight_hr_l{}{}"]
                param_names = [x.format(layer, suffix) for x in param_names]

                for name, param in zip(param_names, layer_params):
                    setattr(self, name, param)
                self._flat_weights_names.extend(param_names)
                self._all_weights.append(param_names)

        self._init_flat_weights()
        self.reset_parameters()

    def _init_flat_weights(self):
        self._flat_weights = [
            getattr(self, wn) if hasattr(self, wn) else None
            for wn in self._flat_weights_names
        ]
        self._flat_weight_refs = [
            weakref.ref(w) if w is not None else None for w in self._flat_weights
        ]
        self.flatten_parameters()

    def __setattr__(self, attr, value):
        # Keep self._flat_weights up to date if you do self.weight_... = ...
        if hasattr(self, "_flat_weights_names") and attr in self._flat_weights_names:
            idx = self._flat_weights_names.index(attr)
            self._flat_weights[idx] = value
        super().__setattr__(attr, value)

    def flatten_parameters(self) -> None:
        """Reset parameter data pointer so that they can use faster code paths.

        Right now, this works only if the module is on the GPU and cuDNN is enabled.
        Otherwise, it's a no-op.
        """
        # Short-circuit if _flat_weights is only partially instantiated.
        if len(self._flat_weights) != len(self._flat_weights_names):
            return
        for w in self._flat_weights:
            if not isinstance(w, Tensor):
                return
        # Short-circuit if any tensor is not acceptable to cuDNN or the
        # tensors in _flat_weights are of different dtypes.
        first_fw = self._flat_weights[0]
        dtype = first_fw.dtype
        for fw in self._flat_weights:
            if (
                not isinstance(fw, Tensor)
                or not (fw.dtype == dtype)
                or not fw.is_cuda
                or not torch.backends.cudnn.is_acceptable(fw)
            ):
                return

        # If any parameters alias, fall back to the slower, copying code path:
        # flattening assumes disjoint parameter storage.
        unique_data_ptrs = {p.data_ptr() for p in self._flat_weights}
        if len(unique_data_ptrs) != len(self._flat_weights):
            return

        with torch.cuda.device_of(first_fw):
            import torch.backends.cudnn.rnn as rnn

            # NB: this is an INPLACE function on all the weights, so it is run
            # under no_grad() to make sure the original parameters are not touched.
            with torch.no_grad():
                if torch._use_cudnn_rnn_flatten_weight():
                    num_weights = 4 if self.bias else 2
                    if self.proj_size > 0:
                        num_weights += 1
                    torch._cudnn_rnn_flatten_weight(
                        self._flat_weights,
                        num_weights,
                        self.input_size,
                        rnn.get_cudnn_mode(self.mode),
                        self.hidden_size,
                        self.proj_size,
                        self.num_layers,
                        self.batch_first,
                        bool(self.bidirectional),
                    )

    def _apply(self, fn, recurse=True):
        self._flat_weight_refs = []
        ret = super()._apply(fn, recurse)
        # _apply() may swap the parameter tensors (e.g. .to() or .cuda()),
        # so recreate the flat-weight caches afterwards.
        self._init_flat_weights()
        return ret

    def reset_parameters(self) -> None:
        stdv = 1.0 / math.sqrt(self.hidden_size) if self.hidden_size > 0 else 0
        for weight in self.parameters():
            init.uniform_(weight, -stdv, stdv)

    def check_input(self, input: Tensor, batch_sizes: Optional[Tensor]) -> None:
        if not torch.jit.is_scripting():
            if (
                input.dtype != self._flat_weights[0].dtype
                and not torch._C._is_any_autocast_enabled()
            ):
                raise ValueError(
                    f"input must have the type {self._flat_weights[0].dtype}, "
                    f"got type {input.dtype}"
                )
        expected_input_dim = 2 if batch_sizes is not None else 3
        if input.dim() != expected_input_dim:
            raise RuntimeError(
                f"input must have {expected_input_dim} dimensions, got {input.dim()}"
            )
        if self.input_size != input.size(-1):
            raise RuntimeError(
                f"input.size(-1) must be equal to input_size. "
                f"Expected {self.input_size}, got {input.size(-1)}"
            )

    def get_expected_hidden_size(
        self, input: Tensor, batch_sizes: Optional[Tensor]
    ) -> tuple[int, int, int]:
        if batch_sizes is not None:
            mini_batch = int(batch_sizes[0])
        else:
            mini_batch = input.size(0) if self.batch_first else input.size(1)
        num_directions = 2 if self.bidirectional else 1
        if self.proj_size > 0:
            expected_hidden_size = (
                self.num_layers * num_directions,
                mini_batch,
                self.proj_size,
            )
        else:
            expected_hidden_size = (
                self.num_layers * num_directions,
                mini_batch,
                self.hidden_size,
            )
        return expected_hidden_size

    def check_hidden_size(
        self,
        hx: Tensor,
        expected_hidden_size: tuple[int, int, int],
        msg: str = "Expected hidden size {}, got {}",
    ) -> None:
        if hx.size() != expected_hidden_size:
            raise RuntimeError(msg.format(expected_hidden_size, list(hx.size())))

    def _weights_have_changed(self):
        # Returns True if the weight tensors have changed since the last
        # forward pass, e.g. when used with torch.func.functional_call().
        weights_changed = False
        for ref, name in zip(self._flat_weight_refs, self._flat_weights_names):
            weight = getattr(self, name) if hasattr(self, name) else None
            if weight is not None and ref is not None and ref() is not weight:
                weights_changed = True
                break
        return weights_changed

    def check_forward_args(
        self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]
    ):
        self.check_input(input, batch_sizes)
        expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
        self.check_hidden_size(hidden, expected_hidden_size)

    def permute_hidden(self, hx: Tensor, permutation: Optional[Tensor]):
        if permutation is None:
            return hx
        return _apply_permutation(hx, permutation)

    def extra_repr(self) -> str:
        s = "{input_size}, {hidden_size}"
        if self.proj_size != 0:
            s += ", proj_size={proj_size}"
        if self.num_layers != 1:
            s += ", num_layers={num_layers}"
        if self.bias is not True:
            s += ", bias={bias}"
        if self.batch_first is not False:
            s += ", batch_first={batch_first}"
        if self.dropout != 0:
            s += ", dropout={dropout}"
        if self.bidirectional is not False:
            s += ", bidirectional={bidirectional}"
        return s.format(**self.__dict__)

    def _update_flat_weights(self):
        if not torch.jit.is_scripting() and self._weights_have_changed():
            self._init_flat_weights()

    def __getstate__(self):
        # If the weights have changed, refresh _flat_weights before pickling.
        self._update_flat_weights()
        # Don't serialize the weak references.
        state = self.__dict__.copy()
        del state["_flat_weight_refs"]
        return state

    def __setstate__(self, d):
        super().__setstate__(d)
        if "all_weights" in d:
            self._all_weights = d["all_weights"]
        # Modules serialized before proj_size existed don't have it;
        # default to 0 for compatibility.
        if "proj_size" not in d:
            self.proj_size = 0

        if not isinstance(self._all_weights[0][0], str):
            num_layers = self.num_layers
            num_directions = 2 if self.bidirectional else 1
            self._flat_weights_names = []
            self._all_weights = []
            for layer in range(num_layers):
                for direction in range(num_directions):
                    suffix = "_reverse" if direction == 1 else ""
                    weights = [
                        "weight_ih_l{}{}",
                        "weight_hh_l{}{}",
                        "bias_ih_l{}{}",
                        "bias_hh_l{}{}",
                        "weight_hr_l{}{}",
                    ]
                    weights = [x.format(layer, suffix) for x in weights]
                    if self.bias:
                        if self.proj_size > 0:
                            self._all_weights += [weights]
                            self._flat_weights_names.extend(weights)
                        else:
                            self._all_weights += [weights[:4]]
                            self._flat_weights_names.extend(weights[:4])
                    else:
                        if self.proj_size > 0:
                            self._all_weights += [weights[:2]] + [weights[-1:]]
                            self._flat_weights_names.extend(
                                weights[:2] + weights[-1:]
                            )
                        else:
                            self._all_weights += [weights[:2]]
                            self._flat_weights_names.extend(weights[:2])
            self._flat_weights = [
                getattr(self, wn) if hasattr(self, wn) else None
                for wn in self._flat_weights_names
            ]
        self._flat_weight_refs = [
            weakref.ref(w) if w is not None else None for w in self._flat_weights
        ]

    @property
    def all_weights(self) -> list[list[Parameter]]:
        return [
            [getattr(self, weight) for weight in weights]
            for weights in self._all_weights
        ]

    def _replicate_for_data_parallel(self):
        replica = super()._replicate_for_data_parallel()
        # Copy these caches, otherwise the replica would share the lists
        # with the original module.
        replica._flat_weights = replica._flat_weights[:]
        replica._flat_weights_names = replica._flat_weights_names[:]
        return replica


class RNN(RNNBase):
    r"""__init__(input_size,hidden_size,num_layers=1,nonlinearity='tanh',bias=True,batch_first=False,dropout=0.0,bidirectional=False,device=None,dtype=None)

Apply a multi-layer Elman RNN with :math:`\tanh` or :math:`\text{ReLU}`
non-linearity to an input sequence. For each element in the input sequence,
each layer computes the following function:

.. math::
    h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})

where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is
the input at time `t`, and :math:`h_{(t-1)}` is the hidden state of the
same layer at time `t-1`, or the initial hidden state at time `0`.
If :attr:`nonlinearity` is ``'relu'``, then :math:`\text{ReLU}` is used instead of :math:`\tanh`.

.. code-block:: python

    # Efficient implementation equivalent to the following with bidirectional=False
    def forward(x, hx=None):
        if batch_first:
            x = x.transpose(0, 1)
        seq_len, batch_size, _ = x.size()
        if hx is None:
            hx = torch.zeros(num_layers, batch_size, hidden_size)
        h_t_minus_1 = hx.clone()
        h_t = hx.clone()
        output = []
        for t in range(seq_len):
            for layer in range(num_layers):
                # Layer 0 reads the input; deeper layers read the hidden
                # state of the layer below at the current time step.
                inp = x[t] if layer == 0 else h_t[layer - 1]
                h_t[layer] = torch.tanh(
                    inp @ weight_ih[layer].T
                    + bias_ih[layer]
                    + h_t_minus_1[layer] @ weight_hh[layer].T
                    + bias_hh[layer]
                )
            output.append(h_t[-1])
            h_t_minus_1 = h_t.clone()
        output = torch.stack(output)
        if batch_first:
            output = output.transpose(0, 1)
        return output, h_t
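
For a single layer, the loop above can be checked against the module directly
(a minimal sketch; the parameter names follow the Attributes section below)::

    >>> rnn = nn.RNN(10, 20, 1)
    >>> x = torch.randn(5, 3, 10)
    >>> output, h_n = rnn(x)
    >>> h = torch.zeros(3, 20)
    >>> for t in range(5):
    ...     h = torch.tanh(x[t] @ rnn.weight_ih_l0.T + rnn.bias_ih_l0
    ...                    + h @ rnn.weight_hh_l0.T + rnn.bias_hh_l0)
    >>> torch.allclose(output[-1], h, atol=1e-5)
    True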

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
        would mean stacking two RNNs together to form a `stacked RNN`,
        with the second RNN taking in outputs of the first RNN and
        computing the final results. Default: 1
    nonlinearity: The non-linearity to use. Can be either ``'tanh'`` or ``'relu'``. Default: ``'tanh'``
    bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
    batch_first: If ``True``, then the input and output tensors are provided
        as `(batch, seq, feature)` instead of `(seq, batch, feature)`.
        Note that this does not apply to hidden or cell states. See the
        Inputs/Outputs sections below for details.  Default: ``False``
    dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
        RNN layer except the last layer, with dropout probability equal to
        :attr:`dropout`. Default: 0
    bidirectional: If ``True``, becomes a bidirectional RNN. Default: ``False``

Inputs: input, hx
    * **input**: tensor of shape :math:`(L, H_{in})` for unbatched input,
      :math:`(L, N, H_{in})` when ``batch_first=False`` or
      :math:`(N, L, H_{in})` when ``batch_first=True`` containing the features of
      the input sequence.  The input can also be a packed variable length sequence.
      See :func:`torch.nn.utils.rnn.pack_padded_sequence` or
      :func:`torch.nn.utils.rnn.pack_sequence` for details.
    * **hx**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{out})` containing the initial hidden
      state for the input sequence batch. Defaults to zeros if not provided.

    where:

    .. math::
        \begin{aligned}
            N ={} & \text{batch size} \\
            L ={} & \text{sequence length} \\
            D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
            H_{in} ={} & \text{input\_size} \\
            H_{out} ={} & \text{hidden\_size}
        \end{aligned}

Outputs: output, h_n
    * **output**: tensor of shape :math:`(L, D * H_{out})` for unbatched input,
      :math:`(L, N, D * H_{out})` when ``batch_first=False`` or
      :math:`(N, L, D * H_{out})` when ``batch_first=True`` containing the output features
      `(h_t)` from the last layer of the RNN, for each `t`. If a
      :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output
      will also be a packed sequence.
    * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{out})` containing the final hidden state
      for each element in the batch.

Attributes:
    weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
        of shape `(hidden_size, input_size)` for `k = 0`. Otherwise, the shape is
        `(hidden_size, num_directions * hidden_size)`
    weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
        of shape `(hidden_size, hidden_size)`
    bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
        of shape `(hidden_size)`
    bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
        of shape `(hidden_size)`

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`
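
    For example, every parameter of ``nn.RNN(10, 20)`` lies within
    :math:`\pm\sqrt{1/20}` (a quick sketch of the bound)::

        >>> rnn = nn.RNN(10, 20)
        >>> bound = (1 / 20) ** 0.5
        >>> all((p.abs() <= bound).all() for p in rnn.parameters())
        True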

.. note::
    For bidirectional RNNs, forward and backward are directions 0 and 1 respectively.
    Example of splitting the output layers when ``batch_first=False``:
    ``output.view(seq_len, batch, num_directions, hidden_size)``.
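
    A quick sketch of that split (sizes assumed from the Examples below)::

        >>> birnn = nn.RNN(10, 20, 2, bidirectional=True)
        >>> output, h_n = birnn(torch.randn(5, 3, 10))
        >>> output.view(5, 3, 2, 20).shape
        torch.Size([5, 3, 2, 20])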

.. note::
    ``batch_first`` argument is ignored for unbatched inputs.

.. include:: ../cudnn_rnn_determinism.rst

.. include:: ../cudnn_persistent_rnn.rst

Examples::

    >>> rnn = nn.RNN(10, 20, 2)
    >>> input = torch.randn(5, 3, 10)
    >>> h0 = torch.randn(2, 3, 20)
    >>> output, hn = rnn(input, h0)
"""

    @overload
    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        nonlinearity: str = "tanh",
        bias: bool = True,
        batch_first: bool = False,
        dropout: float = 0.0,
        bidirectional: bool = False,
        device=None,
        dtype=None,
    ) -> None: ...

    @overload
    def __init__(self, *args, **kwargs): ...

    def __init__(self, *args, **kwargs):
        if "proj_size" in kwargs:
            raise ValueError(
                "proj_size argument is only supported for LSTM, not RNN or GRU"
            )
        # nonlinearity is the 4th positional argument; RNNBase expects mode first.
        if len(args) > 3:
            self.nonlinearity = args[3]
            args = args[:3] + args[4:]
        else:
            self.nonlinearity = kwargs.pop("nonlinearity", "tanh")
        if self.nonlinearity == "tanh":
            mode = "RNN_TANH"
        elif self.nonlinearity == "relu":
            mode = "RNN_RELU"
        else:
            raise ValueError(
                f"Unknown nonlinearity '{self.nonlinearity}'. "
                "Select from 'tanh' or 'relu'."
            )
        super().__init__(mode, *args, **kwargs)

    @overload
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: Tensor, hx: Optional[Tensor] = None
    ) -> tuple[Tensor, Tensor]: ...

    @overload
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: PackedSequence, hx: Optional[Tensor] = None
    ) -> tuple[PackedSequence, Tensor]: ...

    def forward(self, input, hx=None):  # noqa: F811
        self._update_flat_weights()

        num_directions = 2 if self.bidirectional else 1
        orig_input = input

        if isinstance(orig_input, PackedSequence):
            input, batch_sizes, sorted_indices, unsorted_indices = input
            max_batch_size = batch_sizes[0]
            if hx is None:
                hx = torch.zeros(
                    self.num_layers * num_directions,
                    max_batch_size,
                    self.hidden_size,
                    dtype=input.dtype,
                    device=input.device,
                )
            else:
                # Each batch of the hidden state should match the input
                # sequence that the user believes is being passed in.
                hx = self.permute_hidden(hx, sorted_indices)
        else:
            batch_sizes = None
            if input.dim() not in (2, 3):
                raise ValueError(
                    f"RNN: Expected input to be 2D or 3D, got {input.dim()}D tensor instead"
                )
            is_batched = input.dim() == 3
            batch_dim = 0 if self.batch_first else 1
            if not is_batched:
                input = input.unsqueeze(batch_dim)
                if hx is not None:
                    if hx.dim() != 2:
                        raise RuntimeError(
                            f"For unbatched 2-D input, hx should also be 2-D but got {hx.dim()}-D tensor"
                        )
                    hx = hx.unsqueeze(1)
            else:
                if hx is not None and hx.dim() != 3:
                    raise RuntimeError(
                        f"For batched 3-D input, hx should also be 3-D but got {hx.dim()}-D tensor"
                    )
            max_batch_size = input.size(0) if self.batch_first else input.size(1)
            sorted_indices = None
            unsorted_indices = None
            if hx is None:
                hx = torch.zeros(
                    self.num_layers * num_directions,
                    max_batch_size,
                    self.hidden_size,
                    dtype=input.dtype,
                    device=input.device,
                )

        assert hx is not None
        self.check_forward_args(input, hx, batch_sizes)
        assert self.mode == "RNN_TANH" or self.mode == "RNN_RELU"
        rnn_impl = _rnn_impls[self.mode]
        if batch_sizes is None:
            result = rnn_impl(
                input, hx, self._flat_weights, self.bias, self.num_layers,
                self.dropout, self.training, self.bidirectional, self.batch_first,
            )
        else:
            result = rnn_impl(
                input, batch_sizes, hx, self._flat_weights, self.bias,
                self.num_layers, self.dropout, self.training, self.bidirectional,
            )

        output = result[0]
        hidden = result[1]

        if isinstance(orig_input, PackedSequence):
            output_packed = PackedSequence(
                output, batch_sizes, sorted_indices, unsorted_indices
            )
            return output_packed, self.permute_hidden(hidden, unsorted_indices)

        if not is_batched:
            output = output.squeeze(batch_dim)
            hidden = hidden.squeeze(1)
        return output, self.permute_hidden(hidden, unsorted_indices)


class LSTM(RNNBase):
    r"""__init__(input_size,hidden_size,num_layers=1,bias=True,batch_first=False,dropout=0.0,bidirectional=False,proj_size=0,device=None,dtype=None)

Apply a multi-layer long short-term memory (LSTM) RNN to an input sequence.
For each element in the input sequence, each layer computes the following
function:

.. math::
    \begin{array}{ll} \\
        i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
        f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
        o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
        c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
        h_t = o_t \odot \tanh(c_t) \\
    \end{array}

where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell
state at time `t`, :math:`x_t` is the input at time `t`, :math:`h_{t-1}`
is the hidden state of the layer at time `t-1` or the initial hidden
state at time `0`, and :math:`i_t`, :math:`f_t`, :math:`g_t`,
:math:`o_t` are the input, forget, cell, and output gates, respectively.
:math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product.

In a multilayer LSTM, the input :math:`x^{(l)}_t` of the :math:`l` -th layer
(:math:`l \ge 2`) is the hidden state :math:`h^{(l-1)}_t` of the previous layer multiplied by
dropout :math:`\delta^{(l-1)}_t` where each :math:`\delta^{(l-1)}_t` is a Bernoulli random
variable which is :math:`0` with probability :attr:`dropout`.

If ``proj_size > 0`` is specified, LSTM with projections will be used. This changes
the LSTM cell in the following way. First, the dimension of :math:`h_t` will be changed from
``hidden_size`` to ``proj_size`` (dimensions of :math:`W_{hi}` will be changed accordingly).
Second, the output hidden state of each layer will be multiplied by a learnable projection
matrix: :math:`h_t = W_{hr}h_t`. Note that as a consequence of this, the output
of LSTM network will be of different shape as well. See Inputs/Outputs sections below for exact
dimensions of all variables. You can find more details in https://arxiv.org/abs/1402.1128.
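
For instance (a minimal sketch; the sizes are assumptions)::

    >>> plstm = nn.LSTM(10, 20, 2, proj_size=5)
    >>> output, (h_n, c_n) = plstm(torch.randn(5, 3, 10))
    >>> output.shape, h_n.shape, c_n.shape
    (torch.Size([5, 3, 5]), torch.Size([2, 3, 5]), torch.Size([2, 3, 20]))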

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
        would mean stacking two LSTMs together to form a `stacked LSTM`,
        with the second LSTM taking in outputs of the first LSTM and
        computing the final results. Default: 1
    bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
    batch_first: If ``True``, then the input and output tensors are provided
        as `(batch, seq, feature)` instead of `(seq, batch, feature)`.
        Note that this does not apply to hidden or cell states. See the
        Inputs/Outputs sections below for details.  Default: ``False``
    dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
        LSTM layer except the last layer, with dropout probability equal to
        :attr:`dropout`. Default: 0
    bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``
    proj_size: If ``> 0``, will use LSTM with projections of corresponding size. Default: 0

Inputs: input, (h_0, c_0)
    * **input**: tensor of shape :math:`(L, H_{in})` for unbatched input,
      :math:`(L, N, H_{in})` when ``batch_first=False`` or
      :math:`(N, L, H_{in})` when ``batch_first=True`` containing the features of
      the input sequence.  The input can also be a packed variable length sequence.
      See :func:`torch.nn.utils.rnn.pack_padded_sequence` or
      :func:`torch.nn.utils.rnn.pack_sequence` for details.
    * **h_0**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{out})` containing the
      initial hidden state for each element in the input sequence.
      Defaults to zeros if (h_0, c_0) is not provided.
    * **c_0**: tensor of shape :math:`(D * \text{num\_layers}, H_{cell})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{cell})` containing the
      initial cell state for each element in the input sequence.
      Defaults to zeros if (h_0, c_0) is not provided.

    where:

    .. math::
        \begin{aligned}
            N ={} & \text{batch size} \\
            L ={} & \text{sequence length} \\
            D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
            H_{in} ={} & \text{input\_size} \\
            H_{cell} ={} & \text{hidden\_size} \\
            H_{out} ={} & \text{proj\_size if } \text{proj\_size}>0 \text{ otherwise hidden\_size} \\
        \end{aligned}

Outputs: output, (h_n, c_n)
    * **output**: tensor of shape :math:`(L, D * H_{out})` for unbatched input,
      :math:`(L, N, D * H_{out})` when ``batch_first=False`` or
      :math:`(N, L, D * H_{out})` when ``batch_first=True`` containing the output features
      `(h_t)` from the last layer of the LSTM, for each `t`. If a
      :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output
      will also be a packed sequence. When ``bidirectional=True``, `output` will contain
      a concatenation of the forward and reverse hidden states at each time step in the sequence.
    * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{out})` containing the
      final hidden state for each element in the sequence. When ``bidirectional=True``,
      `h_n` will contain a concatenation of the final forward and reverse hidden states, respectively.
    * **c_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{cell})` for unbatched input or
      :math:`(D * \text{num\_layers}, N, H_{cell})` containing the
      final cell state for each element in the sequence. When ``bidirectional=True``,
      `c_n` will contain a concatenation of the final forward and reverse cell states, respectively.

Attributes:
    weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
        `(W_ii|W_if|W_ig|W_io)`, of shape `(4*hidden_size, input_size)` for `k = 0`.
        Otherwise, the shape is `(4*hidden_size, num_directions * hidden_size)`. If
        ``proj_size > 0`` was specified, the shape will be
        `(4*hidden_size, num_directions * proj_size)` for `k > 0`
    weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer
        `(W_hi|W_hf|W_hg|W_ho)`, of shape `(4*hidden_size, hidden_size)`. If ``proj_size > 0``
        was specified, the shape will be `(4*hidden_size, proj_size)`.
    bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer
        `(b_ii|b_if|b_ig|b_io)`, of shape `(4*hidden_size)`
    bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer
        `(b_hi|b_hf|b_hg|b_ho)`, of shape `(4*hidden_size)`
    weight_hr_l[k] : the learnable projection weights of the :math:`\text{k}^{th}` layer
        of shape `(proj_size, hidden_size)`. Only present when ``proj_size > 0`` was
        specified.
    weight_ih_l[k]_reverse: Analogous to `weight_ih_l[k]` for the reverse direction.
        Only present when ``bidirectional=True``.
    weight_hh_l[k]_reverse:  Analogous to `weight_hh_l[k]` for the reverse direction.
        Only present when ``bidirectional=True``.
    bias_ih_l[k]_reverse:  Analogous to `bias_ih_l[k]` for the reverse direction.
        Only present when ``bidirectional=True``.
    bias_hh_l[k]_reverse:  Analogous to `bias_hh_l[k]` for the reverse direction.
        Only present when ``bidirectional=True``.
    weight_hr_l[k]_reverse:  Analogous to `weight_hr_l[k]` for the reverse direction.
        Only present when ``bidirectional=True`` and ``proj_size > 0`` was specified.

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`

.. note::
    For bidirectional LSTMs, forward and backward are directions 0 and 1 respectively.
    Example of splitting the output layers when ``batch_first=False``:
    ``output.view(seq_len, batch, num_directions, hidden_size)``.

.. note::
    For bidirectional LSTMs, `h_n` is not equivalent to the last element of `output`; the
    former contains the final forward and reverse hidden states, while the latter contains the
    final forward hidden state and the initial reverse hidden state.
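
    The relationship can be sketched as follows (a single layer assumed)::

        >>> lstm = nn.LSTM(10, 20, bidirectional=True)
        >>> output, (h_n, _) = lstm(torch.randn(5, 3, 10))
        >>> torch.equal(output[-1, :, :20], h_n[0])  # forward: last time step
        True
        >>> torch.equal(output[0, :, 20:], h_n[1])  # reverse: time step 0
        True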

.. note::
    ``batch_first`` argument is ignored for unbatched inputs.

.. note::
    ``proj_size`` should be smaller than ``hidden_size``.

.. include:: ../cudnn_rnn_determinism.rst

.. include:: ../cudnn_persistent_rnn.rst
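
Packed variable-length sequences use the same call signature (a sketch; the
two sequences below are assumed to have lengths 5 and 3)::

    >>> from torch.nn.utils.rnn import pack_padded_sequence
    >>> lstm = nn.LSTM(10, 20, 2)
    >>> padded = torch.randn(5, 2, 10)
    >>> packed = pack_padded_sequence(padded, lengths=[5, 3])
    >>> packed_output, (h_n, c_n) = lstm(packed)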

Examples::

    >>> rnn = nn.LSTM(10, 20, 2)
    >>> input = torch.randn(5, 3, 10)
    >>> h0 = torch.randn(2, 3, 20)
    >>> c0 = torch.randn(2, 3, 20)
    >>> output, (hn, cn) = rnn(input, (h0, c0))
"""

    @overload
    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        bias: bool = True,
        batch_first: bool = False,
        dropout: float = 0.0,
        bidirectional: bool = False,
        proj_size: int = 0,
        device=None,
        dtype=None,
    ) -> None: ...

    @overload
    def __init__(self, *args, **kwargs): ...

    def __init__(self, *args, **kwargs):
        super().__init__("LSTM", *args, **kwargs)

    def get_expected_cell_size(
        self, input: Tensor, batch_sizes: Optional[Tensor]
    ) -> tuple[int, int, int]:
        if batch_sizes is not None:
            mini_batch = int(batch_sizes[0])
        else:
            mini_batch = input.size(0) if self.batch_first else input.size(1)
        num_directions = 2 if self.bidirectional else 1
        return (self.num_layers * num_directions, mini_batch, self.hidden_size)

    def check_forward_args(
        self,
        input: Tensor,
        hidden: tuple[Tensor, Tensor],
        batch_sizes: Optional[Tensor],
    ):
        self.check_input(input, batch_sizes)
        self.check_hidden_size(
            hidden[0],
            self.get_expected_hidden_size(input, batch_sizes),
            "Expected hidden[0] size {}, got {}",
        )
        self.check_hidden_size(
            hidden[1],
            self.get_expected_cell_size(input, batch_sizes),
            "Expected hidden[1] size {}, got {}",
        )

    def permute_hidden(
        self, hx: tuple[Tensor, Tensor], permutation: Optional[Tensor]
    ) -> tuple[Tensor, Tensor]:
        if permutation is None:
            return hx
        return (
            _apply_permutation(hx[0], permutation),
            _apply_permutation(hx[1], permutation),
        )

    @overload  # type: ignore[override]
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: Tensor, hx: Optional[tuple[Tensor, Tensor]] = None
    ) -> tuple[Tensor, tuple[Tensor, Tensor]]: ...

    @overload
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: PackedSequence, hx: Optional[tuple[Tensor, Tensor]] = None
    ) -> tuple[PackedSequence, tuple[Tensor, Tensor]]: ...

    def forward(self, input, hx=None):  # noqa: F811
        self._update_flat_weights()

        orig_input = input
        batch_sizes = None
        num_directions = 2 if self.bidirectional else 1
        real_hidden_size = self.proj_size if self.proj_size > 0 else self.hidden_size

        if isinstance(orig_input, PackedSequence):
            input, batch_sizes, sorted_indices, unsorted_indices = input
            max_batch_size = batch_sizes[0]
            if hx is None:
                h_zeros = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    real_hidden_size, dtype=input.dtype, device=input.device,
                )
                c_zeros = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    self.hidden_size, dtype=input.dtype, device=input.device,
                )
                hx = (h_zeros, c_zeros)
            else:
                # Each batch of the hidden state should match the input
                # sequence that the user believes is being passed in.
                hx = self.permute_hidden(hx, sorted_indices)
        else:
            if input.dim() not in (2, 3):
                raise ValueError(
                    f"LSTM: Expected input to be 2D or 3D, got {input.dim()}D instead"
                )
            is_batched = input.dim() == 3
            batch_dim = 0 if self.batch_first else 1
            if not is_batched:
                input = input.unsqueeze(batch_dim)
            max_batch_size = input.size(0) if self.batch_first else input.size(1)
            sorted_indices = None
            unsorted_indices = None
            if hx is None:
                h_zeros = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    real_hidden_size, dtype=input.dtype, device=input.device,
                )
                c_zeros = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    self.hidden_size, dtype=input.dtype, device=input.device,
                )
                hx = (h_zeros, c_zeros)
                self.check_forward_args(input, hx, batch_sizes)
            else:
                if is_batched:
                    if hx[0].dim() != 3 or hx[1].dim() != 3:
                        msg = (
                            "For batched 3-D input, hx and cx should also be 3-D "
                            f"but got ({hx[0].dim()}-D, {hx[1].dim()}-D) tensors"
                        )
                        raise RuntimeError(msg)
                else:
                    if hx[0].dim() != 2 or hx[1].dim() != 2:
                        msg = (
                            "For unbatched 2-D input, hx and cx should also be 2-D "
                            f"but got ({hx[0].dim()}-D, {hx[1].dim()}-D) tensors"
                        )
                        raise RuntimeError(msg)
                    hx = (hx[0].unsqueeze(1), hx[1].unsqueeze(1))
                self.check_forward_args(input, hx, batch_sizes)
                hx = self.permute_hidden(hx, sorted_indices)

        if batch_sizes is None:
            result = _VF.lstm(
                input, hx, self._flat_weights, self.bias, self.num_layers,
                self.dropout, self.training, self.bidirectional, self.batch_first,
            )
        else:
            result = _VF.lstm(
                input, batch_sizes, hx, self._flat_weights, self.bias,
                self.num_layers, self.dropout, self.training, self.bidirectional,
            )
        output = result[0]
        hidden = result[1:]

        if isinstance(orig_input, PackedSequence):
            output_packed = PackedSequence(
                output, batch_sizes, sorted_indices, unsorted_indices
            )
            return output_packed, self.permute_hidden(hidden, unsorted_indices)
        if not is_batched:
            output = output.squeeze(batch_dim)
            hidden = (hidden[0].squeeze(1), hidden[1].squeeze(1))
        return output, self.permute_hidden(hidden, unsorted_indices)


class GRU(RNNBase):
    r"""__init__(input_size,hidden_size,num_layers=1,bias=True,batch_first=False,dropout=0.0,bidirectional=False,device=None,dtype=None)

Apply a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
For each element in the input sequence, each layer computes the following
function:

.. math::
    \begin{array}{ll}
        r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
        z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
        n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\
        h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)}
    \end{array}

where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the input
at time `t`, :math:`h_{(t-1)}` is the hidden state of the layer
at time `t-1` or the initial hidden state at time `0`, and :math:`r_t`,
:math:`z_t`, :math:`n_t` are the reset, update, and new gates, respectively.
:math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product.

In a multilayer GRU, the input :math:`x^{(l)}_t` of the :math:`l` -th layer
(:math:`l \ge 2`) is the hidden state :math:`h^{(l-1)}_t` of the previous layer multiplied by
dropout :math:`\delta^{(l-1)}_t` where each :math:`\delta^{(l-1)}_t` is a Bernoulli random
variable which is :math:`0` with probability :attr:`dropout`.

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
        would mean stacking two GRUs together to form a `stacked GRU`,
        with the second GRU taking in outputs of the first GRU and
        computing the final results. Default: 1
    bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
    batch_first: If ``True``, then the input and output tensors are provided
        as `(batch, seq, feature)` instead of `(seq, batch, feature)`.
        Note that this does not apply to hidden or cell states. See the
        Inputs/Outputs sections below for details.  Default: ``False``
    dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
        GRU layer except the last layer, with dropout probability equal to
        :attr:`dropout`. Default: 0
    bidirectional: If ``True``, becomes a bidirectional GRU. Default: ``False``

Inputs: input, h_0
    * **input**: tensor of shape :math:`(L, H_{in})` for unbatched input,
      :math:`(L, N, H_{in})` when ``batch_first=False`` or
      :math:`(N, L, H_{in})` when ``batch_first=True`` containing the features of
      the input sequence.  The input can also be a packed variable length sequence.
      See :func:`torch.nn.utils.rnn.pack_padded_sequence` or
      :func:`torch.nn.utils.rnn.pack_sequence` for details.
    * **h_0**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` or
      :math:`(D * \text{num\_layers}, N, H_{out})`
      containing the initial hidden state for the input sequence. Defaults to zeros if not provided.

    where:

    .. math::
        \begin{aligned}
            N ={} & \text{batch size} \\
            L ={} & \text{sequence length} \\
            D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
            H_{in} ={} & \text{input\_size} \\
            H_{out} ={} & \text{hidden\_size}
        \end{aligned}

Outputs: output, h_n
    * **output**: tensor of shape :math:`(L, D * H_{out})` for unbatched input,
      :math:`(L, N, D * H_{out})` when ``batch_first=False`` or
      :math:`(N, L, D * H_{out})` when ``batch_first=True`` containing the output features
      `(h_t)` from the last layer of the GRU, for each `t`. If a
      :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output
      will also be a packed sequence.
    * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` or
      :math:`(D * \text{num\_layers}, N, H_{out})` containing the final hidden state
      for the input sequence.

Attributes:
    weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
        (W_ir|W_iz|W_in), of shape `(3*hidden_size, input_size)` for `k = 0`.
        Otherwise, the shape is `(3*hidden_size, num_directions * hidden_size)`
    weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer
        (W_hr|W_hz|W_hn), of shape `(3*hidden_size, hidden_size)`
    bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer
        (b_ir|b_iz|b_in), of shape `(3*hidden_size)`
    bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer
        (b_hr|b_hz|b_hn), of shape `(3*hidden_size)`

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`

.. note::
    For bidirectional GRUs, forward and backward are directions 0 and 1 respectively.
    Example of splitting the output layers when ``batch_first=False``:
    ``output.view(seq_len, batch, num_directions, hidden_size)``.

.. note::
    ``batch_first`` argument is ignored for unbatched inputs.

.. note::
    The calculation of new gate :math:`n_t` subtly differs from the original paper and other frameworks.
    In the original implementation, the Hadamard product :math:`(\odot)` between :math:`r_t` and the
    previous hidden state :math:`h_{(t-1)}` is done before the multiplication with the weight matrix
    `W` and addition of bias:

    .. math::
        \begin{aligned}
            n_t = \tanh(W_{in} x_t + b_{in} + W_{hn} ( r_t \odot h_{(t-1)} ) + b_{hn})
        \end{aligned}

    This is in contrast to the PyTorch implementation, which applies the Hadamard product after :math:`W_{hn} h_{(t-1)}`:

    .. math::
        \begin{aligned}
            n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn}))
        \end{aligned}

    This implementation differs on purpose for efficiency.
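
    A quick numerical check that the two conventions genuinely differ
    (random values; the shapes are assumptions)::

        >>> W_hn, b_hn = torch.randn(20, 20), torch.randn(20)
        >>> r = torch.sigmoid(torch.randn(3, 20))
        >>> h = torch.randn(3, 20)
        >>> torch.allclose(r * (h @ W_hn.T + b_hn), (r * h) @ W_hn.T + b_hn)
        False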

.. include:: ../cudnn_persistent_rnn.rst

Examples::

    >>> rnn = nn.GRU(10, 20, 2)
    >>> input = torch.randn(5, 3, 10)
    >>> h0 = torch.randn(2, 3, 20)
    >>> output, hn = rnn(input, h0)
"""

    @overload
    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 1,
        bias: bool = True,
        batch_first: bool = False,
        dropout: float = 0.0,
        bidirectional: bool = False,
        device=None,
        dtype=None,
    ) -> None: ...

    @overload
    def __init__(self, *args, **kwargs): ...

    def __init__(self, *args, **kwargs):
        if "proj_size" in kwargs:
            raise ValueError(
                "proj_size argument is only supported for LSTM, not RNN or GRU"
            )
        super().__init__("GRU", *args, **kwargs)

    @overload  # type: ignore[override]
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: Tensor, hx: Optional[Tensor] = None
    ) -> tuple[Tensor, Tensor]: ...

    @overload
    @torch._jit_internal._overload_method  # noqa: F811
    def forward(
        self, input: PackedSequence, hx: Optional[Tensor] = None
    ) -> tuple[PackedSequence, Tensor]: ...

    def forward(self, input, hx=None):  # noqa: F811
        self._update_flat_weights()

        orig_input = input
        if isinstance(orig_input, PackedSequence):
            input, batch_sizes, sorted_indices, unsorted_indices = input
            max_batch_size = batch_sizes[0]
            if hx is None:
                num_directions = 2 if self.bidirectional else 1
                hx = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    self.hidden_size, dtype=input.dtype, device=input.device,
                )
            else:
                # Each batch of the hidden state should match the input
                # sequence that the user believes is being passed in.
                hx = self.permute_hidden(hx, sorted_indices)
        else:
            batch_sizes = None
            if input.dim() not in (2, 3):
                raise ValueError(
                    f"GRU: Expected input to be 2D or 3D, got {input.dim()}D instead"
                )
            is_batched = input.dim() == 3
            batch_dim = 0 if self.batch_first else 1
            if not is_batched:
                input = input.unsqueeze(batch_dim)
                if hx is not None:
                    if hx.dim() != 2:
                        raise RuntimeError(
                            f"For unbatched 2-D input, hx should also be 2-D but got {hx.dim()}-D tensor"
                        )
                    hx = hx.unsqueeze(1)
            else:
                if hx is not None and hx.dim() != 3:
                    raise RuntimeError(
                        f"For batched 3-D input, hx should also be 3-D but got {hx.dim()}-D tensor"
                    )
            max_batch_size = input.size(0) if self.batch_first else input.size(1)
            sorted_indices = None
            unsorted_indices = None
            if hx is None:
                num_directions = 2 if self.bidirectional else 1
                hx = torch.zeros(
                    self.num_layers * num_directions, max_batch_size,
                    self.hidden_size, dtype=input.dtype, device=input.device,
                )

        self.check_forward_args(input, hx, batch_sizes)
        if batch_sizes is None:
            result = _VF.gru(
                input, hx, self._flat_weights, self.bias, self.num_layers,
                self.dropout, self.training, self.bidirectional, self.batch_first,
            )
        else:
            result = _VF.gru(
                input, batch_sizes, hx, self._flat_weights, self.bias,
                self.num_layers, self.dropout, self.training, self.bidirectional,
            )
        output = result[0]
        hidden = result[1]

        if isinstance(orig_input, PackedSequence):
            output_packed = PackedSequence(
                output, batch_sizes, sorted_indices, unsorted_indices
            )
            return output_packed, self.permute_hidden(hidden, unsorted_indices)

        if not is_batched:
            output = output.squeeze(batch_dim)
            hidden = hidden.squeeze(1)
        return output, self.permute_hidden(hidden, unsorted_indices)


class RNNCellBase(Module):
    __constants__ = ["input_size", "hidden_size", "bias"]

    input_size: int
    hidden_size: int
    bias: bool
    weight_ih: Tensor
    weight_hh: Tensor
    # WARNING: bias_ih and bias_hh purposely not defined here.
    # See https://github.com/pytorch/pytorch/issues/39670

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool,
        num_chunks: int,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(
            torch.empty((num_chunks * hidden_size, input_size), **factory_kwargs)
        )
        self.weight_hh = Parameter(
            torch.empty((num_chunks * hidden_size, hidden_size), **factory_kwargs)
        )
        if bias:
            self.bias_ih = Parameter(
                torch.empty(num_chunks * hidden_size, **factory_kwargs)
            )
            self.bias_hh = Parameter(
                torch.empty(num_chunks * hidden_size, **factory_kwargs)
            )
        else:
            self.register_parameter("bias_ih", None)
            self.register_parameter("bias_hh", None)

        self.reset_parameters()

    def extra_repr(self) -> str:
        s = "{input_size}, {hidden_size}"
        if "bias" in self.__dict__ and self.bias is not True:
            s += ", bias={bias}"
        if "nonlinearity" in self.__dict__ and self.nonlinearity != "tanh":
            s += ", nonlinearity={nonlinearity}"
        return s.format(**self.__dict__)

    def reset_parameters(self) -> None:
        stdv = 1.0 / math.sqrt(self.hidden_size) if self.hidden_size > 0 else 0
        for weight in self.parameters():
            init.uniform_(weight, -stdv, stdv)


class RNNCell(RNNCellBase):
    r"""An Elman RNN cell with tanh or ReLU non-linearity.

.. math::

    h' = \tanh(W_{ih} x + b_{ih}  +  W_{hh} h + b_{hh})

If :attr:`nonlinearity` is `'relu'`, then ReLU is used in place of tanh.

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
    nonlinearity: The non-linearity to use. Can be either ``'tanh'`` or ``'relu'``. Default: ``'tanh'``

Inputs: input, hidden
    - **input**: tensor containing input features
    - **hidden**: tensor containing the initial hidden state
      Defaults to zero if not provided.

Outputs: h'
    - **h'** of shape `(batch, hidden_size)`: tensor containing the next hidden state
      for each element in the batch

Shape:
    - input: :math:`(N, H_{in})` or :math:`(H_{in})` tensor containing input features where
      :math:`H_{in}` = `input_size`.
    - hidden: :math:`(N, H_{out})` or :math:`(H_{out})` tensor containing the initial hidden
      state where :math:`H_{out}` = `hidden_size`. Defaults to zero if not provided.
    - output: :math:`(N, H_{out})` or :math:`(H_{out})` tensor containing the next hidden state.

Attributes:
    weight_ih: the learnable input-hidden weights, of shape
        `(hidden_size, input_size)`
    weight_hh: the learnable hidden-hidden weights, of shape
        `(hidden_size, hidden_size)`
    bias_ih: the learnable input-hidden bias, of shape `(hidden_size)`
    bias_hh: the learnable hidden-hidden bias, of shape `(hidden_size)`

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`

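A quick empirical sanity check of this bound (illustrative only; the
names below are ours, not part of the module)::

    >>> cell = nn.RNNCell(10, 20)
    >>> bound = (1.0 / 20) ** 0.5  # sqrt(k) with k = 1 / hidden_size
    >>> bool((cell.weight_ih.abs() <= bound).all())
    True
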
Examples::

    >>> rnn = nn.RNNCell(10, 20)
    >>> input = torch.randn(6, 3, 10)
    >>> hx = torch.randn(3, 20)
    >>> output = []
    >>> for i in range(6):
    ...     hx = rnn(input[i], hx)
    ...     output.append(hx)
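
The update can also be written out directly against the cell's
parameters. The sketch below is illustrative only; the name
``rnn_tanh_ref`` is not part of the module, the real ``forward``
dispatches to a fused kernel, and it assumes the default ``'tanh'``
nonlinearity with ``bias=True``::

    >>> def rnn_tanh_ref(cell, x, h):
    ...     # h' = tanh(W_ih x + b_ih + W_hh h + b_hh)
    ...     return torch.tanh(
    ...         x @ cell.weight_ih.t() + cell.bias_ih
    ...         + h @ cell.weight_hh.t() + cell.bias_hh
    ...     )
    >>> h_next = rnn_tanh_ref(rnn, input[0], hx)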
    """

    __constants__ = ["input_size", "hidden_size", "bias", "nonlinearity"]
    nonlinearity: str

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool = True,
        nonlinearity: str = "tanh",
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__(input_size, hidden_size, bias, num_chunks=1, **factory_kwargs)
        self.nonlinearity = nonlinearity

    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        if input.dim() not in (1, 2):
            raise ValueError(
                f"RNNCell: Expected input to be 1D or 2D, got {input.dim()}D instead"
            )
        if hx is not None and hx.dim() not in (1, 2):
            raise ValueError(
                f"RNNCell: Expected hidden to be 1D or 2D, got {hx.dim()}D instead"
            )
        is_batched = input.dim() == 2
        if not is_batched:
            input = input.unsqueeze(0)

        if hx is None:
            hx = torch.zeros(
                input.size(0), self.hidden_size, dtype=input.dtype, device=input.device
            )
        else:
            hx = hx.unsqueeze(0) if not is_batched else hx

        if self.nonlinearity == "tanh":
            ret = _VF.rnn_tanh_cell(
                input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh
            )
        elif self.nonlinearity == "relu":
            ret = _VF.rnn_relu_cell(
                input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh
            )
        else:
            ret = input  # TODO: remove when jit supports exception flow
            raise RuntimeError(f"Unknown nonlinearity: {self.nonlinearity}")

        if not is_batched:
            ret = ret.squeeze(0)

        return ret


class LSTMCell(RNNCellBase):
    r"""A long short-term memory (LSTM) cell.

.. math::

    \begin{array}{ll}
    i = \sigma(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\
    f = \sigma(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\
    g = \tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\
    o = \sigma(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\
    c' = f \odot c + i \odot g \\
    h' = o \odot \tanh(c') \\
    \end{array}

where :math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product.

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    bias: If ``False``, then the layer does not use bias weights `b_ih` and
        `b_hh`. Default: ``True``

Inputs: input, (h_0, c_0)
    - **input** of shape `(batch, input_size)` or `(input_size)`: tensor containing input features
    - **h_0** of shape `(batch, hidden_size)` or `(hidden_size)`: tensor containing the initial hidden state
    - **c_0** of shape `(batch, hidden_size)` or `(hidden_size)`: tensor containing the initial cell state

      If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero.

Outputs: (h_1, c_1)
    - **h_1** of shape `(batch, hidden_size)` or `(hidden_size)`: tensor containing the next hidden state
    - **c_1** of shape `(batch, hidden_size)` or `(hidden_size)`: tensor containing the next cell state

Attributes:
    weight_ih: the learnable input-hidden weights, of shape
        `(4*hidden_size, input_size)`
    weight_hh: the learnable hidden-hidden weights, of shape
        `(4*hidden_size, hidden_size)`
    bias_ih: the learnable input-hidden bias, of shape `(4*hidden_size)`
    bias_hh: the learnable hidden-hidden bias, of shape `(4*hidden_size)`

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`

On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

Examples::

    >>> rnn = nn.LSTMCell(10, 20)  # (input_size, hidden_size)
    >>> input = torch.randn(2, 3, 10)  # (time_steps, batch, input_size)
    >>> hx = torch.randn(3, 20)  # (batch, hidden_size)
    >>> cx = torch.randn(3, 20)
    >>> output = []
    >>> for i in range(input.size()[0]):
    ...     hx, cx = rnn(input[i], (hx, cx))
    ...     output.append(hx)
    >>> output = torch.stack(output, dim=0)
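
A pure-Python sketch of the gate arithmetic above, assuming the packed
``(W_ii|W_if|W_ig|W_io)`` row layout of ``weight_ih`` and ``weight_hh``;
the name ``lstm_step_ref`` is not part of the module, and the real
``forward`` dispatches to a fused kernel, so bit-exact agreement is not
guaranteed::

    >>> def lstm_step_ref(cell, x, h, c):
    ...     gates = (x @ cell.weight_ih.t() + cell.bias_ih
    ...              + h @ cell.weight_hh.t() + cell.bias_hh)
    ...     i, f, g, o = gates.chunk(4, dim=1)
    ...     c_next = f.sigmoid() * c + i.sigmoid() * g.tanh()
    ...     return o.sigmoid() * c_next.tanh(), c_next
    >>> h_ref, c_ref = lstm_step_ref(rnn, input[0], hx, cx)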
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool = True,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        # LSTM packs four gates (i, f, g, o) into each weight matrix.
        super().__init__(input_size, hidden_size, bias, num_chunks=4, **factory_kwargs)

    def forward(
        self, input: Tensor, hx: Optional[tuple[Tensor, Tensor]] = None
    ) -> tuple[Tensor, Tensor]:
        if input.dim() not in (1, 2):
            raise ValueError(
                f"LSTMCell: Expected input to be 1D or 2D, got {input.dim()}D instead"
            )
        if hx is not None:
            for idx, value in enumerate(hx):
                if value.dim() not in (1, 2):
                    raise ValueError(
                        f"LSTMCell: Expected hx[{idx}] to be 1D or 2D, got {value.dim()}D instead"
                    )
        is_batched = input.dim() == 2
        if not is_batched:
            input = input.unsqueeze(0)

        if hx is None:
            zeros = torch.zeros(
                input.size(0), self.hidden_size, dtype=input.dtype, device=input.device
            )
            hx = (zeros, zeros)
        else:
            hx = (hx[0].unsqueeze(0), hx[1].unsqueeze(0)) if not is_batched else hx

        ret = _VF.lstm_cell(
            input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh
        )

        if not is_batched:
            ret = (ret[0].squeeze(0), ret[1].squeeze(0))
        return ret


class GRUCell(RNNCellBase):
    r"""A gated recurrent unit (GRU) cell.

.. math::

    \begin{array}{ll}
    r = \sigma(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\
    z = \sigma(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\
    n = \tanh(W_{in} x + b_{in} + r \odot (W_{hn} h + b_{hn})) \\
    h' = (1 - z) \odot n + z \odot h
    \end{array}

where :math:`\sigma` is the sigmoid function, and :math:`\odot` is the Hadamard product.

Args:
    input_size: The number of expected features in the input `x`
    hidden_size: The number of features in the hidden state `h`
    bias: If ``False``, then the layer does not use bias weights `b_ih` and
        `b_hh`. Default: ``True``

Inputs: input, hidden
    - **input** : tensor containing input features
    - **hidden** : tensor containing the initial hidden
      state for each element in the batch.
      Defaults to zero if not provided.

Outputs: h'
    - **h'** : tensor containing the next hidden state
      for each element in the batch

Shape:
    - input: :math:`(N, H_{in})` or :math:`(H_{in})` tensor containing input features where
      :math:`H_{in}` = `input_size`.
    - hidden: :math:`(N, H_{out})` or :math:`(H_{out})` tensor containing the initial hidden
      state where :math:`H_{out}` = `hidden_size`. Defaults to zero if not provided.
    - output: :math:`(N, H_{out})` or :math:`(H_{out})` tensor containing the next hidden state.

Attributes:
    weight_ih: the learnable input-hidden weights, of shape
        `(3*hidden_size, input_size)`
    weight_hh: the learnable hidden-hidden weights, of shape
        `(3*hidden_size, hidden_size)`
    bias_ih: the learnable input-hidden bias, of shape `(3*hidden_size)`
    bias_hh: the learnable hidden-hidden bias, of shape `(3*hidden_size)`

.. note::
    All the weights and biases are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`
    where :math:`k = \frac{1}{\text{hidden\_size}}`

On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

Examples::

    >>> rnn = nn.GRUCell(10, 20)
    >>> input = torch.randn(6, 3, 10)
    >>> hx = torch.randn(3, 20)
    >>> output = []
    >>> for i in range(6):
    ...     hx = rnn(input[i], hx)
    ...     output.append(hx)
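
A pure-Python sketch of the three gates above, assuming the packed
``(W_ir|W_iz|W_in)`` row layout of ``weight_ih`` and ``weight_hh``; the
name ``gru_step_ref`` is not part of the module, and the real ``forward``
dispatches to a fused kernel. Note that ``b_hn`` sits inside the
reset-gate product, which is why the two bias vectors are not redundant::

    >>> def gru_step_ref(cell, x, h):
    ...     gi = x @ cell.weight_ih.t() + cell.bias_ih
    ...     gh = h @ cell.weight_hh.t() + cell.bias_hh
    ...     i_r, i_z, i_n = gi.chunk(3, dim=1)
    ...     h_r, h_z, h_n = gh.chunk(3, dim=1)
    ...     r = torch.sigmoid(i_r + h_r)
    ...     z = torch.sigmoid(i_z + h_z)
    ...     n = torch.tanh(i_n + r * h_n)
    ...     return (1 - z) * n + z * h
    >>> h_ref = gru_step_ref(rnn, input[0], hx)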
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        bias: bool = True,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        # GRU packs three gates (r, z, n) into each weight matrix.
        super().__init__(input_size, hidden_size, bias, num_chunks=3, **factory_kwargs)

    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        if input.dim() not in (1, 2):
            raise ValueError(
                f"GRUCell: Expected input to be 1D or 2D, got {input.dim()}D instead"
            )
        if hx is not None and hx.dim() not in (1, 2):
            raise ValueError(
                f"GRUCell: Expected hidden to be 1D or 2D, got {hx.dim()}D instead"
            )
        is_batched = input.dim() == 2
        if not is_batched:
            input = input.unsqueeze(0)

        if hx is None:
            hx = torch.zeros(
                input.size(0), self.hidden_size, dtype=input.dtype, device=input.device
            )
        else:
            hx = hx.unsqueeze(0) if not is_batched else hx

        ret = _VF.gru_cell(
            input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh
        )

        if not is_batched:
            ret = ret.squeeze(0)

        return ret