
    hT'                     T   S SK JrJr  S SKrS/rS\R
                  S\R
                  4S jr " S S\R                  R                  5      r	 " S	 S
\R                  R                  5      r
 " S S\R                  R                  5      r " S S\R                  R                  5      rg)    )OptionalTupleN	Conformerlengthsreturnc                    U R                   S   n[        [        R                  " U 5      R	                  5       5      n[        R
                  " X R                  U R                  S9R                  X5      U R                  S5      :  nU$ )Nr   )devicedtype   )
shapeinttorchmaxitemaranger	   r
   expand	unsqueeze)r   
batch_size
max_lengthpadding_masks       S/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/models/conformer.py_lengths_to_padding_maskr   	   sm    q!JUYYw',,./J<<
>>W^^			1	L     c                      ^  \ rS rSrSr   SS\S\S\S\S\S\S	S
4U 4S jjjrS\	R                  S	\	R                  4S jrSrU =r$ )_ConvolutionModule   a  Conformer convolution module.

Args:
    input_dim (int): input dimension.
    num_channels (int): number of depthwise convolution layer input channels.
    depthwise_kernel_size (int): kernel size of depthwise convolution layer.
    dropout (float, optional): dropout probability. (Default: 0.0)
    bias (bool, optional): indicates whether to add bias term to each convolution layer. (Default: ``False``)
    use_group_norm (bool, optional): use GroupNorm rather than BatchNorm. (Default: ``False``)
	input_dimnum_channelsdepthwise_kernel_sizedropoutbiasuse_group_normr   Nc                   > [         TU ]  5         US-
  S-  S:w  a  [        S5      e[        R                  R                  U5      U l        [        R                  R                  [        R                  R                  USU-  SSSUS9[        R                  R                  SS9[        R                  R                  UUUSUS-
  S-  UUS9U(       a  [        R                  R                  SUS9O[        R                  R                  U5      [        R                  R                  5       [        R                  R                  UUSSSUS	9[        R                  R                  U5      5      U l        g )
Nr      r   z<depthwise_kernel_size must be odd to achieve 'SAME' padding.)stridepaddingr!   )dim)r%   r&   groupsr!   )
num_groupsr   )kernel_sizer%   r&   r!   )super__init__
ValueErrorr   nn	LayerNorm
layer_norm
SequentialConv1dGLU	GroupNormBatchNorm1dSiLUDropout
sequential)selfr   r   r   r    r!   r"   	__class__s          r   r,   _ConvolutionModule.__init__   sF    	!A%*a/[\\((,,Y7((--HHOOL    HHLLQLHHOO%.2q8#    HH!,G%%l3HHMMOHHOO   HHW%? 
r   inputc                     U R                  U5      nUR                  SS5      nU R                  U5      nUR                  SS5      $ )zu
Args:
    input (torch.Tensor): with shape `(B, T, D)`.

Returns:
    torch.Tensor: output, with shape `(B, T, D)`.
r   r$   )r0   	transposer8   )r9   r<   xs      r   forward_ConvolutionModule.forwardM   sA     OOE"KK1OOA{{1a  r   )r0   r8           FF)__name__
__module____qualname____firstlineno____doc__r   floatboolr,   r   Tensorr@   __static_attributes____classcell__r:   s   @r   r   r      s    	  $-
-
 -
  #	-

 -
 -
 -
 
-
 -
^!U\\ !ell ! !r   r   c            	       ~   ^  \ rS rSrSrSS\S\S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )_FeedForwardModule[   zPositionwise feed forward layer.

Args:
    input_dim (int): input dimension.
    hidden_dim (int): hidden dimension.
    dropout (float, optional): dropout probability. (Default: 0.0)
r   
hidden_dimr    r   Nc                   > [         TU ]  5         [        R                  R	                  [        R                  R                  U5      [        R                  R                  XSS9[        R                  R                  5       [        R                  R                  U5      [        R                  R                  X!SS9[        R                  R                  U5      5      U l	        g )NT)r!   )
r+   r,   r   r.   r1   r/   Linearr6   r7   r8   )r9   r   rR   r    r:   s       r   r,   _FeedForwardModule.__init__d   s    ((--HHy)HHOOIO=HHMMOHHW%HHOOJO=HHW%
r   r<   c                 $    U R                  U5      $ )zo
Args:
    input (torch.Tensor): with shape `(*, D)`.

Returns:
    torch.Tensor: output, with shape `(*, D)`.
r8   )r9   r<   s     r   r@   _FeedForwardModule.forwardo   s     u%%r   rW   )rC   )rD   rE   rF   rG   rH   r   rI   r,   r   rK   r@   rL   rM   rN   s   @r   rP   rP   [   sO    	
# 	
3 	
 	
QU 	
 	
&U\\ &ell & &r   rP   c                      ^  \ rS rSrSr   SS\S\S\S\S\S\S	\S
S4U 4S jjjrS\	R                  S
\	R                  4S jrS\	R                  S\\	R                     S
\	R                  4S jrSrU =r$ )ConformerLayerz   av  Conformer layer that constitutes Conformer.

Args:
    input_dim (int): input dimension.
    ffn_dim (int): hidden layer dimension of feedforward network.
    num_attention_heads (int): number of attention heads.
    depthwise_conv_kernel_size (int): kernel size of depthwise convolution layer.
    dropout (float, optional): dropout probability. (Default: 0.0)
    use_group_norm (bool, optional): use ``GroupNorm`` rather than ``BatchNorm1d``
        in the convolution module. (Default: ``False``)
    convolution_first (bool, optional): apply the convolution module ahead of
        the attention module. (Default: ``False``)
r   ffn_dimnum_attention_headsdepthwise_conv_kernel_sizer    r"   convolution_firstr   Nc           	        > [         TU ]  5         [        XUS9U l        [        R
                  R                  U5      U l        [        R
                  R                  XUS9U l	        [        R
                  R                  U5      U l        [        UUUUSUS9U l        [        XUS9U l        [        R
                  R                  U5      U l        Xpl        g )N)r    T)r   r   r   r    r!   r"   )r+   r,   rP   ffn1r   r.   r/   self_attn_layer_normMultiheadAttention	self_attnr7   self_attn_dropoutr   conv_moduleffn2final_layer_normr_   )	r9   r   r\   r]   r^   r    r"   r_   r:   s	           r   r,   ConformerLayer.__init__   s     	&y7K	$)HH$6$6y$A!44Y]d4e!&!1!1'!:-""<)
 'y7K	 % 2 29 =!2r   r<   c                 |    UnUR                  SS5      nU R                  U5      nUR                  SS5      nX!-   nU$ )Nr   r   )r>   rf   )r9   r<   residuals      r   _apply_convolution!ConformerLayer._apply_convolution   sD    1%  '1% r   key_padding_maskc                    UnU R                  U5      nUS-  U-   nU R                  (       a  U R                  U5      nUnU R                  U5      nU R	                  UUUUSS9u  pEU R                  U5      nXC-   nU R                  (       d  U R                  U5      nUnU R                  U5      nUS-  U-   nU R                  U5      nU$ )z
Args:
    input (torch.Tensor): input, with shape `(T, B, D)`.
    key_padding_mask (torch.Tensor or None): key padding mask to use in self attention layer.

Returns:
    torch.Tensor: output, with shape `(T, B, D)`.
g      ?F)querykeyvaluern   need_weights)ra   r_   rl   rb   rd   re   rg   rh   )r9   r<   rn   rk   r?   _s         r   r@   ConformerLayer.forward   s     IIeGh!!''*A%%a(~~-  
 ""1%L%%''*AIIaLGh!!!$r   )rf   r_   ra   rg   rh   rd   re   rb   rB   )rD   rE   rF   rG   rH   r   rI   rJ   r,   r   rK   rl   r   r@   rL   rM   rN   s   @r   rZ   rZ   z   s    ( $"'33 3 !	3
 %(3 3 3  3 
3 3>  $U\\ $Xell=S $X]XdXd $ $r   rZ   c                      ^  \ rS rSrSr   SS\S\S\S\S\S\S	\S
\4U 4S jjjrS\	R                  S\	R                  S\\	R                  \	R                  4   4S jrSrU =r$ )r      a  Conformer architecture introduced in
*Conformer: Convolution-augmented Transformer for Speech Recognition*
:cite:`gulati2020conformer`.

Args:
    input_dim (int): input dimension.
    num_heads (int): number of attention heads in each Conformer layer.
    ffn_dim (int): hidden layer dimension of feedforward networks.
    num_layers (int): number of Conformer layers to instantiate.
    depthwise_conv_kernel_size (int): kernel size of each Conformer layer's depthwise convolution layer.
    dropout (float, optional): dropout probability. (Default: 0.0)
    use_group_norm (bool, optional): use ``GroupNorm`` rather than ``BatchNorm1d``
        in the convolution module. (Default: ``False``)
    convolution_first (bool, optional): apply the convolution module ahead of
        the attention module. (Default: ``False``)

Examples:
    >>> conformer = Conformer(
    >>>     input_dim=80,
    >>>     num_heads=4,
    >>>     ffn_dim=128,
    >>>     num_layers=4,
    >>>     depthwise_conv_kernel_size=31,
    >>> )
    >>> lengths = torch.randint(1, 400, (10,))  # (batch,)
    >>> input = torch.rand(10, int(lengths.max()), input_dim)  # (batch, num_frames, input_dim)
    >>> output = conformer(input, lengths)
r   	num_headsr\   
num_layersr^   r    r"   r_   c	                    > [         T
U ]  5         [        R                  R	                  [        U5       V	s/ s H  n	[        UUUUUUUS9PM     sn	5      U l        g s  sn	f )N)r    r"   r_   )r+   r,   r   r.   
ModuleListrangerZ   conformer_layers)r9   r   rx   r\   ry   r^   r    r"   r_   rt   r:   s             r   r,   Conformer.__init__   sl     	 % 3 3 z* +A .##1&7 +!
s   Ar<   r   r   c                     [        U5      nUR                  SS5      nU R                   H  nU" XC5      nM     UR                  SS5      U4$ )a  
Args:
    input (torch.Tensor): with shape `(B, T, input_dim)`.
    lengths (torch.Tensor): with shape `(B,)` and i-th element representing
        number of valid frames for i-th batch element in ``input``.

Returns:
    (torch.Tensor, torch.Tensor)
        torch.Tensor
            output frames, with shape `(B, T, input_dim)`
        torch.Tensor
            output lengths, with shape `(B,)` and i-th element representing
            number of valid frames for i-th batch element in output frames.
r   r   )r   r>   r}   )r9   r<   r   encoder_padding_maskr?   layers         r   r@   Conformer.forward  sO      8@OOAq!**Ea.A +{{1a '))r   )r}   rB   )rD   rE   rF   rG   rH   r   rI   rJ   r,   r   rK   r   r@   rL   rM   rN   s   @r   r   r      s    H $"'

 
 	

 
 %(
 
 
  
 
8*U\\ *ELL *U5<<Y^YeYeKeEf * *r   )typingr   r   r   __all__rK   r   r.   Moduler   rP   rZ   r    r   r   <module>r      s    "  -ell u|| F! F!R& &>ZUXX__ ZzN* N*r   