
    h0                        % S SK r S SKJrJrJr  S SKrS SKJr  S SKJs  J	r
  S\S\4S jrS\" S5      4r\\\4   \S'    " S	 S
\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      rS\S\S\S\R0                  R                  4S jr S"S\S\S\S\S\S\S\S\S\\   S\4S  jjrS\4S! jrg)#    N)ListOptionalTuplexreturnc                 L    SSS[         R                  " SU -  S-   5      -   -  -   $ )ap  The metric defined by ITU-T P.862 is often called 'PESQ score', which is defined
for narrow-band signals and has a value range of [-0.5, 4.5] exactly. Here, we use the metric
defined by ITU-T P.862.2, commonly known as 'wide-band PESQ' and will be referred to as "PESQ score".

Args:
    x (float): Narrow-band PESQ score.

Returns:
    (float): Wide-band PESQ score.
g+?g@   g;pΈgׁsF@)mathexp)r   s    Y/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/models/squim/objective.pytransform_wb_pesq_ranger   	   s+     Ma$((7Q;3G*H&HIII          ?g      @	PESQRangec                   |   ^  \ rS rSrS	S\\\4   SS4U 4S jjjrS\R                  S\R                  4S jr	Sr
U =r$ )
RangeSigmoid   	val_ranger   Nc                    > [         [        U ]  5         [        U[        5      (       a  [        U5      S:X  d   eXl        [        R                  " 5       U l	        g )N   )
superr   __init__
isinstancetuplelenr   nnSigmoidsigmoid)selfr   	__class__s     r   r   RangeSigmoid.__init__    s@    lD*,)U++I!0CCC.7*,**,r   r   c                     U R                  U5      U R                  S   U R                  S   -
  -  U R                  S   -   nU$ )Nr	   r   r   r   r   r   outs      r   forwardRangeSigmoid.forward&   s?    ll1o!2T^^A5F!FG$..YZJ[[
r   r#   ))        r   )__name__
__module____qualname____firstlineno__r   floatr   torchTensorr&   __static_attributes____classcell__r    s   @r   r   r      sD    7%u"5 7t 7 7 %,,  r   r   c                   z   ^  \ rS rSrSrSS\S\SS4U 4S jjjrS\R                  S\R                  4S	 jr	S
r
U =r$ )Encoder+   zEncoder module that transform 1D waveform to 2D representations.

Args:
    feat_dim (int, optional): The feature dimension after Encoder module. (Default: 512)
    win_len (int, optional): kernel size in the Conv1D layer. (Default: 32)
feat_dimwin_lenr   Nc                 h   > [         [        U ]  5         [        R                  " SXUS-  SS9U l        g )Nr	   r   F)stridebias)r   r4   r   r   Conv1dconv1d)r   r6   r7   r    s      r   r   Encoder.__init__3   s+    gt%'ii8W\PUVr   r   c                 n    UR                  SS9n[        R                  " U R                  U5      5      nU$ )zApply waveforms to convolutional layer and ReLU layer.

Args:
    x (torch.Tensor): Input waveforms. Tensor with dimensions `(batch, time)`.

Returns:
    (torch,Tensor): Feature Tensor with dimensions `(batch, channel, frame)`.
r	   dim)	unsqueezeFrelur<   r$   s      r   r&   Encoder.forward8   s0     kkak ffT[[%&
r   )r<   )i       )r)   r*   r+   r,   __doc__intr   r.   r/   r&   r0   r1   r2   s   @r   r4   r4   +   sL    W WS W$ W W
 %,,  r   r4   c                   ~   ^  \ rS rSrSS\S\S\S\SS4
U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )	SingleRNNF   rnn_type
input_sizehidden_sizedropoutr   Nc           	         > [         [        U ]  5         Xl        X l        X0l        [        [        U5      " UUSUSSS9U l        [        R                  " US-  U5      U l
        g )Nr	   T)rN   batch_firstbidirectionalr   )r   rI   r   rK   rL   rM   getattrr   rnnLinearproj)r   rK   rL   rM   rN   r    s        r   r   SingleRNN.__init__G   s]    i') $&&-b(&;'
 IIkAoz:	r   r   c                 N    U R                  U5      u  p#U R                  U5      nU$ N)rS   rU   )r   r   r%   _s       r   r&   SingleRNN.forwardY   s#    !iin
r   )rM   rL   rU   rS   rK   )r(   )r)   r*   r+   r,   strrG   r-   r   r.   r/   r&   r0   r1   r2   s   @r   rI   rI   F   sR    ; ;# ;C ;RW ;bf ; ;$ %,,  r   rI   c                   b  ^  \ rS rSrSr       SS\S\S\S\S\S\S	\S
S4U 4S jjjrS\R                  S
\
\R                  \4   4S jrS\R                  S
\
\R                  \4   4S jrS\R                  S\S
\R                  4S jrS\R                  S
\R                  4S jrSrU =r$ )DPRNN`   a  *Dual-path recurrent neural networks (DPRNN)* :cite:`luo2020dual`.

Args:
    feat_dim (int, optional): The feature dimension after Encoder module. (Default: 64)
    hidden_dim (int, optional): Hidden dimension in the RNN layer of DPRNN. (Default: 128)
    num_blocks (int, optional): Number of DPRNN layers. (Default: 6)
    rnn_type (str, optional): Type of RNN in DPRNN. Valid options are ["RNN", "LSTM", "GRU"]. (Default: "LSTM")
    d_model (int, optional): The number of expected features in the input. (Default: 256)
    chunk_size (int, optional): Chunk size of input for DPRNN. (Default: 100)
    chunk_stride (int, optional): Stride of chunk input for DPRNN. (Default: 50)
r6   
hidden_dim
num_blocksrK   d_model
chunk_sizechunk_strider   Nc           	      "  > [         [        U ]  5         X0l        [        R
                  " / 5      U l        [        R
                  " / 5      U l        [        R
                  " / 5      U l        [        R
                  " / 5      U l	        [        U5       H  nU R                  R                  [        XAU5      5        U R                  R                  [        XAU5      5        U R                  R                  [        R                  " SUSS95        U R                  R                  [        R                  " SUSS95        M     [        R                  " [        R                  " XS5      [        R                   " 5       5      U l        X`l        Xpl        g )Nr	   g:0yE>)eps)r   r]   r   r`   r   
ModuleListrow_rnncol_rnnrow_normcol_normrangeappendrI   	GroupNorm
SequentialConv2dPReLUconvrb   rc   )
r   r6   r_   r`   rK   ra   rb   rc   rY   r    s
            r   r   DPRNN.__init__m   s    	eT#%$}}R(}}R(b)b)z"ALL	(j IJLL	(j IJMM  at!DEMM  at!DE	 #
 MMIIh+HHJ
	 %(r   r   c                     UR                   S   nU R                  U R                  X R                  -  -   U R                  -  -
  n[        R                  " XR                  X0R                  -   /5      nXC4$ )N)shaperb   rc   rB   pad)r   r   seq_lenrestr%   s        r   	pad_chunkDPRNN.pad_chunk   se    ''"+$"3"3g6O"OSWSbSb!bbeeA))42C2C+CDEyr   c                    U R                  U5      u  p#UR                  u  pEnUS S 2S S 2S U R                  * 24   R                  5       R	                  XESU R
                  5      nUS S 2S S 2U R                  S 24   R                  5       R	                  XESU R
                  5      n[        R                  " Xx/SS9nUR	                  XESU R
                  5      R                  SS5      R                  5       nX#4$ )Nrt      r?   r   )	ry   ru   rc   
contiguousviewrb   r.   cat	transpose)	r   r   r%   rx   
batch_sizer6   rw   	segments1	segments2s	            r   chunkingDPRNN.chunking   s    NN1%	(+		%
g12!2!2 2223>>@EEj\^`d`o`op	1d//112==?DDZ[]_c_n_no	ii.A6hhzRAKKAqQ\\^yr   rx   c                 2   UR                   u  p4  nUR                  SS5      R                  5       R                  X4SU R                  S-  5      nUS S 2S S 2S S 2S U R                  24   R                  5       R                  X4S5      S S 2S S 2U R
                  S 24   nUS S 2S S 2S S 2U R                  S 24   R                  5       R                  X4S5      S S 2S S 2S U R
                  * 24   nXx-   nUS:  a  US S 2S S 2S U* 24   nUR                  5       nU$ )Nr   r|   rt   r   )ru   r   r}   r~   rb   rc   )	r   r   rx   r   r@   rY   r%   out1out2s	            r   mergingDPRNN.merging   s    !
Akk!Q**,11*2tYZGZ[1a-doo--.99;@@RTUVWYZ\`\m\m\oVop1aDOO--.99;@@RTUVWYZ\p_c_p_p^p\pVpqk!8aFdUFl#Cnn
r   c                 ~   U R                  U5      u  pUR                  u  p4pVUn[        U R                  U R                  U R
                  U R                  5       GH  u  ppUR                  SSSS5      R                  5       R                  X6-  US5      R                  5       nU" U5      nUR                  X6US5      R                  SSSS5      R                  5       nU	" U5      nX}-   nUR                  SSSS5      R                  5       R                  X5-  US5      R                  5       nU
" U5      nUR                  X5US5      R                  SSSS5      R                  5       nU" U5      nX-   nGM     U R                  U5      nU R                  Xr5      nUR                  SS5      R                  5       nU$ )Nr   r|   r   r	   rt   )r   ru   ziprg   ri   rh   rj   permuter}   r~   rq   r   r   )r   r   rx   r   rY   dim1dim2r%   rg   ri   rh   rj   row_inrow_outcol_incol_outs                   r   r&   DPRNN.forward   s   --"$%GG!
t47dmmUYUaUacgcpcp4q0Gw[[Aq!,779>>z?PRVXZ[ffhFfoGll:T2>FFq!QPQR]]_Gw'G-C[[Aq!,779>>z?PRVXZ[ffhFfoGll:T2>FFq!QPQR]]_Gw'G-C 5r iinll3%mmAq!,,.
r   )rb   rc   rj   rh   rq   r`   ri   rg   )@         LSTM   d   2   )r)   r*   r+   r,   rF   rG   r[   r   r.   r/   r   ry   r   r   r&   r0   r1   r2   s   @r   r]   r]   `   s   
 )) ) 	)
 ) ) ) ) 
) )<5<< E%,,2C,D 	%,, 	5s1B+C 		 	S 	U\\ 	 %,,  r   r]   c                   r   ^  \ rS rSrS	S\SS4U 4S jjjrS\R                  S\R                  4S jrSr	U =r
$ )
AutoPool   pool_dimr   Nc                    > [         [        U ]  5         Xl        [        R
                  " US9U l        U R                  S[        R                  " [        R                  " S5      5      5        g )Nr?   alphar	   )r   r   r   r   r   Softmaxsoftmaxregister_parameter	Parameterr.   ones)r   r   r    s     r   r   AutoPool.__init__   sF    h&(%*,***Bejjm)DEr   r   c                     U R                  [        R                  " XR                  5      5      n[        R                  " [        R                  " X5      U R
                  S9nU$ )Nr?   )r   r.   mulr   sumr   )r   r   weightr%   s       r   r&   AutoPool.forward   s?    eii::67ii		!,$--@
r   )r   r   )r	   )r)   r*   r+   r,   rG   r   r.   r/   r&   r0   r1   r2   s   @r   r   r      s?    F FT F F %,,  r   r   c                      ^  \ rS rSrSrS\R                  S\R                  S\R                  4U 4S jjrS\	R                  S\\	R                     4S	 jrS
rU =r$ )SquimObjective   a  Speech Quality and Intelligibility Measures (SQUIM) model that predicts **objective** metric scores
for speech enhancement (e.g., STOI, PESQ, and SI-SDR).

Args:
    encoder (torch.nn.Module): Encoder module to transform 1D waveform to 2D feature representation.
    dprnn (torch.nn.Module): DPRNN module to model sequential feature.
    branches (torch.nn.ModuleList): Transformer branches in which each branch estimate one objective metirc score.
encoderdprnnbranchesc                 N   > [         [        U ]  5         Xl        X l        X0l        g rX   )r   r   r   r   r   r   )r   r   r   r   r    s       r   r   SquimObjective.__init__   s!     	nd,.
 r   r   r   c                 R   UR                   S:w  a  [        SUR                    S35      eU[        R                  " US-  SSS9S-  S-  -  nU R	                  U5      nU R                  U5      n/ nU R                   H'  nUR                  U" U5      R                  SS	95        M)     U$ )
z
Args:
    x (torch.Tensor): Input waveforms. Tensor with dimensions `(batch, time)`.

Returns:
    List(torch.Tensor): List of score Tenosrs. Each Tensor is with dimension `(batch,)`.
r   z/The input must be a 2D Tensor. Found dimension .r	   T)r@   keepdimg      ?   r?   )	ndim
ValueErrorr.   meanr   r   r   rl   squeeze)r   r   r%   scoresbranchs        r   r&   SquimObjective.forward   s     66Q;NqvvhVWXYYAqDa6#=BCll1ojjommFMM&+--!-45 $r   )r   r   r   )r)   r*   r+   r,   rF   r   Modulerf   r   r.   r/   r   r&   r0   r1   r2   s   @r   r   r      sZ    	!	! yy	! --		! $u||*<  r   r   ra   nheadmetricc           	         [         R                  " XU S-  SSS9n[        5       nUS:X  a^  [         R                  " [         R                  " X 5      [         R
                  " 5       [         R                  " U S5      [        5       5      nOUS:X  aa  [         R                  " [         R                  " X 5      [         R
                  " 5       [         R                  " U S5      [        [        S95      nOT[         R                  " [         R                  " X 5      [         R
                  " 5       [         R                  " U S5      5      n[         R                  " X4U5      $ )	aP  Create branch module after DPRNN model for predicting metric score.

Args:
    d_model (int): The number of expected features in the input.
    nhead (int): Number of heads in the multi-head attention model.
    metric (str): The metric name to predict.

Returns:
    (nn.Module): Returned module to predict corresponding metric score.
   r(   T)rN   rP   stoir	   pesq)r   )r   TransformerEncoderLayerr   rn   rT   rp   r   r   )ra   r   r   layer1layer2layer3s         r   _create_branchr      s     ''!S^bcFZFIIg'HHJIIgq!N	
 
6	IIg'HHJIIgq!9-	
 %'MM"))G2Mrxxz[][d[delno[p$q==00r   r6   r7   r_   r`   rK   rb   rc   c	           	          Uc  US-  n[        X5      n	[        XXVX'U5      n
[        R                  " [	        X#S5      [	        X#S5      [	        X#S5      /5      n[        XU5      $ )a  Build a custome :class:`torchaudio.prototype.models.SquimObjective` model.

Args:
    feat_dim (int, optional): The feature dimension after Encoder module.
    win_len (int): Kernel size in the Encoder module.
    d_model (int): The number of expected features in the input.
    nhead (int): Number of heads in the multi-head attention model.
    hidden_dim (int): Hidden dimension in the RNN layer of DPRNN.
    num_blocks (int): Number of DPRNN layers.
    rnn_type (str): Type of RNN in DPRNN. Valid options are ["RNN", "LSTM", "GRU"].
    chunk_size (int): Chunk size of input for DPRNN.
    chunk_stride (int or None, optional): Stride of chunk input for DPRNN.
r   r   r   sisdr)r4   r]   r   rf   r   r   )r6   r7   ra   r   r_   r`   rK   rb   rc   r   r   r   s               r   squim_objective_modelr     sn    0 !Qh(G(
gS_`E}}762762773	
H '(33r   c                  "    [        SSSSSSSSS9$ )zWBuild :class:`torchaudio.prototype.models.SquimObjective` model with default arguments.r   r   r   r   r   G   )r6   r7   ra   r   r_   r`   rK   rb   )r    r   r   squim_objective_baser   ;  s'     	 	r   rX   )r
   typingr   r   r   r.   torch.nnr   torch.nn.functional
functionalrB   r-   r   r   __annotations__r   r   r4   rI   r]   r   r   rG   r[   modulesr   r   r   r   r   r   <module>r      sa    ( (    Ju J J  C 	"	5 	299 	bii 6		 4]BII ]@
ryy 
%RYY %P1C 1 1S 1RZZ=N=N 1R #'#4#4#4 #4 	#4
 #4 #4 #4 #4 3-#4 #4Ln r   