
    h:              7          S SK r S SKJr  S SKrS SKrS SKJr  / SQr\R                  " \R                  " \R                  5      R                  5      rSrSrSrSrS	rS
r\\\\\/rS rS\S\4S jrS\S\S\S\S\4
S jrS\S\S\	S\R2                  S\S\4S jrS\S\S\	S\4S jrS\S\S\	S\	S \	S!\S"\	S\\\\\4   4S# jrS\S$\S\S\S\S\	S\S%\S\	S&\	S'\S"\	S\\\4   4S( jrS)\S*\S\4S+ jrS,S-S.S/S0S1S.S2S3S3S3S4S3S5\4S\S\	S\S&\	S\	S \	S\	S6\	S"\	S%\S'\S!\S\	S\S*\S\S\4"S7 jjrS8\	S\	4S9 jr S8\S\4S: jr!S;\	S\	4S< jr"S;\S\4S= jr#S>\	S?\	S@\	SA\	SB\	S;\S\4SC jr$S>\	S?\	SA\	SB\	S8\S\4SD jr%SE\SF\SG\	S@\	SA\	SH\	SI\	SB\	S\\\4   4SJ jr&S,S-S.S/S0S1S.S5SKS.SLS2S3S3S3S4S3S5S5S3S3SMSNS/\4S\S\	S\S&\	S\	S \	S\	SA\	SO\S@\	S6\	SP\S"\	S%\S'\S!\S\	S\S*\SQ\SR\SS\SI\	SH\	ST\	S\S\46SU jjr'SV\SP\S\4SW jr(SV\SX\	S\4SY jr)S,SZS-S.S/S0S1S.S5SKS[S.SLS2S3S3S3S4S3S5S5SMSNS/\4S\S\	SX\	S\S&\	S\	S \	S\	SA\	SO\S@\	SV\S6\	SP\S"\	S%\S'\S!\S\	S\S*\SQ\SI\	SH\	ST\	S\S\46S\ jjr*g)]    N)Tuple)Tensor)
get_mel_banksinverse_mel_scaleinverse_mel_scale_scalar	mel_scalemel_scale_scalarspectrogramfbankmfccvtln_warp_freqvtln_warp_mel_freqgMbP?hamminghanningpoveyrectangularblackmanc                 (    [         R                  XS9$ )Ndevicedtype)EPSILONtor   s     S/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/compliance/kaldi.py_get_epsilonr   #   s    ::V:11    xreturnc                 >    U S:X  a  S$ SU S-
  R                  5       -  $ )z6Returns the smallest power of 2 that is greater than xr         )
bit_length)r   s    r   _next_power_of_2r#   '   s%    Q15A!a%!3!3!555r   waveformwindow_sizewindow_shift
snip_edgesc                    U R                  5       S:X  d   eU R                  S5      nX R                  S5      -  U R                  S5      4nU(       a:  XA:  a*  [        R                  " SU R
                  U R                  S9$ SXA-
  U-  -   nOl[        R                  " U S/5      nXBS-  -   U-  nUS-  US-  -
  nUn	US:  a  Xx* S n
[        R                  " XU	4SS9n O[        R                  " X* S U	4SS9n Xa4nU R                  X5      $ )a  Given a waveform (1D tensor of size ``num_samples``), it returns a 2D tensor (m, ``window_size``)
representing how the window is shifted along the waveform. Each row is a frame.

Args:
    waveform (Tensor): Tensor of size ``num_samples``
    window_size (int): Frame length
    window_shift (int): Frame shift
    snip_edges (bool): If True, end effects will be handled by outputting only frames that completely fit
        in the file, and the number of frames depends on the frame_length.  If False, the number of frames
        depends only on the frame_shift, and we reflect the data at the ends.

Returns:
    Tensor: 2D tensor of size (m, ``window_size``) where each row is a frame
r    r   )r   r   r   r   r!   Ndim)
r+   sizestridetorchemptyr   r   flipcat
as_strided)r$   r%   r&   r'   num_samplesstridesmreversed_waveformpad	pad_rightpad_leftsizess               r   _get_stridedr;   ,   s    <<>Q--"Kooa00(//!2DEG$;;vX^^HOOTT[.<??A!JJx!5A-.<?Q!22%	7 )/Hyy(i!@aHH yy(45/9!=1EHEu..r   window_typeblackman_coeffr   r   c           	      J   U [         :X  a  [        R                  " USX4S9$ U [        :X  a  [        R                  " USSSX4S9$ U [
        :X  a%  [        R                  " USX4S9R                  S5      $ U [        :X  a  [        R                  " XUS9$ U [        :X  a|  S[        R                  -  US	-
  -  n[        R                  " XUS9nUS
[        R                  " XV-  5      -  -
  S
U-
  [        R                  " SU-  U-  5      -  -   R                  X4S9$ [        SU -   5      e)z6Returns a window function with the given type and sizeF)periodicr   r   gHzG?gq=
ףp?)r?   alphabetar   r   g333333?r   r!   r          ?zInvalid window type )HANNINGr.   hann_windowHAMMINGhamming_windowPOVEYpowRECTANGULARonesBLACKMANmathpiarangecosr   	Exception)r<   r%   r=   r   r   awindow_functions          r   _feature_window_functionrS   V   s    g  uVYY		##K%tRV_ess		  uVY]]^bcc		#zz+EBB		 K;?+,,{O EIIa1223^#uyyQ1H'IIJ "F"
(		) .<==r   strided_inputepsilonenergy_floorc           	      B   U R                   U R                  pC[        R                  " U R	                  S5      R                  S5      U5      R                  5       nUS:X  a  U$ [        R                  " U[        R                  " [        R                  " U5      X4S95      $ )z<Returns the log energy of size (m) for a strided_input (m,*)r!   r            r   )	r   r   r.   maxrH   sumlogtensorrL   )rT   rU   rV   r   r   
log_energys         r   _get_log_energyr^   t   sw    !((-*=*=E=,,Q/33A6@DDFJs99Zdhh|.DV!abbr   channelsample_frequencyframe_shiftframe_lengthround_to_power_of_twopreemphasis_coefficientc                    [        US5      nXR                  S5      :  d%   SR                  XR                  S5      5      5       eXSS24   n [        X#-  [        -  5      n[        X$-  [        -  5      nU(       a  [        U5      OUn	SUs=::  a  [        U 5      ::  d"  O   SR                  U[        U 5      5      5       eSU:  d   S5       eU	S-  S:X  d   S5       eSUs=::  a  S	::  d   S
5       e   S
5       eUS:  d   S5       eXX4$ )z'Gets the waveform and window propertiesr   zInvalid channel {} for size {}Nr!   z'choose a window size {} that is [2, {}]z%`window_shift` must be greater than 0zgthe padded `window_size` must be divisible by two. use `round_to_power_of_two` or change `frame_length`rX         ?z/`preemphasis_coefficient` must be between [0,1]z,`sample_frequency` must be greater than zero)rY   r,   formatintMILLISECONDS_TO_SECONDSr#   len)
r$   r_   r`   ra   rb   rc   rd   r&   r%   padded_window_sizes
             r   #_get_waveform_and_window_propertiesrl   }   s5    '1oG]]1%%i'G'N'NwXeXefgXh'ii%
#H'58OOPL&58OOPK:O)+6U`,s8}, .W.^.^S]/ , |DDD!Q& t& )0S0c2cc0c2cc0aO!OO;BBr   rk   
raw_energyditherremove_dc_offsetc                    U R                   U R                  p[        X5      n[        XX65      nU	S:w  a'  [        R
                  " UR                  XS9nUUU	-  -   nU
(       a)  [        R                  " USS9R                  S5      nUU-
  nU(       a  [        XU5      nUS:w  aW  [        R                  R                  R                  UR                  S5      SSS9R                  S5      nXUS	S	2S	S
24   -  -
  n[        XBX\U5      R                  S5      nUU-  nX:w  aN  X-
  n[        R                  R                  R                  UR                  S5      SU4SSS9R                  S5      nU(       d  [        XU5      nUW4$ )zGets a window and its log energy

Returns:
    (Tensor, Tensor): strided_input of size (m, ``padded_window_size``) and signal_log_energy of size (m)
rX   r   r    r*   r   )r    r   	replicate)modeNconstantrr   value)r   r   r   r;   r.   randnshapemean	unsqueezer^   nn
functionalr7   squeezerS   )r$   rk   r%   r&   r<   r=   r'   rm   rV   rn   ro   rd   r   r   rU   rT   
rand_gauss	row_meanssignal_log_energyoffset_strided_inputrR   padding_rights                         r   _get_windowr      s   & OOX^^E6)G !QM}[[!4!4VQ
%
V(;;JJ}!4>>qA	%	1 ,MLQ#%$xx2266}7N7Nq7QSY`k6ltt 
 &BVWXZ][]Z]W]B^(^^ /{afgqq	O "O3M (*8++//##A&M(:ST 0 

'!* 	
 +MLQ+++r   r\   subtract_meanc                 d    U(       a(  [         R                  " U SS9R                  S5      nX-
  n U $ )Nr   r*   )r.   ry   rz   )r\   r   	col_meanss      r   _subtract_column_meanr      s/     JJv1-77:	#Mr   gzG?rs   rX   rf   g      9@g      $@g
ףp=
?Tg     @@Fmin_durationc                    U R                   U R                  nn[        UU5      n[        XXX[U5      u  n nnn[	        U 5      X|-  :  a  [
        R                  " S5      $ [        U UUUUUUU	UUU
U5      u  nn[
        R                  R                  U5      n[
        R                  " UR                  5       R                  S5      U5      R                  5       nUUSS2S4'   [        UU5      nU$ )a&
  Create a spectrogram from a raw audio signal. This matches the input/output of Kaldi's
compute-spectrogram-feats.

Args:
    waveform (Tensor): Tensor of audio of size (c, n) where c is in the range [0,2)
    blackman_coeff (float, optional): Constant coefficient for generalized Blackman window. (Default: ``0.42``)
    channel (int, optional): Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (Default: ``-1``)
    dither (float, optional): Dithering constant (0.0 means no dither). If you turn this off, you should set
        the energy_floor option, e.g. to 1.0 or 0.1 (Default: ``0.0``)
    energy_floor (float, optional): Floor on energy (absolute, not relative) in Spectrogram computation.  Caution:
        this floor is applied to the zeroth component, representing the total signal energy.  The floor on the
        individual spectrogram elements is fixed at std::numeric_limits<float>::epsilon(). (Default: ``1.0``)
    frame_length (float, optional): Frame length in milliseconds (Default: ``25.0``)
    frame_shift (float, optional): Frame shift in milliseconds (Default: ``10.0``)
    min_duration (float, optional): Minimum duration of segments to process (in seconds). (Default: ``0.0``)
    preemphasis_coefficient (float, optional): Coefficient for use in signal preemphasis (Default: ``0.97``)
    raw_energy (bool, optional): If True, compute energy before preemphasis and windowing (Default: ``True``)
    remove_dc_offset (bool, optional): Subtract mean from waveform on each frame (Default: ``True``)
    round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
        to FFT. (Default: ``True``)
    sample_frequency (float, optional): Waveform data sample frequency (must match the waveform file, if
        specified there) (Default: ``16000.0``)
    snip_edges (bool, optional): If True, end effects will be handled by outputting only frames that completely fit
        in the file, and the number of frames depends on the frame_length.  If False, the number of frames
        depends only on the frame_shift, and we reflect the data at the ends. (Default: ``True``)
    subtract_mean (bool, optional): Subtract mean of each feature file [CMS]; not recommended to do
        it this way.  (Default: ``False``)
    window_type (str, optional): Type of window ('hamming'|'hanning'|'povey'|'rectangular'|'blackman')
     (Default: ``'povey'``)

Returns:
    Tensor: A spectrogram identical to what Kaldi would output. The shape is
    (m, ``padded_window_size // 2 + 1``) where m is calculated in _get_strided
r          @N)r   r   r   rl   rj   r.   r/   r   fftrfftrY   absrH   r[   r   )r$   r=   r_   rn   rV   rb   ra   r   rd   rm   ro   rc   r`   r'   r   r<   r   r   rU   r&   r%   rk   rT   r   r   power_spectrums                             r   r
   r
      s    h OOX^^EF65)G>a+,_v?;HlK); 8}|66{{1~'2($M$  ))..
'C YYswwy}}S17;??AN,N1a4*>=INr   mel_freqc                 @    S[         R                  " U S-  5      S-
  -  $ N     @     @rf   )rL   expr   s    r   r   r   >  s     DHHX./#566r   c                 4    SU S-  R                  5       S-
  -  $ r   )r   r   s    r   r   r   B  s     X&++-344r   freqc                 @    S[         R                  " SU S-  -   5      -  $ Nr   rf   r   )rL   r[   r   s    r   r	   r	   F  s    DHHS4%</000r   c                 4    SSU S-  -   R                  5       -  $ r   )r[   r   s    r   r   r   J  s    S4%<',,...r   vtln_low_cutoffvtln_high_cutofflow_freq	high_freqvtln_warp_factorc                 .   X:  d   S5       eX:  d   S5       eU [        SU5      -  nU[        SU5      -  nSU-  nX-  n	X-  n
Xb:  a  Xs:  d   eX-
  Xb-
  -  nX:-
  X7-
  -  n[        R                  " U5      n[        R                  " XR5      [        R
                  " XS5      -  n[        R                  " XV5      n[        R                  " XW5      n[        R                  " XW5      nX<UU   U-
  -  -   UU'   XU   -  UU'   X+X_   U-
  -  -   X'   X^   X'   U$ )a  This computes a VTLN warping function that is not the same as HTK's one,
but has similar inputs (this function has the advantage of never producing
empty bins).

This function computes a warp function F(freq), defined between low_freq
and high_freq inclusive, with the following properties:
    F(low_freq) == low_freq
    F(high_freq) == high_freq
The function is continuous and piecewise linear with two inflection
    points.
The lower inflection point (measured in terms of the unwarped
    frequency) is at frequency l, determined as described below.
The higher inflection point is at a frequency h, determined as
    described below.
If l <= f <= h, then F(f) = f/vtln_warp_factor.
If the higher inflection point (measured in terms of the unwarped
    frequency) is at h, then max(h, F(h)) == vtln_high_cutoff.
    Since (by the last point) F(h) == h/vtln_warp_factor, then
    max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
    h = vtln_high_cutoff / max(1, 1/vtln_warp_factor).
      = vtln_high_cutoff * min(1, vtln_warp_factor).
If the lower inflection point (measured in terms of the unwarped
    frequency) is at l, then min(l, F(l)) == vtln_low_cutoff
    This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
                        = vtln_low_cutoff * max(1, vtln_warp_factor)
Args:
    vtln_low_cutoff (float): Lower frequency cutoffs for VTLN
    vtln_high_cutoff (float): Upper frequency cutoffs for VTLN
    low_freq (float): Lower frequency cutoffs in mel computation
    high_freq (float): Upper frequency cutoffs in mel computation
    vtln_warp_factor (float): Vtln warp factor
    freq (Tensor): given frequency in Hz

Returns:
    Tensor: Freq after vtln warp
z7be sure to set the vtln_low option higher than low_freqzFbe sure to set the vtln_high option lower than high_freq [or negative]rf   )rY   minr.   
empty_likeltgtge)r   r   r   r   r   r   lhscaleFlFh
scale_leftscale_rightresoutside_low_high_freqbefore_lbefore_hafter_hs                     r   r   r   N  s<   X %`'``%'q)qq'#c#344A3s$455A""E	B	B<AM))-AL1J >im4K


4
 C!HHT4uxx7PPxx Hxx HhhtG d7mi.GHHCLN*CMT^h-FGGCM!%!<CJr   c                 B    [        [        XX#U[        U5      5      5      $ )a  
Args:
    vtln_low_cutoff (float): Lower frequency cutoffs for VTLN
    vtln_high_cutoff (float): Upper frequency cutoffs for VTLN
    low_freq (float): Lower frequency cutoffs in mel computation
    high_freq (float): Upper frequency cutoffs in mel computation
    vtln_warp_factor (float): Vtln warp factor
    mel_freq (Tensor): Given frequency in Mel

Returns:
    Tensor: ``mel_freq`` after vtln warp
)r   r   r   )r   r   r   r   r   r   s         r   r   r     s*    ( xDTVghpVq	
 r   num_binswindow_length_paddedsample_freqvtln_low	vtln_highc                    U S:  d   S5       eUS-  S:X  d   eUS-  nSU-  n	US::  a  XI-  nSUs=::  a  U	:  a  O  OSUs=:  a  U	::  a  O  OX4:  d   SR                  X4U	5      5       eX!-  n
[        U5      n[        U5      nX-
  U S-   -  nUS:  a  Xi-  nUS	:X  d;  X5s=:  a  U:  a  O  OSUs=:  a  U:  a  O  OXV:  d   S
R                  XVX45      5       e[        R                  " U 5      R	                  S5      nXU-  -   nXS	-   U-  -   nXS-   U-  -   nUS	:w  a)  [        XVX4X5      n[        XVX4UU5      n[        XVX4UU5      n[        U5      n[        U
[        R                  " U5      -  5      R	                  S5      nUU-
  UU-
  -  nUU-
  UU-
  -  nUS	:X  aD  [        R                  " [        R                  " S5      [        R                  " UU5      5      nUU4$ [        R                  " U5      n[        R                  " UU5      [        R                  " UU5      -  n[        R                  " UU5      [        R                  " UU5      -  nUU   UU'   UU   UU'   UU4$ )z
Returns:
    (Tensor, Tensor): The tuple consists of ``bins`` (which is
    melbank of size (``num_bins``, ``num_fft_bins``)) and ``center_freqs`` (which is
    center frequencies of bins of size (``num_bins``)).
   zMust have at least 3 mel binsr!   r   rB   rX   zBBad values in options: low-freq {} and high-freq {} vs. nyquist {}r    rf   zXBad values in options: vtln-low {} and vtln-high {}, versus low-freq {} and high-freq {}r   )rg   r	   r.   rN   rz   r   r   r   rY   zerosr   
zeros_liker   ler   )r   r   r   r   r   r   r   r   num_fft_binsnyquistfft_bin_widthmel_low_freqmel_high_freqmel_freq_deltabinleft_mel
center_mel	right_melcenter_freqsmelup_slope
down_slopebinsup_idxdown_idxs                            r   r   r     s     a<888<!#q((('!+LKGC	 
	"7	"y)CG)C(J^qKRRS[hopq_  6M#H-L$Y/M $2x!|DN3	s"		(y	(sY/J/JQYQedkkX  ,,x
 
*
*1
-CN22Hsn <<Jc	^;;I3%h8P`k'XRbdno
&xHQaclm	$Z0L
MELL$>>
?
I
I!
LC h:#89Hc/i*&<=J3yyQ8Z)HI  )#x(588C+DD88C,uxxY/GG'V#H-Xr   g      4@   g     @g      Y@
htk_compatnum_mel_bins
use_energyuse_log_fbank	use_power	vtln_warpc                 f   U R                   U R                  nn[        XUXeX5      u  n nnn[        U 5      U
U-  :  a  [        R
                  " SUUS9$ [        U UUUUUUUUUUU5      u  nn [        R                  R                  U5      R                  5       n!U(       a  U!R                  S5      n![        UUUXUUU5      u  n"n#U"R                  UUS9n"[        R                  R                  R                  U"SSSS9n"[        R                   " U!U"R"                  5      n"U(       a/  [        R$                  " U"['        UU5      5      R)                  5       n"U(       aG  U R+                  S5      n U(       a  [        R,                  " U"U 4SS9n"O[        R,                  " U U"4SS9n"[/        U"U5      n"U"$ )	a  Create a fbank from a raw audio signal. This matches the input/output of Kaldi's
compute-fbank-feats.

Args:
    waveform (Tensor): Tensor of audio of size (c, n) where c is in the range [0,2)
    blackman_coeff (float, optional): Constant coefficient for generalized Blackman window. (Default: ``0.42``)
    channel (int, optional): Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (Default: ``-1``)
    dither (float, optional): Dithering constant (0.0 means no dither). If you turn this off, you should set
        the energy_floor option, e.g. to 1.0 or 0.1 (Default: ``0.0``)
    energy_floor (float, optional): Floor on energy (absolute, not relative) in Spectrogram computation.  Caution:
        this floor is applied to the zeroth component, representing the total signal energy.  The floor on the
        individual spectrogram elements is fixed at std::numeric_limits<float>::epsilon(). (Default: ``1.0``)
    frame_length (float, optional): Frame length in milliseconds (Default: ``25.0``)
    frame_shift (float, optional): Frame shift in milliseconds (Default: ``10.0``)
    high_freq (float, optional): High cutoff frequency for mel bins (if <= 0, offset from Nyquist)
     (Default: ``0.0``)
    htk_compat (bool, optional): If true, put energy last.  Warning: not sufficient to get HTK compatible features
     (need to change other parameters). (Default: ``False``)
    low_freq (float, optional): Low cutoff frequency for mel bins (Default: ``20.0``)
    min_duration (float, optional): Minimum duration of segments to process (in seconds). (Default: ``0.0``)
    num_mel_bins (int, optional): Number of triangular mel-frequency bins (Default: ``23``)
    preemphasis_coefficient (float, optional): Coefficient for use in signal preemphasis (Default: ``0.97``)
    raw_energy (bool, optional): If True, compute energy before preemphasis and windowing (Default: ``True``)
    remove_dc_offset (bool, optional): Subtract mean from waveform on each frame (Default: ``True``)
    round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
        to FFT. (Default: ``True``)
    sample_frequency (float, optional): Waveform data sample frequency (must match the waveform file, if
        specified there) (Default: ``16000.0``)
    snip_edges (bool, optional): If True, end effects will be handled by outputting only frames that completely fit
        in the file, and the number of frames depends on the frame_length.  If False, the number of frames
        depends only on the frame_shift, and we reflect the data at the ends. (Default: ``True``)
    subtract_mean (bool, optional): Subtract mean of each feature file [CMS]; not recommended to do
        it this way.  (Default: ``False``)
    use_energy (bool, optional): Add an extra dimension with energy to the FBANK output. (Default: ``False``)
    use_log_fbank (bool, optional):If true, produce log-filterbank, else produce linear. (Default: ``True``)
    use_power (bool, optional): If true, use power, else use magnitude. (Default: ``True``)
    vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function (if
        negative, offset from high-mel-freq (Default: ``-500.0``)
    vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function (Default: ``100.0``)
    vtln_warp (float, optional): Vtln warp factor (only applicable if vtln_map not specified) (Default: ``1.0``)
    window_type (str, optional): Type of window ('hamming'|'hanning'|'povey'|'rectangular'|'blackman')
     (Default: ``'povey'``)

Returns:
    Tensor: A fbank identical to what Kaldi would output. The shape is (m, ``num_mel_bins + use_energy``)
    where m is calculated in _get_strided
r   r   r   )r   r    rt   ru   r    r*   )r   r   rl   rj   r.   r/   r   r   r   r   rH   r   r   r{   r|   r7   mmTrY   r   r[   rz   r1   r   )$r$   r=   r_   rn   rV   rb   ra   r   r   r   r   r   rd   rm   ro   rc   r`   r'   r   r   r   r   r   r   r   r<   r   r   r&   r%   rk   rT   r   spectrummel_energies_s$                                       r   r   r     s   V OOX^^EF>a+[H]?;HlK); 8}|&666{{1V599 (3($M$  yy~~m,002H<<$ $(*:HQY[dfoOL!  ??&?>L 88&&**<jXY*ZL 88Hlnn5Lyy|FE/JKOOQ -77: 99l4E%FANL 99&7%FANL(}ELr   num_cepsc                     [         R                  R                  XS5      n[        R                  " S[        U5      -  5      US S 2S4'   US S 2S U 24   nU$ )Northor    r   )
torchaudior|   
create_dctrL   sqrtfloat)r   r   
dct_matrixs      r   _get_dct_matrixr     sW     &&11,gVJ
 yyU<%8!89Jq!tAyyL)Jr   cepstral_lifterc                     [         R                  " U 5      nSSU-  [         R                  " [        R                  U-  U-  5      -  -   $ )Nrf   rB   )r.   rN   sinrL   rM   )r   r   is      r   _get_lifter_coeffsr     s=     	XA&477Q;3P)QQQQr   g      6@   c                 &   X::  d   SX4-  5       eU R                   U R                  nn[        S&0 SU _SU_SU_SU_SU_SU_SU_S	U_S
U	_SU
_SU_SU_SU_SU_SU_SU_SU_SU_SS_SU_SS_SS_SU_SU_SU_SU_6nU(       a0  USS2U	(       a  UOS4   n[        U	(       + 5      nUSS2UUU-   24   n[	        X5      R                  UUS 9nUR                  U5      nUS!:w  a-  [        X5      R                  S5      n UU R                  UUS"9-  nU(       a	  WUSS2S4'   U	(       aZ  USS2S4   R                  S#5      n!USS2S#S24   nU(       d  U![        R                  " S$5      -  n![        R                  " UU!4S#S%9n[        UU5      nU$ )'a  Create a mfcc from a raw audio signal. This matches the input/output of Kaldi's
compute-mfcc-feats.

Args:
    waveform (Tensor): Tensor of audio of size (c, n) where c is in the range [0,2)
    blackman_coeff (float, optional): Constant coefficient for generalized Blackman window. (Default: ``0.42``)
    cepstral_lifter (float, optional): Constant that controls scaling of MFCCs (Default: ``22.0``)
    channel (int, optional): Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (Default: ``-1``)
    dither (float, optional): Dithering constant (0.0 means no dither). If you turn this off, you should set
        the energy_floor option, e.g. to 1.0 or 0.1 (Default: ``0.0``)
    energy_floor (float, optional): Floor on energy (absolute, not relative) in Spectrogram computation.  Caution:
        this floor is applied to the zeroth component, representing the total signal energy.  The floor on the
        individual spectrogram elements is fixed at std::numeric_limits<float>::epsilon(). (Default: ``1.0``)
    frame_length (float, optional): Frame length in milliseconds (Default: ``25.0``)
    frame_shift (float, optional): Frame shift in milliseconds (Default: ``10.0``)
    high_freq (float, optional): High cutoff frequency for mel bins (if <= 0, offset from Nyquist)
     (Default: ``0.0``)
    htk_compat (bool, optional): If true, put energy last.  Warning: not sufficient to get HTK compatible
     features (need to change other parameters). (Default: ``False``)
    low_freq (float, optional): Low cutoff frequency for mel bins (Default: ``20.0``)
    num_ceps (int, optional): Number of cepstra in MFCC computation (including C0) (Default: ``13``)
    min_duration (float, optional): Minimum duration of segments to process (in seconds). (Default: ``0.0``)
    num_mel_bins (int, optional): Number of triangular mel-frequency bins (Default: ``23``)
    preemphasis_coefficient (float, optional): Coefficient for use in signal preemphasis (Default: ``0.97``)
    raw_energy (bool, optional): If True, compute energy before preemphasis and windowing (Default: ``True``)
    remove_dc_offset (bool, optional): Subtract mean from waveform on each frame (Default: ``True``)
    round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
        to FFT. (Default: ``True``)
    sample_frequency (float, optional): Waveform data sample frequency (must match the waveform file, if
        specified there) (Default: ``16000.0``)
    snip_edges (bool, optional): If True, end effects will be handled by outputting only frames that completely fit
        in the file, and the number of frames depends on the frame_length.  If False, the number of frames
        depends only on the frame_shift, and we reflect the data at the ends. (Default: ``True``)
    subtract_mean (bool, optional): Subtract mean of each feature file [CMS]; not recommended to do
        it this way.  (Default: ``False``)
    use_energy (bool, optional): Add an extra dimension with energy to the FBANK output. (Default: ``False``)
    vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function (if
        negative, offset from high-mel-freq (Default: ``-500.0``)
    vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function (Default: ``100.0``)
    vtln_warp (float, optional): Vtln warp factor (only applicable if vtln_map not specified) (Default: ``1.0``)
    window_type (str, optional): Type of window ('hamming'|'hanning'|'povey'|'rectangular'|'blackman')
     (Default: ``"povey"``)

Returns:
    Tensor: A mfcc identical to what Kaldi would output. The shape is (m, ``num_ceps``)
    where m is calculated in _get_strided
z5num_ceps cannot be larger than num_mel_bins: %d vs %dr$   r=   r_   rn   rV   rb   ra   r   r   r   r   r   rd   rm   ro   rc   r`   r'   r   Fr   r   Tr   r   r   r   r<   Nr   r)   rX   r   r    r!   r*    )r   r   r   rh   r   r   matmulr   rz   rL   r   r.   r1   r   )"r$   r=   r   r_   rn   rV   rb   ra   r   r   r   r   r   r   rd   rm   ro   rc   r`   r'   r   r   r   r   r   r<   r   r   featurer   
mel_offsetr   lifter_coeffsenergys"                                     r   r   r     sY   V #w%\`h_w%ww#OOX^^EF
  %  	
 " "      " " !8  *  4!" *#$ %& '( )* +, -. /0 12 34  5G: #Az|q$HIZ(
!Z<*+DEEF !8;;%PV;WJ nnZ(G#*8EOOPQR=##6#?? )1A((+!QR%. diil"F))Wf-15#G];GNr   )+rL   typingr   r.   r   r   __all__r\   finfor   epsr   ri   rE   rC   rG   rI   rK   WINDOWSr   rh   r#   boolr;   strr   rS   r^   rl   r   r   r
   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   <module>r      s        ,,u{{5;;/33
4  
GUK
:26 6 6
'/6 '/ '/3 '/TX '/]c '/T>>> > LL	>
 > ><c6 cF c% cTZ cCCC C 	C
 C  C #C 63S !C:?,?,?, ?, 	?,
 ?, ?, ?, ?, ?, ?, ?, #?, 66>?,D&  &  !%)!"&%!VVV V 	V
 V V V V #V V V  V V V V  !V" #Vr7u 7 75 56 515 1U 1/F /v /HHH H 	H
 H H HV 	
   6KKK K 	K
 K K K K 66>K` !%)!"&%5CCC C 	C
 C C C C C C C C #C C C   !C" #C$ %C& 'C( )C* +C, -C. /C0 1C2 3C4 5C6 7CL
c 
 
 
R Ru R R !!%)!"&%5PPP P 	P
 P P P P P P P P P P #P  !P" #P$  %P& 'P( )P* +P, -P. /P0 1P2 3P4 5P6 7Pr   