
    h'2                        S SK JrJrJrJrJr  S SKrS SKJr  SS/r	\\\
   \R                  \\\R                        \4   rS\l        S\S\\
   4S	 jrS\S\R                  4S
 jrS\S\\\R                        4S jrS\S\4S jrS\S\4S jrS\\   S\\\R                        4S jrS\\\R                        S\
S\R,                  S\\\R                        4S jrS\S\4S jrS\\   S\R                  S\
S\\R                  \R                  \R                  4   4S jrS\S\\   SS4S jr " S S\R6                  R8                  5      rg)    )CallableDictListOptionalTupleN)RNNT
HypothesisRNNTBeamSearchzHypothesis generated by RNN-T beam search decoder,
    represented as tuple of (tokens, prediction network output, prediction network state, score).
    hyporeturnc                     U S   $ Nr    r   s    V/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/models/rnnt_decoder.py_get_hypo_tokensr          7N    c                     U S   $ N   r   r   s    r   _get_hypo_predictor_outr      r   r   c                     U S   $ )N   r   r   s    r   _get_hypo_stater      r   r   c                     U S   $ )N   r   r   s    r   _get_hypo_scorer      r   r   c                     [        U S   5      $ r   )strr   s    r   _get_hypo_keyr!       s    tAw<r   hyposc                 f   / n[        [        [        U S   5      5      5       H  n/ n[        [        [        U S   5      U   5      5       HH  nUR                  [        R
                  " U  Vs/ s H  n[        U5      U   U   PM     sn5      5        MJ     UR                  U5        M     U$ s  snf r   )rangelenr   appendtorchcat)r"   statesibatched_state_componentsjr   s         r   _batch_stater-   $   s    ')F3uQx01279 s?584Q789A$++EII_d6e_dW[t7LQ7OPQ7R_d6e,fg :./	 3
 M 7fs   -B.r)   idxdevicec                     [         R                  " U/US9nU  VVs/ s H$  oD Vs/ s H  oUR                  SU5      PM     snPM&     snn$ s  snf s  snnf )Nr/   r   )r'   tensorindex_select)r)   r.   r/   
idx_tensorstate_tuplestates         r   _slice_stater7   .   sK    seF3J\bc\b[KHK5:.KH\bccHcs   	AA AAc                 H    [        U 5      [        [        U 5      5      S-   -  $ r   )r   r%   r   r   s    r   _default_hypo_sort_keyr9   3   s"    4 C(8(>$?!$CDDr   next_token_probs
beam_widthc                 L   [         R                  " U  Vs/ s H  n[        U5      PM     sn5      R                  S5      nXAS S 2S S24   -   nUR	                  S5      R                  U5      u  pgUR                  UR                  S   SS9nXuR                  S   -  n	XhU	4$ s  snf )Nr   trunc)rounding_mode)r'   r2   r   	unsqueezereshapetopkdivshape)
r"   r:   r;   hhypo_scoresnonblank_scoresnonblank_nbest_scoresnonblank_nbest_idxnonblank_nbest_hypo_idxnonblank_nbest_tokens
             r   _compute_updated_scoresrL   7   s    
 ,,EBEq 2EBCMMaPK!QV$<<O0?0G0G0K0P0PQ[0\-044_5J5J15M]d4e-0E0Ea0HH ;OOO  Cs   B!	hypo_listc                 d    [        U5       H!  u  p#[        U 5      [        U5      :X  d  M  X	   g    g N)	enumerater!   )r   rM   r*   elems       r   _remove_hyporR   D   s-    Y'-"55 (r   c                   V  ^  \ rS rSrSr   S%S\S\S\S\\	\
/\4      S\S	S4U 4S
 jjjrS\R                  S	\\
   4S jrS\R                   S\\
   S\R                  S	\R                   4S jrS\\
   S\\
   S\R                   S\\\
4   S	\\
   4
S jrS\\
   S\\
   S\R                   S\S\S\R                  S	\\
   4S jrS\\
   S\\   S\\   S\S\R                  S	\\
   4S jrS\R                   S\\\
      S\S	\\
   4S jrS\R                   S\R                   S\S	\\
   4S  jr\R2                  R4                    S&S\R                   S\R                   S\S!\\\\R                            S"\\\
      S	\\\
   \\\R                         4   4S# jj5       rS$rU =r$ )'r
   K   a  Beam search decoder for RNN-T model.

See Also:
    * :class:`torchaudio.pipelines.RNNTBundle`: ASR pipeline with pretrained model.

Args:
    model (RNNT): RNN-T model to use.
    blank (int): index of blank token in vocabulary.
    temperature (float, optional): temperature to apply to joint network output.
        Larger values yield more uniform samples. (Default: 1.0)
    hypo_sort_key (Callable[[Hypothesis], float] or None, optional): callable that computes a score
        for a given hypothesis to rank hypotheses by. If ``None``, defaults to callable that returns
        hypothesis score normalized by token sequence length. (Default: None)
    step_max_tokens (int, optional): maximum number of tokens to emit per input time step. (Default: 100)
Nmodelblanktemperaturehypo_sort_keystep_max_tokensr   c                 |   > [         TU ]  5         Xl        X l        X0l        Uc  [
        U l        OX@l        XPl        g rO   )super__init__rU   rV   rW   r9   rX   rY   )selfrU   rV   rW   rX   rY   	__class__s         r   r\   RNNTBeamSearch.__init__\   s:     	

& !7D!..r   r/   c                     U R                   nS n[        R                  " S/US9nU R                  R	                  [        R                  " U//US9XC5      u  pVnU/US   R                  5       US4nU/$ )Nr   r1   r   g        )rV   r'   r2   rU   predictdetach)	r]   r/   tokenr6   
one_tensorpred_out_
pred_state	init_hypos	            r   _init_b_hyposRNNTBeamSearch._init_b_hyposp   sz    

\\1#f5
"&**"4"4U\\E7)TZ5[]g"oZGQK 	
	 {r   enc_outr"   c                    [         R                  " S/US9n[         R                  " U Vs/ s H  n[        U5      PM     snSS9nU R                  R                  UUU[         R                  " S/[        U5      -  US95      u  n  n[         R                  R                  R                  XpR                  -  SS9nUS S 2SS4   $ s  snf )Nr   r1   r   )dimr   )r'   r2   stackr   rU   joinr%   nn
functionallog_softmaxrW   )	r]   rk   r"   r/   rd   rE   predictor_out
joined_outrf   s	            r   _gen_next_token_probs$RNNTBeamSearch._gen_next_token_probs~   s     \\1#f5
$OA%<Q%?$OUVW::??LL!s5z)&9	

Aq XX((44ZBRBR5RXY4Z
!Q'"" %Ps   Cb_hyposa_hyposr:   key_to_b_hypoc                 v   [        [        U5      5       H  nX%   n[        U5      X5S4   -   n[        U5      U;   aQ  U[        U5         n[	        X5        [        [        R                  " [        U5      5      R                  U5      5      n	O[        U5      n	[        U5      [        U5      [        U5      U	4nUR                  U5        X[        U5      '   M     [        R                  " U V
s/ s H  n
[        U
5      PM     sn
5      R                  5       u  pU Vs/ s H  oU   PM	     sn$ s  sn
f s  snf )Nr=   )r$   r%   r   r!   rR   floatr'   r2   	logaddexpr   r   r   r&   sort)r]   rw   rx   r:   ry   r*   h_aappend_blank_scoreh_bscorer   rf   
sorted_idxr.   s                 r   _gen_b_hyposRNNTBeamSearch._gen_b_hypos   s    s7|$A*C!0!58HB8O!OS!]2#M#$67S*ell?3+?@JJK]^_01 %',$	C NN303-,-! %" %Pod&;%PQVVX(23

33 &Q3s   0D1 D6tr;   c                    [        XU5      u  nnn	[        U5      U:  a  [        S5      * n
O[        X%*    5      n
/ n/ n/ n[	        U5       He  n[        X~   5      nX:  d  M  [        X   5      nUR                  UU   5        UR                  [        X   5      5        UR                  U5        Mg     U(       a  U R                  XXU5      nU$ / nU$ )Ninf)rL   r%   r{   r   r$   intr&   _gen_new_hypos)r]   rx   rw   r:   r   r;   r/   rH   rJ   rK   b_nbest_score
base_hypos
new_tokens
new_scoresr*   r   
a_hypo_idx	new_hyposs                     r   _gen_a_hyposRNNTBeamSearch._gen_a_hypos   s     $GzJ		
!#  w<*$"5\MM+GK,@AM')
 "
"$
z"A/23E$ !8!;<
!!'*"56!!#&:&=">?!!%( # ++JJSYZI  +-Ir   r   tokensscoresc           
         [         R                  " U Vs/ s H  of/PM     snUS9n[        U5      nU R                  R	                  U[         R                  " S/[        U5      -  US9U5      u  pn/ n[        U5       HG  u  p[        U5      X-   /-   nUR                  XU   R                  5       [        XU5      X=   45        MI     U$ s  snf )Nr1   r   )r'   r2   r-   rU   ra   r%   rP   r   r&   rb   r7   )r]   r   r   r   r   r/   rc   
tgt_tokensr)   re   rf   pred_statesr   r*   r~   r   s                   r   r   RNNTBeamSearch._gen_new_hypos   s     \\"?u7"?O
j)#'::#5#5LL!s:.v>$
 [
 ')	
+FA)#.&)<Jj1+*<*<*>[]c@dflfopq ,  #@s   Cr   c           	         UR                   S   nUR                  n/ nUc  U R                  U5      OUn[        U5       GH  nUn[        R
                  R                  [        [           / 5      n0 n	Sn
U(       az  U R                  US S 2XS-   24   Xe5      nUR                  5       nU R                  XvX5      nXR                  :X  a  O+U R                  UUUUUU5      nU(       a  U
S-  n
U(       a  Mz  [        R                  " U Vs/ s H  oR                  U5      PM     sn5      R!                  U5      u  pU Vs/ s H  oU   PM	     nnGM     U$ s  snf s  snf )Nr   r   )rD   r/   ri   r$   r'   jitannotater   r	   ru   cpur   rY   r   r2   rX   rB   )r]   rk   r   r;   n_time_stepsr/   rx   rw   r   ry   symbols_current_tr:   hyprf   r   r.   s                   r   _searchRNNTBeamSearch._search   s_    }}Q'$&04$$$V,$|$AGii((j)92>G35M !#'#=#=gaQRUl>SU\#e #3#7#7#9 ++G>N^$(<(<<++$ %*%# '& "LLW)UWc*<*<S*AW)UV[[\fgMA/9:zs|zG:G5 %8  *V:s   	E
?Einputlengthc                    UR                  5       S:w  a2  UR                  5       S:X  a  UR                  S   S:X  d  [        S5      eUR                  5       S:X  a  UR                  S5      nUR                  S:w  a  UR                  S:w  a  [        S5      eUR                  5       S:X  a  UR                  S5      nU R                  R                  X5      u  pEU R                  US	U5      $ )
a  Performs beam search for the given input sequence.

T: number of frames;
D: feature dimension of each frame.

Args:
    input (torch.Tensor): sequence of input frames, with shape (T, D) or (1, T, D).
    length (torch.Tensor): number of valid frames in input
        sequence, with shape () or (1,).
    beam_width (int): beam size to use during search.

Returns:
    List[Hypothesis]: top-``beam_width`` hypotheses found by beam search.
r   r   r   r   *input must be of shape (T, D) or (1, T, D)r   r   "length must be of shape () or (1,)N)rm   rD   
ValueErrorr@   rU   
transcriber   )r]   r   r   r;   rk   rf   s         r   forwardRNNTBeamSearch.forward  s     99;!UYY[A%5%++a.A:MIJJ99;!OOA&E<<2&,,$"6ABB::<1%%a(FZZ**59
||GT:66r   r6   
hypothesisc                    UR                  5       S:w  a2  UR                  5       S:X  a  UR                  S   S:X  d  [        S5      eUR                  5       S:X  a  UR                  S5      nUR                  S:w  a  UR                  S:w  a  [        S5      eUR                  5       S:X  a  UR                  S5      nU R                  R                  XU5      u  pgnU R                  XeU5      U4$ )	a  Performs beam search for the given input sequence in streaming mode.

T: number of frames;
D: feature dimension of each frame.

Args:
    input (torch.Tensor): sequence of input frames, with shape (T, D) or (1, T, D).
    length (torch.Tensor): number of valid frames in input
        sequence, with shape () or (1,).
    beam_width (int): beam size to use during search.
    state (List[List[torch.Tensor]] or None, optional): list of lists of tensors
        representing transcription network internal state generated in preceding
        invocation. (Default: ``None``)
    hypothesis (List[Hypothesis] or None): hypotheses from preceding invocation to seed
        search with. (Default: ``None``)

Returns:
    (List[Hypothesis], List[List[torch.Tensor]]):
        List[Hypothesis]
            top-``beam_width`` hypotheses found by beam search.
        List[List[torch.Tensor]]
            list of lists of tensors representing transcription network
            internal state generated in current invocation.
r   r   r   r   r   r   r   r   )rm   rD   r   r@   rU   transcribe_streamingr   )r]   r   r   r;   r6   r   rk   rf   s           r   inferRNNTBeamSearch.infer'  s    B 99;!UYY[A%5%++a.A:MIJJ99;!OOA&E<<2&,,$"6ABB::<1%%a(F JJ;;E5QE||G<eCCr   )rV   rX   rU   rY   rW   )g      ?Nd   )NN)__name__
__module____qualname____firstlineno____doc__r   r   r{   r   r   r	   r\   r'   r/   r   ri   Tensorru   r   r    r   r   r   r   r   r   exportr   r   __static_attributes____classcell__)r^   s   @r   r
   r
   K   s   ( !AE"// / 	/
  *u)< =>/ / 
/ /(ELL T*5E #||#,0,<#FKll#	#4j!4 j!4  ,,	4
 CO,4 
j	46$j!$ j!$  ,,	$
 $ $ $ 
j	$L$ S	 U	
   
j	*'' tJ'(' 	'
 
j	'R7U\\ 75<< 7S 7UYZdUe 78 YY 5915+D||+D +D 	+D
 T%,,/01+D T*-.+D 
tJd5<<&8!99	:+D +Dr   )typingr   r   r   r   r   r'   torchaudio.modelsr   __all__r   r   r{   r	   r   r   r   r   r   r    r!   r-   r/   r7   r9   rL   rR   rp   Moduler
   r   r   r   <module>r      s   8 8  " )
* 49ellDell1C,DeKL

 
: $s) *  * d5<<.@)A *  
 s Z( T$u||2D-E dd5<<01 d dU\\ dVZ[_`e`l`l[mVn d
E E E
P

Pll
P 
P 5<<u||34	
Pz d:.> 4 HDUXX__ HDr   