
    h
                         S SK JrJr  S SKJrJr  S SKrS SKJr	  S SKJ
r
  S SKJr   " S S\5      r " S S	\5      rSS
\
S\\   S\4S jjr " S S\5      rS rS r " S S\5      rg)    )ABCabstractmethod)DictListN)Tensor)	TokenSpanc                   B    \ rS rSr\S\\   S\\\      4S j5       rSrg)
ITokenizer
   
transcriptreturnc                     g)zTokenize the given transcript (list of word)

.. note::

   The toranscript must be normalized.

Args:
    transcript (list of str): Transcript (list of word).

Returns:
    (list of int): List of token sequences
N )selfr   s     ^/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/pipelines/_wav2vec2/aligner.py__call__ITokenizer.__call__           r   N)	__name__
__module____qualname____firstlineno__r   r   strr   __static_attributes__r   r   r   r
   r
   
   s.    49 d3i  r   r
   c                   P    \ rS rSrS\\\4   4S jrS\\   S\\\      4S jr	Sr
g)		Tokenizer   
dictionaryc                     Xl         g Nr   )r   r   s     r   __init__Tokenizer.__init__   s    $r   r   r   c           	      z    U VVs/ s H!  o" Vs/ s H  o0R                   U   PM     snPM#     snn$ s  snf s  snnf r!   r"   )r   r   wordcs       r   r   Tokenizer.__call__   s3    ?IJztT2T#T2zJJ2Js   	7277r"   N)r   r   r   r   r   r   intr#   r   r   r   r   r   r   r   r      s8    %4S> %K49 Kd3i Kr   r   emissiontokensblankc                     U R                   nU R                  S5      n [        R                  " U/[        R                  US9n[
        R                  " XUS9u  pVUR                  5       nUS   US   peXV4$ )Nr   )dtypedevicer,   )r/   	unsqueezetorchtensorint32Fforced_alignexp)r*   r+   r,   r/   targetsaligned_tokensscoress          r   _align_emission_and_tokensr;   #   sj    __F!!!$HllF85;;vFG^^HUKNZZ\F+A.q	F!!r   c            	       L    \ rS rSr\S\S\\\      S\\\      4S j5       r	Sr
g)IAligner/   r*   r+   r   c                     g)a  Generate list of time-stamped token sequences

Args:
    emission (Tensor): Sequence of token probability distributions in log-domain.
        Shape: `(time, tokens)`.
    tokens (list of integer sequence): Tokenized transcript.
        Output from :py:class:`torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer`.

Returns:
    (list of TokenSpan sequence): Tokens with time stamps and scores.
Nr   )r   r*   r+   s      r   r   IAligner.__call__0   r   r   r   N)r   r   r   r   r   r   r   r)   r   r   r   r   r   r   r=   r=   /   s:     d3i T$y/EZ  r   r=   c                     [        U 5      [        U5      :X  d   eSn/ nU H  nUR                  XX$-    5        X$-  nM     U$ )Nr   )lensumappend)list_lengthsiretls        r   
_unflattenrJ   ?   sM    u:W%%%	A
C

5QU#$	  Jr   c                 H    U  VVs/ s H  o  H  o"PM     M     snn$ s  snnf r!   r   )nested_listrE   items      r   _flattenrN   I   s"    )<kUedDeDk<<<s   c                   H    \ rS rSrS rS\S\\\      S\\\      4S jr	Sr
g)	AlignerM   c                     Xl         g r!   r0   )r   r,   s     r   r#   Aligner.__init__N   s    
r   r*   r+   r   c           	         UR                   S:w  a  [        SUR                   35      e[        U[	        U5      U R
                  5      u  p4[        R                  " X45      n[        XR Vs/ s H  n[        U5      PM     sn5      $ s  snf )N   z&The input emission must be 2D. Found: )
ndim
ValueErrorshaper;   rN   r,   r5   merge_tokensrJ   rB   )r   r*   r+   r9   r:   spanstss          r   r   Aligner.__call__Q   st    ==AEhnnEUVWW!;HhvFVX\XbXb!c~6%F!;Fb#b'F!;<<!;s   *B
r0   N)r   r   r   r   r#   r   r   r)   r   r   r   r   r   r   rP   rP   M   s2    = =d3i =T$y/EZ =r   rP   )r   )abcr   r   typingr   r   r2   torchaudio.functional
functionalr5   r   r   r
   r   r)   r;   r=   rJ   rN   rP   r   r   r   <module>ra      sq    #   !  + "K
 K	" 	"c 	"3 	"s  =
=h 
=r   