o
    dZh|                     @  s  d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 er4d dl
Zd dlmZ d dlmZ d dlmZ G d	d
 d
eZG dd dZG dd deddZG dd deddZ			d$d%d d!Zed"krd dlZd dlZe Zed# e Zej ej!Z"ede"Z#e$e# dS dS )&    )annotations)TYPE_CHECKINGLiteral	TypedDict)	AudioData)WhisperCompatibleRecognizerNWhisperModel)Segment)Unpackc                   @  s&   e Zd ZU ded< ded< ded< dS )TranscribeOutputstrtextzlist[Segment]segmentslanguageN__name__
__module____qualname____annotations__ r   r   j/var/www/auris/lib/python3.10/site-packages/speech_recognition/recognizers/whisper_local/faster_whisper.pyr      s   
 r   c                   @  s    e Zd ZdddZdd
dZdS )TranscribableAdaptermodelr	   returnNonec                 C  s
   || _ d S N)r   )selfr   r   r   r   __init__   s   
zTranscribableAdapter.__init__audio_array
np.ndarrayr   c                 K  s>   | j j|fi |\}}t|}ddd |D ||jdS )N c                 s  s    | ]}|j V  qd S r   )r   ).0segmentr   r   r   	<genexpr>!   s    z2TranscribableAdapter.transcribe.<locals>.<genexpr>)r   r   r   )r   
transcribelistjoinr   )r   r   kwargsZsegments_generatorinfor   r   r   r   r%      s   zTranscribableAdapter.transcribeN)r   r	   r   r   )r   r    r   r   )r   r   r   r   r%   r   r   r   r   r      s    
r   c                   @  s&   e Zd ZU ded< ded< ded< dS )InitOptionalParameterszLiteral['cpu', 'gpu', 'auto']Zdevicer   Zcompute_typeZdownload_rootNr   r   r   r   r   r*   '      
 r*   F)totalc                   @  s&   e Zd ZU ded< ded< ded< dS )TranscribeOptionalParametersr   r   z"Literal['transcribe', 'translate']taskintZ	beam_sizeNr   r   r   r   r   r-   /   r+   r-   base
audio_datar   r   r   	show_dictboolinit_optionsInitOptionalParameters | Nonetranscribe_options$Unpack[TranscribeOptionalParameters]r   str | TranscribeOutputc                 K  sB   ddl m} ||fi |pi }tt|}|j|fd|i|S )a  Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using Whisper.

    Pick ``model`` size (Same as Whisper).

    If ``show_dict`` is true, returns the detailed response from Whisper, including the detected language. Otherwise returns only the transcription.

    You can specify:

        * ``language``: recognition language, an uncapitalized 2 letters language name like "en" or "fr".

            * If not set, Faster Whisper will automatically detect the language.

        * ``task``

            * If you want transcribe + **translate** to english, set ``task="translate"``.

    Other values are passed directly to whisper. See https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py for all options.
    r   r   r2   )faster_whisperr	   r   r   	recognize)Z
recognizerr1   r   r2   r4   r6   r	   Zwhisper_recognizerr   r   r   r:   7   s   r:   __main__
audio_file)r0   FN)r1   r   r   r   r2   r3   r4   r5   r6   r7   r   r8   )%
__future__r   typingr   r   r   Zspeech_recognition.audior   Z1speech_recognition.recognizers.whisper_local.baser   numpynpr9   r	   Zfaster_whisper.transcriber
   Ztyping_extensionsr   r   r   r*   r-   r:   r   argparseZspeech_recognitionsrArgumentParserparseradd_argument
parse_argsargs	from_filer<   r1   Ztranscriptionprintr   r   r   r   <module>   s6    %

