from __future__ import annotations

from typing import TYPE_CHECKING, Literal, TypedDict

from speech_recognition.audio import AudioData
from speech_recognition.recognizers.whisper_local.base import (
    WhisperCompatibleRecognizer,
)

if TYPE_CHECKING:
    import numpy as np
    import torch
    from typing_extensions import Unpack
    from whisper import Whisper


class LoadModelOptionalParameters(TypedDict, total=False):
    # Optional keyword arguments forwarded to ``whisper.load_model``.
    device: str | torch.device
    download_root: str
    in_memory: bool


class TranscribeOptionalParameters(TypedDict, total=False):
    """Transcribe optional parameters & DecodingOptions parameters."""

    temperature: float | tuple[float, ...]
    task: Literal["transcribe", "translate"]
    language: str
    fp16: bool


class Segment(TypedDict):
    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: list[int]
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float


class TranscribeOutput(TypedDict):
    text: str
    segments: list[Segment]
    language: str


class TranscribableAdapter:
    """Adapts a loaded ``whisper.Whisper`` model to the interface expected by
    ``WhisperCompatibleRecognizer``."""

    def __init__(self, model: Whisper) -> None:
        self.model = model

    def transcribe(
        self, audio_array: np.ndarray, **kwargs
    ) -> TranscribeOutput:
        # Default ``fp16`` to CUDA availability unless the caller set it explicitly.
        if "fp16" not in kwargs:
            import torch

            kwargs["fp16"] = torch.cuda.is_available()

        return self.model.transcribe(audio_array, **kwargs)


def recognize(
    recognizer,
    audio_data: AudioData,
    model: str = "base",
    show_dict: bool = False,
    load_options: LoadModelOptionalParameters | None = None,
    **transcribe_options: Unpack[TranscribeOptionalParameters],
) -> str | TranscribeOutput:
    """Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using Whisper.

    Pick ``model`` from output of :command:`python -c 'import whisper; print(whisper.available_models())'`.
    See also https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages.

    If ``show_dict`` is true, returns the full dict response from Whisper, including the detected language. Otherwise returns only the transcription.

    You can specify:

        * ``language``: recognition language, an uncapitalized full language name like "english" or "chinese". See the full language list at https://github.com/openai/whisper/blob/main/whisper/tokenizer.py

            * If not set, Whisper will automatically detect the language.

        * ``task``

            * If you want transcribe + **translate** to english, set ``task="translate"``.

    Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options.
    """
    import whisper

    whisper_model = whisper.load_model(model, **load_options or {})
    whisper_recognizer = WhisperCompatibleRecognizer(
        TranscribableAdapter(whisper_model)
    )
    return whisper_recognizer.recognize(
        audio_data, show_dict=show_dict, **transcribe_options
    )
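

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this module). It assumes this
# ``recognize`` function is exposed on ``speech_recognition.Recognizer`` as
# ``recognize_whisper`` and that a ``speech.wav`` file exists; adjust names to
# match your installed version.
#
#   import speech_recognition as sr
#
#   r = sr.Recognizer()
#   with sr.AudioFile("speech.wav") as source:
#       audio = r.record(source)
#
#   # Plain transcription (a string):
#   text = r.recognize_whisper(audio, model="base", language="english")
#
#   # Full Whisper output, including segments and the detected language:
#   result = r.recognize_whisper(audio, model="base", show_dict=True)
#   print(result["language"], result["text"])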