"""Tokenization classes for RAG."""

import os
import warnings
from typing import List, Optional

from ...tokenization_utils_base import BatchEncoding
from ...utils import logging
from .configuration_rag import RagConfig


logger = logging.get_logger(__name__)


class RagTokenizer:
    def __init__(self, question_encoder, generator):
        self.question_encoder = question_encoder
        self.generator = generator
        # Inputs (questions) are tokenized with the question encoder by default;
        # `_switch_to_target_mode` swaps in the generator tokenizer for targets.
        self.current_tokenizer = self.question_encoder

    def save_pretrained(self, save_directory):
        if os.path.isfile(save_directory):
            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)
        # Each sub-tokenizer is saved to its own subfolder.
        question_encoder_path = os.path.join(save_directory, "question_encoder_tokenizer")
        generator_path = os.path.join(save_directory, "generator_tokenizer")
        self.question_encoder.save_pretrained(question_encoder_path)
        self.generator.save_pretrained(generator_path)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        # Imported here to avoid a circular import at module load time.
        from ..auto.tokenization_auto import AutoTokenizer

        config = kwargs.pop("config", None)
        if config is None:
            config = RagConfig.from_pretrained(pretrained_model_name_or_path)

        question_encoder = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, config=config.question_encoder, subfolder="question_encoder_tokenizer"
        )
        generator = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, config=config.generator, subfolder="generator_tokenizer"
        )
        return cls(question_encoder=question_encoder, generator=generator)

    def __call__(self, *args, **kwargs):
        return self.current_tokenizer(*args, **kwargs)

    def batch_decode(self, *args, **kwargs):
        return self.generator.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        return self.generator.decode(*args, **kwargs)

    def _switch_to_input_mode(self):
        self.current_tokenizer = self.question_encoder

    def _switch_to_target_mode(self):
        self.current_tokenizer = self.generator

    def prepare_seq2seq_batch(
        self,
        src_texts: List[str],
        tgt_texts: Optional[List[str]] = None,
        max_length: Optional[int] = None,
        max_target_length: Optional[int] = None,
        padding: str = "longest",
        return_tensors: Optional[str] = None,
        truncation: bool = True,
        **kwargs,
    ) -> BatchEncoding:
        warnings.warn(
            "`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the "
            "regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` "
            "context manager to prepare your targets. See the documentation of your specific tokenizer for more "
            "details",
            FutureWarning,
        )
        if max_length is None:
            max_length = self.current_tokenizer.model_max_length
        model_inputs = self(
            src_texts,
            add_special_tokens=True,
            return_tensors=return_tensors,
            max_length=max_length,
            padding=padding,
            truncation=truncation,
            **kwargs,
        )
        if tgt_texts is None:
            return model_inputs
        # Process tgt_texts
        if max_target_length is None:
            max_target_length = self.current_tokenizer.model_max_length
        labels = self(
            text_target=tgt_texts,
            add_special_tokens=True,
            return_tensors=return_tensors,
            padding=padding,
            max_length=max_target_length,
            truncation=truncation,
            **kwargs,
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs


__all__ = ["RagTokenizer"]
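
# A minimal usage sketch (illustrative only, not part of the module): loading the
# tokenizer pair from a pretrained RAG checkpoint and encoding a question. The
# checkpoint name "facebook/rag-token-base" is an assumption here and must be
# available locally or on the Hugging Face Hub.
#
#     from transformers import RagTokenizer
#
#     tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
#     # __call__ dispatches to the question-encoder tokenizer by default
#     inputs = tokenizer("who holds the record in 100m freestyle?", return_tensors="pt")
#
# `decode`/`batch_decode` always use the generator tokenizer, matching the ids
# produced by the RAG generator during generation.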