o
    Zh[                     @   sJ   d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fddZ
d	S )
    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc           	      C   s   t |}|jdd |d t }|t }t| }t| }zddlm} ddlm	} t
| tr6|| } || j| W n tyH   tdw t|| j| jd }|| d	S )
a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktokenr   )get_encoding)dump_tiktoken_bpezY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.)Z
vocab_filepatternZadditional_special_tokensN)r   mkdirr   r   strabsoluter
   r   Ztiktoken.loadr   
isinstanceZ_mergeable_ranksImportError
ValueErrorr   Z_pat_strZ_special_tokensZ	convertedsave)	r   r   Z	save_fileZtokenizer_fileZsave_file_absoluteZoutput_file_absoluter   r   Z	tokenizer r   Q/var/www/auris/lib/python3.10/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fast   s*   

r   N)pathlibr   typingr   Z#transformers.convert_slow_tokenizerr   Z$transformers.tokenization_utils_fastr   r   r   r   r   r   r   r   <module>   s
    