import os
from typing import Dict, List, Optional, Union

import tensorflow as tf
from tensorflow_text import pad_model_inputs

from ...modeling_tf_utils import keras
from ...utils.import_utils import is_keras_nlp_available, requires
from .tokenization_gpt2 import GPT2Tokenizer


if is_keras_nlp_available():
    from keras_nlp.tokenizers import BytePairTokenizer


@requires(backends=("keras_nlp",))
class TFGPT2Tokenizer(keras.layers.Layer):
    """
    This is an in-graph tokenizer for GPT2. It should be initialized similarly to other tokenizers, using the
    `from_pretrained()` method. It can also be initialized with the `from_tokenizer()` method, which imports settings
    from an existing standard tokenizer object.

    In-graph tokenizers, unlike other Hugging Face tokenizers, are actually Keras layers and are designed to be run
    when the model is called, rather than during preprocessing. As a result, they have somewhat more limited options
    than standard tokenizer classes. They are most useful when you want to create an end-to-end model that goes
    straight from `tf.string` inputs to outputs.

    Args:
        vocab (Dict[str, int]): Vocabulary dict for Byte Pair Tokenizer
        merges (List[str]): Merges list for Byte Pair Tokenizer
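
    Example (a minimal usage sketch; assumes TensorFlow is installed, and the output
    keys follow `call()` below):

    ```python
    import tensorflow as tf

    from transformers import TFGPT2Tokenizer

    tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")

    # The layer runs in-graph: `tf.string` inputs map straight to token tensors
    outputs = tf_tokenizer(tf.constant(["Hello, world!"]))
    input_ids, attention_mask = outputs["input_ids"], outputs["attention_mask"]
    ```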
    Nvocabmerges
max_lengthpad_token_idc                    s6   t    || _|| _|| _|| _t|||d| _d S )N)Zsequence_length)super__init__r   r   r   r   r   tf_tokenizer)selfr   r   r   r   	__class__ \/var/www/auris/lib/python3.10/site-packages/transformers/models/gpt2/tokenization_gpt2_tf.pyr   !   s   
zTFGPT2Tokenizer.__init__	tokenizerc                 O   s4   dd |j  D }| }| ||g|R i |S )ag  Creates TFGPT2Tokenizer from GPT2Tokenizer

        Args:
            tokenizer (GPT2Tokenizer): The standard tokenizer to import settings from.

        Examples:

        ```python
        from transformers import AutoTokenizer, TFGPT2Tokenizer

        tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
        tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer)
        ```
        """
        # keras_nlp expects each merge rule as a single space-joined string
        merges = [" ".join(m) for m in tokenizer.bpe_ranks.keys()]
        vocab = tokenizer.get_vocab()
        return cls(vocab, merges, *args, **kwargs)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], *init_inputs, **kwargs):
        """Creates TFGPT2Tokenizer from pretrained GPT2Tokenizer

        Args:
            pretrained_model_name_or_path (Union[str, os.PathLike]): Path to pretrained model

        Examples:

        ```python
        from transformers import TFGPT2Tokenizer

        tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")
        ```
        """
        tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path, *init_inputs, **kwargs)
        return cls.from_tokenizer(tokenizer, *init_inputs, **kwargs)

    @classmethod
    def from_config(cls, config):
        """Creates TFGPT2Tokenizer from configurations

        Args:
            config (Dict): Dictionary with the keys returned by `get_config`.
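
        Example (a round-trip sketch; assumes `tf_tokenizer` is an existing `TFGPT2Tokenizer`):

        ```python
        # The config keys match the dict returned by `get_config` below
        config = tf_tokenizer.get_config()
        clone = TFGPT2Tokenizer.from_config(config)
        ```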
        """
        return cls(**config)

    def get_config(self):
        return {
            "vocab": self.vocab,
            "merges": self.merges,
            "max_length": self.max_length,
            "pad_token_id": self.pad_token_id,
        }

    def call(self, x, max_length: Optional[int] = None):
        """Tokenizes a batch of strings in-graph, returning `input_ids` and `attention_mask`."""
        input_ids = self.tf_tokenizer(x)
        attention_mask = tf.ones_like(input_ids)

        if self.pad_token_id is not None:
            # pad the tokens up to max length
            max_length = max_length if max_length is not None else self.max_length

            if max_length is not None:
                input_ids, attention_mask = pad_model_inputs(
                    input_ids, max_seq_length=max_length, pad_value=self.pad_token_id
                )

        return {"attention_mask": attention_mask, "input_ids": input_ids}


__all__ = ["TFGPT2Tokenizer"]