o
    ZhZ
                     @   s6   d Z ddlmZ ddlmZmZmZ G dd dZdS )z Tokenization utils for RoFormer.    )List)NormalizedStringPreTokenizedStringnormalizersc                   @   s>   e Zd ZdddZdededee fddZd	efd
dZ	dS )JiebaPreTokenizerreturnNc                 C   sH   || _ tjddddd| _zdd l}W n ty   tdw || _d S )NFT)Z
clean_textZhandle_chinese_charsZstrip_accentsZ	lowercaser   zkYou need to install rjieba to use RoFormerTokenizer. See https://pypi.org/project/rjieba/ for installation.)vocabr   ZBertNormalizerrjiebaImportErrorjieba)selfr   r	    r   ^/var/www/auris/lib/python3.10/site-packages/transformers/models/roformer/tokenization_utils.py__init__   s   
zJiebaPreTokenizer.__init__inormalized_stringc                 C   s   g }| j jt|ddD ]4\}}}|| jv r ||||  q| j| }|D ]}|r?|t| }||||  |}q*q|S )NF)Zhmm)	r   tokenizestrr   appendr   Znormalize_strsplitlen)r   r   r   ZsplitstokenstartendZ
token_listr   r   r   jieba_split(   s   
zJiebaPreTokenizer.jieba_splitpretokc                 C   s   | | j d S )N)r   r   )r   r   r   r   r   pre_tokenizeC   s   zJiebaPreTokenizer.pre_tokenize)r   N)
__name__
__module____qualname__r   intr   r   r   r   r   r   r   r   r   r      s    
r   N)__doc__typingr   Z
tokenizersr   r   r   r   r   r   r   r   <module>   s   