o
    rZh                     @   sd   d Z ddlmZmZ ddlmZmZmZ ddlm	Z	 ddl
mZ G dd deZG dd	 d	eZd
S )z
Tokenizer Interface
    )ABCabstractmethod)IteratorListTuple)
overridden)string_span_tokenizec                   @   s   e Zd ZdZededee fddZdedee	e
e
f  fddZdee deee  fd	d
Zdee deee	e
e
f   fddZdS )
TokenizerIz
    A processing interface for tokenizing a string.
    Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
    sreturnc                 C   s   t | jr| |gd S dS )zL
        Return a tokenized copy of *s*.

        :rtype: List[str]
        r   N)r   tokenize_sentsselfr
    r   @/var/www/auris/lib/python3.10/site-packages/nltk/tokenize/api.pytokenize   s   
zTokenizerI.tokenizec                 C   s   t  )z
        Identify the tokens using integer offsets ``(start_i, end_i)``,
        where ``s[start_i:end_i]`` is the corresponding token.

        :rtype: Iterator[Tuple[int, int]]
        NotImplementedErrorr   r   r   r   span_tokenize$   s   zTokenizerI.span_tokenizestringsc                    s    fdd|D S )z
        Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

            return [self.tokenize(s) for s in strings]

        :rtype: List[List[str]]
        c                    s   g | ]}  |qS r   )r   ).0r
   r   r   r   
<listcomp>5   s    z-TokenizerI.tokenize_sents.<locals>.<listcomp>r   )r   r   r   r   r   r   -   s   zTokenizerI.tokenize_sentsc                 c   s     |D ]
}t | |V  qdS )z
        Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

            return [self.span_tokenize(s) for s in strings]

        :yield: List[Tuple[int, int]]
        N)listr   )r   r   r
   r   r   r   span_tokenize_sents7   s   
zTokenizerI.span_tokenize_sentsN)__name__
__module____qualname____doc__r   strr   r   r   r   intr   r   r   r   r   r   r   r	      s    		
r	   c                   @   s0   e Zd ZdZeedd Zdd Zdd ZdS )	StringTokenizerzxA tokenizer that divides a string into substrings by splitting
    on the specified string (defined in subclasses).
    c                 C   s   t Nr   r   r   r   r   _stringJ   s   zStringTokenizer._stringc                 C   s   | | jS r"   )splitr#   r   r   r   r   r   O   s   zStringTokenizer.tokenizec                 c   s    t || jE d H  d S r"   )r   r#   r   r   r   r   r   R   s   zStringTokenizer.span_tokenizeN)	r   r   r   r   propertyr   r#   r   r   r   r   r   r   r!   E   s    r!   N)r   abcr   r   typingr   r   r   Znltk.internalsr   Znltk.tokenize.utilr   r	   r!   r   r   r   r   <module>   s   1