
    /h                     p    S r SSKJrJr  SSKJrJrJr  SSKJ	r	  SSK
Jr   " S S\5      r " S S	\5      rg
)z
Tokenizer Interface
    )ABCabstractmethod)IteratorListTuple)
overridden)string_span_tokenizec                       \ rS rSrSr\S\S\\   4S j5       rS\S\	\
\\4      4S jrS\\   S\\\      4S jrS\\   S\	\\
\\4         4S	 jrS
rg)
TokenizerI   zz
A processing interface for tokenizing a string.
Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
sreturnc                 b    [        U R                  5      (       a  U R                  U/5      S   $ g)z4
Return a tokenized copy of *s*.

:rtype: List[str]
r   N)r   tokenize_sentsselfr   s     I/var/www/auris/envauris/lib/python3.13/site-packages/nltk/tokenize/api.pytokenizeTokenizerI.tokenize   s2     d))**&&s+A.. +    c                     [        5       e)z
Identify the tokens using integer offsets ``(start_i, end_i)``,
where ``s[start_i:end_i]`` is the corresponding token.

:rtype: Iterator[Tuple[int, int]]
NotImplementedErrorr   s     r   span_tokenizeTokenizerI.span_tokenize$   s     "##r   stringsc                 N    U Vs/ s H  o R                  U5      PM     sn$ s  snf )z
Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

    return [self.tokenize(s) for s in strings]

:rtype: List[List[str]]
)r   r   r   r   s      r   r   TokenizerI.tokenize_sents-   s#     +22'Qa '222s   "c              #   V   #    U H  n[        U R                  U5      5      v   M!     g7f)z
Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

    return [self.span_tokenize(s) for s in strings]

:yield: List[Tuple[int, int]]
N)listr   r   s      r   span_tokenize_sentsTokenizerI.span_tokenize_sents7   s'      At))!,-- s   ') N)__name__
__module____qualname____firstlineno____doc__r   strr   r   r   r   intr   r   r"   __static_attributes__r$   r   r   r   r      s    
 /# /$s) / /$s $xc3h'@ $3d3i 3DcO 3.Cy.	$uS#X'	(.r   r   c                   >    \ rS rSrSr\\S 5       5       rS rS r	Sr
g)StringTokenizerE   zpA tokenizer that divides a string into substrings by splitting
on the specified string (defined in subclasses).
c                     [         eNr   )r   s    r   _stringStringTokenizer._stringJ   s
     "!r   c                 8    UR                  U R                  5      $ r1   )splitr2   r   s     r   r   StringTokenizer.tokenizeO   s    wwt||$$r   c              #   J   #    [        XR                  5       S h  vN   g  N7fr1   )r	   r2   r   s     r   r   StringTokenizer.span_tokenizeR   s     '<<888s   #!#r$   N)r%   r&   r'   r(   r)   propertyr   r2   r   r   r,   r$   r   r   r.   r.   E   s-     "  "%9r   r.   N)r)   abcr   r   typingr   r   r   nltk.internalsr   nltk.tokenize.utilr	   r   r.   r$   r   r   <module>r>      s4    $ ( ( % 3.. ..b9j 9r   