o
    rZh                     @   sZ   d dl mZ d dlmZ d dlmZmZ ejZeedddddZ	de	_
d	d
 Zdd ZdS )    )partial)chain)
everygramspad_sequenceTz<s>z</s>)pad_leftZleft_pad_symbol	pad_rightZright_pad_symbolzPads both ends of a sentence to length specified by ngram order.

    Following convention <s> pads the start of sentence </s> pads its end.
    c                 C   s   t tt|| d| dS )zpHelper with some useful defaults.

    Applies pad_both_ends to sentence and follows it up with everygrams.
    nmax_len)r   listpad_both_ends)orderZsentence r   D/var/www/auris/lib/python3.10/site-packages/nltk/lm/preprocessing.pypadded_everygrams   s   r   c                    s.   t t d fdd|D tt|fS )a  Default preprocessing for a sequence of sentences.

    Creates two iterators:

    - sentences padded and turned into sequences of `nltk.util.everygrams`
    - sentences padded as above and chained together for a flat stream of words

    :param order: Largest ngram length produced by `everygrams`.
    :param text: Text to iterate over. Expected to be an iterable of sentences.
    :type text: Iterable[Iterable[str]]
    :return: iterator over text as ngrams, iterator over text as vocabulary data
    r   c                 3   s$    | ]}t t| d V  qdS )r
   N)r   r   ).0sentr   Z
padding_fnr   r   	<genexpr>1   s   " z,padded_everygram_pipeline.<locals>.<genexpr>)r   r   flattenmap)r   textr   r   r   padded_everygram_pipeline"   s   r   N)	functoolsr   	itertoolsr   Z	nltk.utilr   r   from_iterabler   r   __doc__r   r   r   r   r   r   <module>   s   