
    /h                     r    S SK Jr  S SKJr  S SKJrJr  \R                  r\" \SSSSS9r	S\	l
        S	 rS
 rg)    )partial)chain)
everygramspad_sequenceTz<s>z</s>)pad_leftleft_pad_symbol	pad_rightright_pad_symbolzPads both ends of a sentence to length specified by ngram order.

    Following convention <s> pads the start of sentence </s> pads its end.
    c           	      6    [        [        [        XS95      U S9$ )zhHelper with some useful defaults.

Applies pad_both_ends to sentence and follows it up with everygrams.
nmax_len)r   listpad_both_ends)ordersentences     M/var/www/auris/envauris/lib/python3.13/site-packages/nltk/lm/preprocessing.pypadded_everygramsr      s    
 d=;<eLL    c                 f   ^ ^ [        [        T S9mU U4S jU 5       [        [        TU5      5      4$ )a  Default preprocessing for a sequence of sentences.

Creates two iterators:

- sentences padded and turned into sequences of `nltk.util.everygrams`
- sentences padded as above and chained together for a flat stream of words

:param order: Largest ngram length produced by `everygrams`.
:param text: Text to iterate over. Expected to be an iterable of sentences.
:type text: Iterable[Iterable[str]]
:return: iterator over text as ngrams, iterator over text as vocabulary data
r   c              3   V   >#    U  H  n[        [        T" U5      5      TS 9v   M      g7f)r   N)r   r   ).0sentr   
padding_fns     r   	<genexpr>,padded_everygram_pipeline.<locals>.<genexpr>1   s"     LttDD)*E	:ts   &))r   r   flattenmap)r   textr   s   ` @r   padded_everygram_pipeliner!   "   s2     %0JLtLJ%& r   N)	functoolsr   	itertoolsr   	nltk.utilr   r   from_iterabler   r   __doc__r   r!    r   r   <module>r(      sI      .


 Mr   