
"""Language Model Interface."""

import random
import warnings
from abc import ABCMeta, abstractmethod
from bisect import bisect
from itertools import accumulate

from nltk.lm.counter import NgramCounter
from nltk.lm.util import log_base2
from nltk.lm.vocabulary import Vocabulary


class Smoothing(metaclass=ABCMeta):
    """Ngram Smoothing Interface

    Implements Chen & Goodman 1995's idea that all smoothing algorithms have
    certain features in common. This should ideally allow smoothing algorithms to
    work both with Backoff and Interpolation.
    """

    def __init__(self, vocabulary, counter):
        """
        :param vocabulary: The Ngram vocabulary object.
        :type vocabulary: nltk.lm.vocab.Vocabulary
        :param counter: The counts of the vocabulary items.
        :type counter: nltk.lm.counter.NgramCounter
        """
        self.vocab = vocabulary
        self.counts = counter

    @abstractmethod
    def unigram_score(self, word):
        raise NotImplementedError()

    @abstractmethod
    def alpha_gamma(self, word, context):
        raise NotImplementedError()
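

# Illustrative sketch (editor's addition, not part of the original module): a
# hypothetical additive-smoothing subclass showing only the *shape* of this
# interface. The real implementations live in `nltk.lm.smoothing`; the values
# returned by `alpha_gamma` here are an assumption, chosen so that
# `alpha + gamma * lower_order_score` still sums to one over the vocabulary.
class _AdditiveSmoothingSketch(Smoothing):
    """Hypothetical add-gamma smoothing, for illustration only."""

    GAMMA = 0.1

    def unigram_score(self, word):
        # Lidstone-style estimate: (count + gamma) / (N + gamma * V)
        n = self.counts.unigrams.N()
        v = len(self.vocab)
        return (self.counts.unigrams[word] + self.GAMMA) / (n + self.GAMMA * v)

    def alpha_gamma(self, word, context):
        counts = self.counts[len(context) + 1][context]
        denom = counts.N() + self.GAMMA * len(self.vocab)
        alpha = counts[word] / denom
        gamma = (self.GAMMA * len(self.vocab)) / denom
        return alpha, gamma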


def _mean(items):
    """Return average (aka mean) for sequence of items."""
    return sum(items) / len(items)


def _random_generator(seed_or_generator):
    if isinstance(seed_or_generator, random.Random):
        return seed_or_generator
    return random.Random(seed_or_generator)


def _weighted_choice(population, weights, random_generator=None):
    """Like random.choice, but with weights.

    Heavily inspired by python 3.6 `random.choices`.
    """
    if not population:
        raise ValueError("Can't choose from empty population")
    if len(population) != len(weights):
        raise ValueError("The number of weights does not match the population")
    cum_weights = list(accumulate(weights))
    total = cum_weights[-1]
    threshold = random_generator.random()
    return population[bisect(cum_weights, total * threshold)]


class LanguageModel(metaclass=ABCMeta):
    """ABC for Language Models.

    Cannot be directly instantiated itself.

    """

    def __init__(self, order, vocabulary=None, counter=None):
        """Creates new LanguageModel.

        :param vocabulary: If provided, this vocabulary will be used instead
            of creating a new one when training.
        :type vocabulary: `nltk.lm.Vocabulary` or None
        :param counter: If provided, use this object to count ngrams.
        :type counter: `nltk.lm.NgramCounter` or None
        :param ngrams_fn: If given, defines how sentences in training text are turned to ngram
            sequences.
        :type ngrams_fn: function or None
        :param pad_fn: If given, defines how sentences in training text are padded.
        :type pad_fn: function or None
        """
        self.order = order
        if vocabulary and not isinstance(vocabulary, Vocabulary):
            warnings.warn(
                f"The `vocabulary` argument passed to {self.__class__.__name__} "
                "must be an instance of `nltk.lm.Vocabulary`.",
                stacklevel=3,
            )
        self.vocab = Vocabulary() if vocabulary is None else vocabulary
        self.counts = NgramCounter() if counter is None else counter

    def fit(self, text, vocabulary_text=None):
        """Trains the model on a text.

        :param text: Training text as a sequence of sentences.

        """
        if not self.vocab:
            if vocabulary_text is None:
                raise ValueError(
                    "Cannot fit without a vocabulary or text to create it from."
                )
            self.vocab.update(vocabulary_text)
        self.counts.update(self.vocab.lookup(sent) for sent in text)
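
    # Usage sketch (editor's addition), mirroring the doctest in `generate` below:
    # the training text is an iterable of "sentences" that have already been turned
    # into ngram tuples, while `vocabulary_text` supplies words for the vocabulary.
    #
    #     lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=["a", "b", "c"])
    #     lm.fit([[("a",), ("b",), ("c",)]])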

    def score(self, word, context=None):
        """Masks out of vocab (OOV) words and computes their model score.

        For model-specific logic of calculating scores, see the `unmasked_score`
        method.
        """
        return self.unmasked_score(
            self.vocab.lookup(word), self.vocab.lookup(context) if context else None
        )

    @abstractmethod
    def unmasked_score(self, word, context=None):
        """Score a word given some optional context.

        Concrete models are expected to provide an implementation.
        Note that this method does not mask its arguments with the OOV label.
        Use the `score` method for that.

        :param str word: Word for which we want the score
        :param tuple(str) context: Context the word is in.
            If `None`, compute unigram score.
        :param context: tuple(str) or None
        :rtype: float
        """
        raise NotImplementedError()
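
    # Illustrative sketch (editor's addition): a maximum-likelihood model would
    # implement `unmasked_score` as the relative frequency of the word among the
    # counts for its context, roughly:
    #
    #     def unmasked_score(self, word, context=None):
    #         return self.context_counts(context).freq(word)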

    def logscore(self, word, context=None):
        """Evaluate the log score of this word in this context.

        The arguments are the same as for `score` and `unmasked_score`.

        """
        return log_base2(self.score(word, context))
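
    # Worked example (editor's addition): `log_base2` is a base-2 logarithm, so a
    # model score of 0.25 yields a logscore of -2.0 and a score of 0.5 yields -1.0.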

    def context_counts(self, context):
        """Helper method for retrieving counts for a given context.

        Assumes context has been checked and oov words in it masked.
        :type context: tuple(str) or None

        """
        return (
            self.counts[len(context) + 1][context] if context else self.counts.unigrams
        )

    def entropy(self, text_ngrams):
        """Calculate cross-entropy of model for given evaluation text.

        This implementation is based on the Shannon-McMillan-Breiman theorem,
        as used and referenced by Dan Jurafsky and Jordan Boyd-Graber.

        :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
        :rtype: float

        """
        return -1 * _mean(
            [self.logscore(ngram[-1], ngram[:-1]) for ngram in text_ngrams]
        )
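
    # Worked example (editor's addition): entropy is the average negative base-2 log
    # score of each ngram's final word given its preceding words. If a bigram model
    # scores each of four test bigrams at 0.25, every logscore is -2.0, the entropy
    # is 2.0, and the perplexity defined below is 2 ** 2.0 == 4.0.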

    def perplexity(self, text_ngrams):
        """Calculates the perplexity of the given text.

        This is simply 2 ** cross-entropy for the text, so the arguments are the same.

        """
        return pow(2.0, self.entropy(text_ngrams))

    def generate(self, num_words=1, text_seed=None, random_seed=None):
        """Generate words from the model.

        :param int num_words: How many words to generate. By default 1.
        :param text_seed: Generation can be conditioned on preceding context.
        :param random_seed: A random seed or an instance of `random.Random`. If provided,
            makes the random sampling part of generation reproducible.
        :return: One (str) word or a list of words generated from model.

        Examples:

        >>> from nltk.lm import MLE
        >>> lm = MLE(2)
        >>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=['a', 'b', 'c'])
        >>> lm.fit([[("a",), ("b",), ("c",)]])
        >>> lm.generate(random_seed=3)
        'a'
        >>> lm.generate(text_seed=['a'])
        'b'

        """
        text_seed = [] if text_seed is None else list(text_seed)
        random_generator = _random_generator(random_seed)
        if num_words == 1:
            context = (
                text_seed[-self.order + 1 :]
                if len(text_seed) >= self.order
                else text_seed
            )
            samples = self.context_counts(self.vocab.lookup(context))
            while context and not samples:
                context = context[1:] if len(context) > 1 else []
                samples = self.context_counts(self.vocab.lookup(context))
            # Sorting samples achieves two things:
            # - reproducible randomness when sampling
            # - turns Mapping into Sequence which `_weighted_choice` expects
            samples = sorted(samples)
            return _weighted_choice(
                samples,
                tuple(self.score(w, context) for w in samples),
                random_generator,
            )
        # Build up text one word at a time, conditioning on the preceding output.
        generated = []
        for _ in range(num_words):
            generated.append(
                self.generate(
                    num_words=1,
                    text_seed=text_seed + generated,
                    random_seed=random_generator,
                )
            )
        return generated