
    /h
                     t    S r SSKJr  SSKJr  SSKJr  S r " S S\5      r " S S	\5      r	 " S
 S\5      r
g)zSmoothing algorithms for language modeling.

According to Chen & Goodman 1995 these should work with both Backoff and
Interpolation.
    )methodcaller)	Smoothing)ConditionalFreqDistc                    ^ [        U [        5      (       a  [        S5      OS m[        U4S jU R	                  5        5       5      $ )zCount values that are greater than zero in a distribution.

Assumes distribution is either a mapping with counts as values or
an instance of `nltk.ConditionalFreqDist`.
Nc                     U $ N )counts    I/var/www/auris/envauris/lib/python3.13/site-packages/nltk/lm/smoothing.py<lambda>'_count_values_gt_zero.<locals>.<lambda>   s    5    c              3   D   >#    U  H  nT" U5      S :  d  M  Sv   M     g7f)r      Nr
   ).0dist_or_countas_counts     r   	<genexpr>(_count_values_gt_zero.<locals>.<genexpr>   s$      4m8ORS8S4s    	 )
isinstancer   r   sumvalues)distributionr   s    @r   _count_values_gt_zeror      sJ     l$788 	S    +224  r   c                   >   ^  \ rS rSrSrU 4S jrS rS rS rSr	U =r
$ )
WittenBell$   zWitten-Bell smoothing.c                 (   > [         TU ]  " X40 UD6  g r	   )super__init__)self
vocabularycounterkwargs	__class__s       r   r!   WittenBell.__init__'   s    77r   c                 t    U R                   U   R                  U5      nU R                  U5      nSU-
  U-  U4$ )Ng      ?)countsfreq_gammar"   wordcontextalphagammas        r   alpha_gammaWittenBell.alpha_gamma*   s=    G$))$/G$eu$e++r   c                 v    [        U R                  U   5      nX"U R                  U   R                  5       -   -  $ r	   )r   r)   r   r"   r.   n_pluss      r   r+   WittenBell._gamma/   s5    &t{{7';<$++g"6"8"8"::;;r   c                 L    U R                   R                  R                  U5      $ r	   r)   unigramsr*   r"   r-   s     r   unigram_scoreWittenBell.unigram_score3       {{##((..r   r
   __name__
__module____qualname____firstlineno____doc__r!   r1   r+   r;   __static_attributes____classcell__r&   s   @r   r   r   $   s     8,
</ /r   r   c                   B   ^  \ rS rSrSrSU 4S jjrS rS rS rSr	U =r
$ )	AbsoluteDiscounting7   z!Smoothing with absolute discount.c                 4   > [         TU ]  " X40 UD6  X0l        g r	   )r    r!   discount)r"   r#   r$   rK   r%   r&   s        r   r!   AbsoluteDiscounting.__init__:   s    77 r   c                     [        U R                  U   U   U R                  -
  S5      U R                  U   R                  5       -  nU R	                  U5      nX44$ )Nr   )maxr)   rK   r   r+   r,   s        r   r1   AbsoluteDiscounting.alpha_gamma>   sZ    G$T*T]]:A>kk'"$$&' 	 G$|r   c                     [        U R                  U   5      nU R                  U-  U R                  U   R                  5       -  $ r	   )r   r)   rK   r   r4   s      r   r+   AbsoluteDiscounting._gammaF   s;    &t{{7';<&$++g*>*@*@*BBBr   c                 L    U R                   R                  R                  U5      $ r	   r8   r:   s     r   r;   !AbsoluteDiscounting.unigram_scoreJ   r=   r   )rK   )g      ?r>   rF   s   @r   rH   rH   7   s     +!C/ /r   rH   c                   R   ^  \ rS rSrSrSU 4S jjrS rS r\" 5       4S jr	Sr
U =r$ )		KneserNeyN   a  Kneser-Ney Smoothing.

This is an extension of smoothing with a discount.

Resources:
- https://pages.ucsd.edu/~rlevy/lign256/winter2008/kneser_ney_mini_example.pdf
- https://www.youtube.com/watch?v=ody1ysUTD7o
- https://medium.com/@dennyc/a-simple-numerical-example-for-kneser-ney-smoothing-nlp-4600addf38b8
- https://www.cl.uni-heidelberg.de/courses/ss15/smt/scribe6.pdf
- https://www-i6.informatik.rwth-aachen.de/publications/download/951/Kneser-ICASSP-1995.pdf
c                 @   > [         TU ]  " X40 UD6  X@l        X0l        g r	   )r    r!   rK   _order)r"   r#   r$   orderrK   r%   r&   s         r   r!   KneserNey.__init__[   s    77 r   c                 0    U R                  U5      u  p#X#-  $ r	   )_continuation_counts)r"   r-   word_continuation_counttotal_counts       r   r;   KneserNey.unigram_score`   s    /3/H/H/N,&44r   c                    U R                   U   n[        U5      S-   U R                  :X  a  X1   UR                  5       4OU R	                  X5      u  pE[        X@R                  -
  S5      U-  nU R                  [        U5      -  U-  nXg4$ )Nr   g        )r)   lenrX   r   r\   rN   rK   r   )r"   r-   r.   prefix_countsr]   r^   r/   r0   s           r   r1   KneserNey.alpha_gammad   s    G, 7|a4;;.  -//"34**49 	-
 +mm;SAKO 5m DD{R|r   c                    ^ U4S jU R                   [        T5      S-      R                  5        5       nSu  pEU H$  nU[        Xa   S:  5      -  nU[	        U5      -  nM&     XE4$ )zCount continuations that end with context and word.

Continuations track unique ngram "types", regardless of how many
instances were observed for each "type".
This is different than raw ngram counts which track number of instances.
c              3   B   >#    U  H  u  pUS S T:X  d  M  Uv   M     g7f)r   Nr
   )r   prefix_ngramr)   r.   s      r   r   1KneserNey._continuation_counts.<locals>.<genexpr>v   s,      ,
(M$AB7* F(Ms   	   )r   r   r   )r)   ra   itemsintr   )r"   r-   r.    higher_order_ngrams_with_context#higher_order_ngrams_with_word_counttotalr)   s     `    r   r\   KneserNey._continuation_countso   su    ,
(,CL14D(E(K(K(M,
(
 6:2+6F/3v|a7G3HH/*622E 7 399r   )rX   rK   )g?)r?   r@   rA   rB   rC   r!   r;   r1   tupler\   rD   rE   rF   s   @r   rU   rU   N   s(    

5	 27 : :r   rU   N)rC   operatorr   nltk.lm.apir   nltk.probabilityr   r   r   rH   rU   r
   r   r   <module>rs      s>   
 " ! 0"/ /&/) /.1:	 1:r   