
    /h(                     p    S r SSKJr  SSKJr  SSKJrJrJrJ	r	   " S S\5      r
S r\S:X  a  \" 5         g	g	)
a  
A classifier based on the Naive Bayes algorithm.  In order to find the
probability for a label, this algorithm first uses the Bayes rule to
express P(label|features) in terms of P(label) and P(features|label):

|                       P(label) * P(features|label)
|  P(label|features) = ------------------------------
|                              P(features)

The algorithm then makes the 'naive' assumption that all features are
independent, given the label:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                                         P(features)

Rather than computing P(features) explicitly, the algorithm just
calculates the numerator for each label, and normalizes them so they
sum to one:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
    )defaultdict)ClassifierI)DictionaryProbDistELEProbDistFreqDistsum_logsc                   Z    \ rS rSrSrS rS rS rS rSS jr	SS jr
\\4S	 j5       rS
rg)NaiveBayesClassifier+   a  
A Naive Bayes classifier.  Naive Bayes classifiers are
paramaterized by two probability distributions:

  - P(label) gives the probability that an input will receive each
    label, given no information about the input's features.

  - P(fname=fval|label) gives the probability that a given feature
    (fname) will receive a given value (fval), given that the
    label (label).

If the classifier encounters an input with a feature that has
never been seen with any label, then rather than assigning a
probability of 0 to all labels, it will ignore that feature.

The feature value 'None' is reserved for unseen feature values;
you generally should not use 'None' as a feature value for one of
your own features.
c                 X    Xl         X l        [        UR                  5       5      U l        g)a  
:param label_probdist: P(label), the probability distribution
    over labels.  It is expressed as a ``ProbDistI`` whose
    samples are labels.  I.e., P(label) =
    ``label_probdist.prob(label)``.

:param feature_probdist: P(fname=fval|label), the probability
    distribution for feature values, given labels.  It is
    expressed as a dictionary whose keys are ``(label, fname)``
    pairs and whose values are ``ProbDistI`` objects over feature
    values.  I.e., P(fname=fval|label) =
    ``feature_probdist[label,fname].prob(fval)``.  If a given
    ``(label,fname)`` is not a key in ``feature_probdist``, then
    it is assumed that the corresponding P(fname=fval|label)
    is 0 for all values of ``fval``.
N)_label_probdist_feature_probdistlistsamples_labels)selflabel_probdistfeature_probdists      P/var/www/auris/envauris/lib/python3.13/site-packages/nltk/classify/naivebayes.py__init__NaiveBayesClassifier.__init__@   s%    "  .!1N2245    c                     U R                   $ N)r   )r   s    r   labelsNaiveBayesClassifier.labelsU   s    ||r   c                 @    U R                  U5      R                  5       $ r   )prob_classifymax)r   
featuresets     r   classifyNaiveBayesClassifier.classifyX   s    !!*-1133r   c                 ,   UR                  5       n[        UR                  5       5       H,  nU R                   H  nX24U R                  ;   d  M    M(     X	 M.     0 nU R                   H   nU R
                  R                  U5      XC'   M"     U R                   Ho  nUR                  5        HX  u  p%X24U R                  ;   a-  U R                  X24   nXC==   UR                  U5      -  ss'   MC  XC==   [        / 5      -  ss'   MZ     Mq     [        USSS9$ )NT)	normalizelog)
copyr   keysr   r   r   logprobitemsr   r   )r   r    fnamelabelr(   fvalfeature_probss          r   r   "NaiveBayesClassifier.prob_classify[   s      __&
*//+,E>T%;%;; &
 % - \\E!1199%@GN " \\E)//1>T%;%;;$($:$:5<$HMNm&;&;D&AAN
 Nhrl2N  2 " "'TtDDr   c                   ^^^^	 U R                   m[        S5        U R                  U5       H  u  mmUUU4S jm	[        UUU4S jU R                   5       U	4S jSS9n[        U5      S:X  a  MD  US   nUS	   nTUT4   R                  T5      S:X  a  S
nO0STUT4   R                  T5      TUT4   R                  T5      -  -  n[        T<S ST<S SSU-  S S <S SSU-  S S <S SU< S3
5        M     g )NzMost Informative Featuresc                 0   > TU T4   R                  T5      $ r   )prob)lcpdistr*   r,   s    r   	labelprobFNaiveBayesClassifier.show_most_informative_features.<locals>.labelprob   s    ah',,T22r   c              3   ^   >#    U  H"  nTTUT4   R                  5       ;   d  M  Uv   M$     g 7fr   )r   ).0r2   r3   r*   r,   s     r   	<genexpr>FNaiveBayesClassifier.show_most_informative_features.<locals>.<genexpr>   s,     OLqDF1e84D4L4L4N,NLs   -	-c                    > T" U 5      * U 4$ r    )elementr4   s    r   <lambda>ENaiveBayesClassifier.show_most_informative_features.<locals>.<lambda>   s    i&8%8'$Br   T)keyreverse   r   INFz%8.1fz>24z = 14 z%s   z>6z : 6z : 1.0)r   printmost_informative_featuressortedr   lenr1   )
r   nr   l0l1ratior3   r*   r,   r4   s
         @@@@r   show_most_informative_features3NaiveBayesClassifier.show_most_informative_features|   s    '')*99!<KE43 ODLLOBF
 6{aBBb%i %%d+q02u9%**406"e)3D3I3I$3OO $BQ$)Ra%I) =r   c                   ^	^
 [        U S5      (       a  U R                  SU $ [        5       n[        [        5      m	[        S 5      m
U R
                  R                  5        H  u  u  p4nUR                  5        Hh  nXF4nUR                  U5        UR                  U5      n[        UT	U   5      T	U'   [        UT
U   5      T
U'   T
U   S:X  d  MW  UR                  U5        Mj     M     [        UU	U
4S jS9U l        U R                  SU $ )a_  
Return a list of the 'most informative' features used by this
classifier.  For the purpose of this function, the
informativeness of a feature ``(fname,fval)`` is equal to the
highest value of P(fname=fval|label), for any label, divided by
the lowest value of P(fname=fval|label), for any label:

|  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
_most_informative_featuresNc                      g)Ng      ?r;   r;   r   r   r=   @NaiveBayesClassifier.most_informative_features.<locals>.<lambda>   s    #r   r   c                 h   > TU    TU    -  U S   U S   S;   [        U S   5      R                  5       4$ )Nr   rA   )NFT)strlower)feature_maxprobminprobs    r   r=   rU      sC    H%(99QKQK#66$**,	&r   )r?   )hasattrrS   setr   floatr   r)   r   addr1   r   mindiscardrJ   )r   rL   featuresr+   r*   probdistr,   featureprZ   r[   s            @@r   rI   .NaiveBayesClassifier.most_informative_features   s    4566222A66 uH "%(G!+.G,0,B,B,H,H,J($,,.D$mGLL) d+A'*1gg.>'?GG$'*1gg.>'?GG$w'1, ((1 / -K /5/D+ ..r22r   c                 p   [        5       n[        [         5      n[        [        5      n[        5       nU H_  u  pxX8==   S-  ss'   UR                  5        H:  u  pXHU	4   U
==   S-  ss'   XY   R	                  U
5        UR	                  U	5        M<     Ma     U HT  nX8   nU HG  n	XHU	4   R                  5       nX-
  S:  d  M!  XHU	4   S==   X-
  -  ss'   XY   R	                  S5        MI     MV     U" U5      n0 nUR                  5        H   u  u  pnU" U[        XY   5      S9nUXU	4'   M"     U " X5      $ )zs
:param labeled_featuresets: A list of classified featuresets,
    i.e., a list of tuples ``(featureset, label)``.
rA   r   N)bins)r   r   r]   r)   r_   NrK   )clslabeled_featuresets	estimatorlabel_freqdistfeature_freqdistfeature_valuesfnamesr    r+   r*   r,   num_samplescountr   r   freqdistrc   s                    r   trainNaiveBayesClassifier.train   sU    "&x0$S) "5J!Q&!)//1 .t494%))$/

5!  2 "5 $E(/K(688: &*$E\248K<OO8")--d3   $ #>2 (8(>(>(@$NUH N4I0JKH-5E\* )A >44r   )r   r   r   rS   N)
   )d   )__name__
__module____qualname____firstlineno____doc__r   r   r!   r   rP   rI   classmethodr   rt   __static_attributes__r;   r   r   r
   r
   +   s?    (6*4EB<)3V 2= .5 .5r   r
   c                  \    SSK Jn   U " [        R                  5      nUR	                  5         g )Nr   )
names_demo)nltk.classify.utilr   r
   rt   rP   )r   
classifiers     r   demor      s"    -0667J--/r   __main__N)r|   collectionsr   nltk.classify.apir   nltk.probabilityr   r   r   r   r
   r   rx   r;   r   r   <module>r      s@   2 $ ) P PI5; I5b0 zF r   