
    /hP/                         S r SSKrSSKrSSKJr  SS jrS rS rS r	 " S S	5      r
S
 rS r\4S jr\4S jr0 qSS jrS rg)z0
Utility functions and classes for classifiers.
    N)LazyMapc                    ^  Uc"  U=(       a    [        US   [        [        45      nU(       a  U 4S jn[        X15      $ [        T U5      $ )an  
Use the ``LazyMap`` class to construct a lazy list-like
object that is analogous to ``map(feature_func, toks)``.  In
particular, if ``labeled=False``, then the returned list-like
object's values are equal to::

    [feature_func(tok) for tok in toks]

If ``labeled=True``, then the returned list-like object's values
are equal to::

    [(feature_func(tok), label) for (tok, label) in toks]

The primary purpose of this function is to avoid the memory
overhead involved in storing all the featuresets for every token
in a corpus.  Instead, these featuresets are constructed lazily,
as-needed.  The reduction in memory overhead can be especially
significant when the underlying list of tokens is itself lazy (as
is the case with many corpus readers).

:param feature_func: The function that will be applied to each
    token.  It should return a featureset -- i.e., a dict
    mapping feature names to feature values.
:param toks: The list of tokens to which ``feature_func`` should be
    applied.  If ``labeled=True``, then the list elements will be
    passed directly to ``feature_func()``.  If ``labeled=False``,
    then the list elements should be tuples ``(tok,label)``, and
    ``tok`` will be passed to ``feature_func()``.
:param labeled: If true, then ``toks`` contains labeled tokens --
    i.e., tuples of the form ``(tok, label)``.  (Default:
    auto-detect based on types.)
r   c                 $   > T" U S   5      U S   4$ )Nr       )labeled_tokenfeature_funcs    J/var/www/auris/envauris/lib/python3.13/site-packages/nltk/classify/util.py	lazy_func!apply_features.<locals>.lazy_funcA   s     q!12M!4DEE    )
isinstancetuplelistr   )r	   tokslabeledr   s   `   r
   apply_featuresr      sG    B =:d1gt}=	F y''|T**r   c                 N    [        U  VVs1 s H  u  pUiM	     snn5      $ s  snnf )a  
:return: A list of all labels that are attested in the given list
    of tokens.
:rtype: list of (immutable)
:param tokens: The list of classified tokens from which to extract
    labels.  A classified token has the form ``(token, label)``.
:type tokens: list
)r   )tokenstoklabels      r
   attested_labelsr   I   s#     F3FLS%F3443s   !
c                 "   U R                  U VVs/ s H  u  p#UPM	     snn5      n[        X5       VVVs/ s H  u  u  p#oUR                  U5      PM     nnnn[        R                  " [        U5      [        U5      -  5      $ s  snnf s  snnnf N)prob_classify_manyzipprobmathlogsumlen)
classifiergoldfslresultspdistlls          r
   log_likelihoodr)   U   sp    ++t,DtGRRt,DEG03D0B	C0B,Wbe**Q-0BB	C88CGc"g%&& -E	Cs
   B
B
c                     U R                  U VVs/ s H  u  p#UPM	     snn5      n[        X5       VVVs/ s H  u  u  p#oSU:H  PM     nnnnU(       a  [        U5      [        U5      -  $ gs  snnf s  snnnf )Nr   )classify_manyr   r    r!   )r"   r#   r$   r%   r&   rcorrects          r
   accuracyr.   [   sh    &&$'?$w$'?@G*-d*<=*<,7BAAv*<G=7|c'l** (@=s
   A,
A2c                   $    \ rS rSrSrS rS rSrg)CutoffCheckerd   z
A helper class that implements cutoff checks based on number of
iterations and log likelihood.

Accuracy cutoffs are also implemented, but they're almost never
a good idea to use.
c                     UR                  5       U l        SU;   a  [        US   5      * US'   SU;   a  [        US   5      US'   S U l        S U l        SU l        g )Nmin_llmin_lldeltar   )copycutoffsabsr(   acciter)selfr6   s     r
   __init__CutoffChecker.__init__m   s_    ||~w!$WX%6!7 7GHG#%()?%@GM"	r   c                    U R                   nU =R                  S-  sl        SU;   a  U R                  US   :  a  g[        R                  R                  R                  X5      n[        R                  " U5      (       a  gSU;   d  SU;   aK  SU;   a	  XCS   :  a  gSU;   a0  U R                  (       a  X@R                  -
  [        US   5      ::  a  gX@l        SU;   d  SU;   au  [        R                  R                  R                  X5      nSU;   a	  XSS   :  a  gSU;   a0  U R                  (       a  XPR                  -
  [        US   5      ::  a  gXPl
        gg )	Nr   max_iterTr3   r4   max_accmin_accdeltaF)r6   r9   nltkclassifyutilr)   r   isnanr(   r7   r8   )r:   r"   
train_toksr6   new_llnew_accs         r
   checkCutoffChecker.checkw   s)   ,,		Q	 TYY'*2E%E##22:J::fw-7":7"v1B'B(GGww&3w}/E+FFG>W#<mm((77
OGG#93E(E')HH(S1H-IIH $=r   )r8   r6   r9   r(   N)__name__
__module____qualname____firstlineno____doc__r;   rH   __static_attributes__r   r   r
   r0   r0   d   s    !r   r0   c                     0 nSUS'   U S   R                  5       US'   U S   R                  5       US'   S H@  nU R                  5       R                  U5      USU-  '   X R                  5       ;   US	U-  '   MB     U$ )
NTalwaysonr   
startswithendswithabcdefghijklmnopqrstuvwxyz	count(%s)has(%s)lowercountnamefeaturesletters      r
   names_demo_featuresr_      s    HHZ!!W]]_H\8>>+HZ.)-););F)Cv%&'-'=V#$ / Or   c                 p   0 nSUS'   U S   R                  5       S;   US'   U S   R                  5       S;   US'   S Hv  nU R                  5       R                  U5      US	U-  '   X R                  5       ;   US
U-  '   X S   R                  5       :H  USU-  '   X S   R                  5       :H  USU-  '   Mx     U$ )NTrQ   r   aeiouyzstartswith(vowel)rS   zendswith(vowel)rU   rV   rW   zstartswith(%s)zendswith(%s)rX   r[   s      r
   binary_names_demo_featuresrb      s    HHZ$(GMMOx$?H !"&r(.."2h">H.)-););F)Cv%&'-'=V#$.4Q.G!F*+,22hnn6F,F&()	 /
 Or   c           
      4   SS K nSSKJn  UR                  S5       Vs/ s H  oDS4PM     snUR                  S5       Vs/ s H  oDS4PM     sn-   nUR	                  S5        UR                  U5        US S nUSS	 n[        S
5        U " U VV	s/ s H  u  pU" U5      U	4PM     sn	n5      n
[        S5        [        X VV	s/ s H  u  pU" U5      U	4PM     sn	n5      n[        SU-  5         U VV	s/ s H  u  pU" U5      PM     nnn	U
R                  U5      n[        X}5       VVVs/ s H  u  u  pNoR                  U5      PM     nnnn[        S[        U5      [        U5      -  -  5        [        5         [        S5        [        [        X}5      5      S S  HB  u  u  nnnUS:X  a  SnOSn[        UXOR                  S5      UR                  S5      4-  5        MD     U
$ s  snf s  snf s  sn	nf s  sn	nf s  sn	nf s  snnnf ! [         a     U
$ f = f)Nr   namesmale.txtmale
female.txtfemale@ i  i|  Training classifier...Testing classifier...Accuracy: %6.4fAvg. log likelihood: %6.4fMUnseen Names      P(Male)  P(Female)
----------------------------------------     %-15s *%6.4f   %6.4f  %-15s  %6.4f  *%6.4f)randomnltk.corpusre   wordsseedshuffleprintr.   r   r   logprobr    r!   r   r   NotImplementedError)trainerr]   rs   re   r\   namelisttraintestngr"   r8   test_featuresetspdistsr#   r'   r(   genderfmts                      r
   
names_demor      s   ! ,1;;z+BC+B4v+BC%*[[%>G%>Tx%>G H
 KK
NN8UdOEDD 

"#?v8A;*?@J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|UmmD!?PQ*c"gD	.ABCAB%)#d*;%<Ra%@!NT6E..#zz&15::h3GHHI &A M D G @  E <Q   	sH   G& G+G0
G60H	 5G<	#H	 ,HBH	 <H	 	
HHc           
      j   SS K nSSKJn  UR                  S5      nUR                  S5      nUR	                  S5        UR                  U5        UR                  U5        [        XS S 5      n[        XSS US S -   5      nUSS	  Vs/ s H  oS
4PM     snUSS  Vs/ s H  oS4PM     sn-   n	UR                  U	5        [        S5        U " Xg5      n
[        S5        [        X VVs/ s H  u  pU" U5      U4PM     snn5      n[        SU-  5         U	 VVs/ s H  u  pU" U5      PM     nnnU
R                  U5      n[        X5       VVVs/ s H  u  u  nnnUR                  U5      PM     nnnn[        S[        U5      [        U	5      -  -  5        [        5         [        S5        [        X5      S S  HC  u  u  nnnUS
:X  a  SnOSn[        UUUR                  S
5      UR                  S5      4-  5        ME     U
$ s  snf s  snf s  snnf s  snnf s  snnnf ! [         a     U
$ f = f)Nr   rd   rf   rh   i	 i  i	  i  i
  Ti  Frk   rl   rm   rn   ro   rp   rq   rr   )rs   rt   re   ru   rv   rw   maprx   r.   r   r   ry   r    r!   r   rz   )r{   r]   rs   re   
male_namesfemale_namespositive	unlabeledr\   r~   r"   r   mr8   r   r   r#   r'   r(   is_maler   s                        r
   partial_names_demor      s=   !Z(J;;|,L
KK
NN:
NN<  8./H Hd3l4C6HHII &0T%:;%:T4L%:;".s3"7?"7$u"7? D NN4 

"#-J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|dUemmD!?PQ*c"gD	.ABCAB&)$&7&;"OT7U$..#uzz$/E1BCCD '< E < ?  E <Q   	sC   	HH*HH$ H0#H$ !H4BH$ H$ $
H21H2c           
      l   SS K nSSKJn  [        S5        U[        ;  a7  UR                  U5       Vs/ s H  ofUR                  S   4PM     sn[        U'   [        U   S S  nU[        U5      :  a  [        U5      n[        U VVs1 s H  u  phUiM	     snn5      n	[        SSR                  U	5      -   5        [        S5        UR                  S5        UR                  U5        US [        SU-  5       n
U[        SU-  5      U n[        S	5        U " U
 VVs/ s H  u  phU" U5      U4PM     snn5      n[        S
5        [        X VVs/ s H  u  phU" U5      U4PM     snn5      n[        SU-  5         U VVs/ s H  u  pcU" U5      PM     nnnUR                  U5      n[        X5       VVVs/ s H  u  u  nnnUR!                  U5      PM     nnnn[        S[#        U5      [        U5      -  -  5        U$ s  snf s  snnf s  snnf s  snnf s  snnf s  snnnf ! [$         a     U$ f = f)Nr   )sensevalzReading data...z
  Senses:  zSplitting into test & train...rj   g?rk   rl   rm   rn   )rs   rt   r   rx   _inst_cache	instancessensesr!   r   joinrv   rw   intr.   r   r   ry   r    rz   )r{   wordr]   r   rs   r   ir   r%   r   r}   r~   r"   r8   r   r   r\   r#   r'   r(   s                       r
   wsd_demor   "  s   $ 

;7?7I7I$7OP7O!!-7OPDD!!$I3y>	N9-9!19-.F	,&)
)* 

*+
KK
NN9nC!G%ESq\A&D 

"#?v8A;*?@J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|dUemmD!?PQ*c"gD	.ABC
 E Q . @  E <Q  	sG   HH
H
H;H%  H#H% 7!H'H% H% %
H32H3c                  d     [           g! [         a  n [        S5      n[        U5      U eSn A ff = f)z0
Checks whether the MEGAM binary is configured.
z\Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N)
_megam_bin	NameErrorstr)eerr_msgs     r
   check_megam_configr   P  s8    ( (<
  a'(s   	 
/*/r   )i  )rN   r   nltk.classify.utilrA   	nltk.utilr   r   r   r)   r.   r0   r_   rb   r   r   r   r   r   r   r   r
   <module>r      si      *+Z	5'4 4x
 "5 ,^ *= 5p +\(r   