o
    rZhP/                     @   s   d Z ddlZddlZddlmZ dddZdd Zdd	 Zd
d Z	G dd dZ
dd Zdd ZefddZefddZi adddZdd ZdS )z0
Utility functions and classes for classifiers.
    N)LazyMapc                    sB   |du r|ot |d ttf}|r fdd}t||S t |S )a  
    Use the ``LazyMap`` class to construct a lazy list-like
    object that is analogous to ``map(feature_func, toks)``.  In
    particular, if ``labeled=False``, then the returned list-like
    object's values are equal to::

        [feature_func(tok) for tok in toks]

    If ``labeled=True``, then the returned list-like object's values
    are equal to::

        [(feature_func(tok), label) for (tok, label) in toks]

    The primary purpose of this function is to avoid the memory
    overhead involved in storing all the featuresets for every token
    in a corpus.  Instead, these featuresets are constructed lazily,
    as-needed.  The reduction in memory overhead can be especially
    significant when the underlying list of tokens is itself lazy (as
    is the case with many corpus readers).

    :param feature_func: The function that will be applied to each
        token.  It should return a featureset -- i.e., a dict
        mapping feature names to feature values.
    :param toks: The list of tokens to which ``feature_func`` should be
        applied.  If ``labeled=True``, then the list elements will be
        passed directly to ``feature_func()``.  If ``labeled=False``,
        then the list elements should be tuples ``(tok,label)``, and
        ``tok`` will be passed to ``feature_func()``.
    :param labeled: If true, then ``toks`` contains labeled tokens --
        i.e., tuples of the form ``(tok, label)``.  (Default:
        auto-detect based on types.)
    Nr   c                    s    | d | d fS )Nr       )Zlabeled_tokenfeature_funcr   A/var/www/auris/lib/python3.10/site-packages/nltk/classify/util.py	lazy_funcA   s   z!apply_features.<locals>.lazy_func)
isinstancetuplelistr   )r   toksZlabeledr   r   r   r   apply_features   s   !

r   c                 C   s   t dd | D S )a!  
    :return: A list of all labels that are attested in the given list
        of tokens.
    :rtype: list of (immutable)
    :param tokens: The list of classified tokens from which to extract
        labels.  A classified token has the form ``(token, label)``.
    :type tokens: list
    c                 S      h | ]\}}|qS r   r   ).0toklabelr   r   r   	<setcomp>R       z"attested_labels.<locals>.<setcomp>)r
   )tokensr   r   r   attested_labelsI   s   	r   c                 C   s>   |  dd |D }dd t||D }tt|t| S )Nc                 S      g | ]\}}|qS r   r   r   fslr   r   r   
<listcomp>V   r   z"log_likelihood.<locals>.<listcomp>c                 S      g | ]\\}}}| |qS r   )prob)r   r   r   pdistr   r   r   r   W       )prob_classify_manyzipmathlogsumlen)
classifiergoldresultsllr   r   r   log_likelihoodU   s   r)   c                 C   s@   |  dd |D }dd t||D }|rt|t| S dS )Nc                 S   r   r   r   r   r   r   r   r   \   r   zaccuracy.<locals>.<listcomp>c                 S   s   g | ]
\\}}}||kqS r   r   )r   r   r   rr   r   r   r   ]       r   )Zclassify_manyr    r#   r$   )r%   r&   r'   Zcorrectr   r   r   accuracy[   s
   r,   c                   @   s    e Zd ZdZdd Zdd ZdS )CutoffCheckerz
    A helper class that implements cutoff checks based on number of
    iterations and log likelihood.

    Accuracy cutoffs are also implemented, but they're almost never
    a good idea to use.
    c                 C   sR   |  | _d|v rt|d  |d< d|v rt|d |d< d | _d | _d| _d S )Nmin_llmin_lldeltar   )copycutoffsabsr(   acciter)selfr1   r   r   r   __init__m   s   

zCutoffChecker.__init__c                 C   s  | j }|  jd7  _d|v r| j|d krdS tjj||}t|r&dS d|v s.d|v rQd|v r:||d kr:dS d|v rN| jrN|| j t	|d krNdS || _d|v sYd|v rtjj||}d|v rm||d krmdS d|v r| j
r|| j
 t	|d krdS || _
dS d S )	Nr   Zmax_iterTr.   r/   Zmax_accZmin_accdeltaF)r1   r4   nltkZclassifyutilr)   r!   isnanr(   r2   r3   )r5   r%   Z
train_toksr1   Znew_llZnew_accr   r   r   checkw   s8   
zCutoffChecker.checkN)__name__
__module____qualname____doc__r6   r:   r   r   r   r   r-   d   s    
r-   c                 C   sd   i }d|d< | d   |d< | d   |d< dD ]}|   ||d| < ||   v |d	| < q|S )
NTalwaysonr   
startswithendswithabcdefghijklmnopqrstuvwxyz	count(%s)has(%s)lowercountnamefeaturesletterr   r   r   names_demo_features   s   rM   c                 C   s   i }d|d< | d   dv |d< | d   dv |d< dD ]/}|   ||d	| < ||   v |d
| < || d   k|d| < || d   k|d| < q|S )NTr?   r   Zaeiouyzstartswith(vowel)rA   zendswith(vowel)rC   rD   rE   zstartswith(%s)zendswith(%s)rF   rI   r   r   r   binary_names_demo_features   s   rN   c                    sx  dd l }ddlm} dd |dD dd |dD  }|d || |d d	 }|d	d
 }td |  fdd|D }td t| fdd|D }td|  zX fdd|D }	||	}
dd t	||
D }tdt
|t|   t  td tt	||
d d D ]\\}}}|dkrd}nd}t|||d|df  qW |S  ty   Y |S w )Nr   namesc                 S      g | ]}|d fqS )maler   r   rJ   r   r   r   r      r   znames_demo.<locals>.<listcomp>male.txtc                 S   rQ   )femaler   rS   r   r   r   r          
female.txt@ i  i|  Training classifier...c                       g | ]
\}} ||fqS r   r   r   ngrK   r   r   r      r+   Testing classifier...c                    rZ   r   r   r[   r^   r   r   r      r+   Accuracy: %6.4fc                       g | ]\}} |qS r   r   r[   r^   r   r   r          c                 S   r   r   Zlogprobr   rJ   r&   r   r   r   r   r      r   Avg. log likelihood: %6.4fMUnseen Names      P(Male)  P(Female)
----------------------------------------   rR     %-15s *%6.4f   %6.4f  %-15s  %6.4f  *%6.4frU   )randomnltk.corpusrP   wordsseedshuffleprintr,   r   r    r#   r$   r   r   NotImplementedError)trainerrK   rj   rP   namelisttraintestr%   r3   test_featuresetspdistsr(   rJ   Zgenderr   fmtr   r^   r   
names_demo   s@   


" 
rx   c                    s  dd l }ddlm} |d}|d}|d || || t |d d }t |dd |d d  }d	d
 |dd D dd
 |dd D  }|| td | ||}	td t|	 fdd
|D }
td|
  zV fdd
|D }|		|}dd
 t
||D }tdt|t|   t  td t
||d d D ]\\}}}|dkrd}nd}t|||d|df  qW |	S  ty   Y |	S w )Nr   rO   rT   rW   i	 i  i	  i  c                 S   rQ   )Tr   rS   r   r   r   r      r   z&partial_names_demo.<locals>.<listcomp>i
  c                 S   rQ   )Fr   rS   r   r   r   r      rV   i  rY   r_   c                    rZ   r   r   r   r\   mr^   r   r   r     r+   r`   c                    ra   r   r   ry   r^   r   r   r     rb   c                 S   r   r   rc   rd   r   r   r   r     r   re   rf   rg   Trh   ri   F)rj   rk   rP   rl   rm   rn   mapro   r,   r   r    r#   r$   r   rp   )rq   rK   rj   rP   Z
male_namesZfemale_namesZpositiveZ	unlabeledrt   r%   r3   ru   rv   r(   rJ   Zis_maler   rw   r   r^   r   partial_names_demo   sH   








 
r|     c                    sv  dd l }ddlm} td |tvrdd ||D t|< t| d d  }|t|kr0t|}tdd |D }tdd	|  td
 |	d |
| |d td|  }|td| | }	td |  fdd|D }
td t|
 fdd|	D }td|  z' fdd|	D }|
|}dd t|	|D }tdt|t|	   W |
S  ty   Y |
S w )Nr   )sensevalzReading data...c                 S   s   g | ]	}||j d  fqS )r   )senses)r   ir   r   r   r   +  s    zwsd_demo.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r   r   r   /  r   zwsd_demo.<locals>.<setcomp>z
  Senses:  zSplitting into test & train...rX   g?rY   c                    rZ   r   r   r   r^   r   r   r   ;  r+   r_   c                    rZ   r   r   r   r^   r   r   r   ?  r+   r`   c                    ra   r   r   )r   r   r\   r^   r   r   r   E  rb   c                 S   r   r   rc   rd   r   r   r   r   G  r   re   )rj   rk   r~   ro   _inst_cache	instancesr$   r   joinrm   rn   intr,   r   r    r#   rp   )rq   wordrK   r\   rj   r~   r   r   rs   rt   r%   r3   ru   rv   r(   r   r^   r   wsd_demo"  s<   


r   c               
   C   s6   zt  W dS  ty }  z	td}t|| d} ~ ww )z8
    Checks whether the MEGAM binary is configured.
    z\Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N)Z
_megam_bin	NameErrorstr)eerr_msgr   r   r   check_megam_configP  s   

r   )N)r}   )r>   r!   Znltk.classify.utilr7   Z	nltk.utilr   r   r   r)   r,   r-   rM   rN   rx   r|   r   r   r   r   r   r   r   <module>   s    
-	</8
.