"""
A classifier model based on the maximum entropy modeling framework.  This
framework considers all of the probability distributions that are
empirically consistent with the training data; and chooses the
distribution with the highest entropy.  A probability distribution is
"empirically consistent" with a set of training data if its estimated
frequency with which a class and a feature vector value co-occur is
equal to the actual frequency in the data.

Terminology: 'feature'
======================
The term *feature* is usually used to refer to some property of an
unlabeled token.  For example, when performing word sense
disambiguation, we might define a ``'prevword'`` feature whose value is
the word preceding the target word.  However, in the context of
maxent modeling, the term *feature* is typically used to refer to a
property of a "labeled" token.  In order to prevent confusion, we
will introduce two distinct terms to disambiguate these two different
concepts:

  - An "input-feature" is a property of an unlabeled token.
  - A "joint-feature" is a property of a labeled token.

In the rest of the ``nltk.classify`` module, the term "features" is
used to refer to what we will call "input-features" in this module.

In the literature that describes and discusses maximum entropy models,
input-features are typically called "contexts", and joint-features
are simply referred to as "features".

Converting Input-Features to Joint-Features
-------------------------------------------
In maximum entropy models, joint-features are required to have numeric
values.  Typically, each input-feature ``input_feat`` is mapped to a
set of joint-features of the form:

|   joint_feat(token, label) = { 1 if input_feat(token) == feat_val
|                              {      and label == some_label
|                              {
|                              { 0 otherwise

For all values of ``feat_val`` and ``some_label``.  This mapping is
performed by classes that implement the ``MaxentFeatureEncodingI``
interface.
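
Example
-------
An illustrative sketch of the encoding step, using the
``BinaryMaxentFeatureEncoding`` class defined below (toy data; the
exact joint-feature indices depend on the learned mapping)::

    train_toks = [({'a': 1, 'b': 1}, 'x'), ({'a': 1, 'c': 1}, 'y')]
    encoding = BinaryMaxentFeatureEncoding.train(train_toks)
    vector = encoding.encode({'a': 1, 'b': 1}, 'x')  # list of (index, value)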
    N)defaultdict)ClassifierI)
call_megamparse_megam_weightswrite_megam_file)	call_tadmparse_tadm_weightswrite_tadm_file)CutoffCheckeraccuracylog_likelihood)gzip_open_unicode)DictionaryProbDist)OrderedDictz
epytext enc                   @   s   e Zd ZdZd ddZdd Zdd Zd	d
 Zdd Zdd Z	d!ddZ
d"ddZd#ddZdd Zg dZe					d$ddZdS )%MaxentClassifiera  
    A maximum entropy classifier (also known as a "conditional
    exponential classifier").  This classifier is parameterized by a
    set of "weights", which are used to combine the joint-features
    that are generated from a featureset by an "encoding".  In
    particular, the encoding maps each ``(featureset, label)`` pair to
    a vector.  The probability of each label is then computed using
    the following equation::

                                dotprod(weights, encode(fs,label))
      prob(label|fs) = ---------------------------------------------------
                       sum(dotprod(weights, encode(fs,l)) for l in labels)

    Where ``dotprod`` is the dot product::

      dotprod(a,b) = sum(x*y for (x,y) in zip(a,b))
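
    A typical usage sketch (illustrative only; ``train_toks`` is a list
    of ``(featureset, label)`` pairs)::

        classifier = MaxentClassifier.train(train_toks, algorithm='IIS')
        pdist = classifier.prob_classify({'prevword': 'the'})
        best_label = pdist.max()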
    """

    def __init__(self, encoding, weights, logarithmic=True):
        """
        Construct a new maxent classifier model.  Typically, new
        classifier models are created using the ``train()`` method.

        :type encoding: MaxentFeatureEncodingI
        :param encoding: An encoding that is used to convert the
            featuresets that are given to the ``classify`` method into
            joint-feature vectors, which are used by the maxent
            classifier model.

        :type weights: list of float
        :param weights:  The feature weight vector for this classifier.

        :type logarithmic: bool
        :param logarithmic: If false, then use non-logarithmic weights.
        """
        self._encoding = encoding
        self._weights = weights
        self._logarithmic = logarithmic
        assert encoding.length() == len(weights)

    def labels(self):
        return self._encoding.labels()

    def set_weights(self, new_weights):
        """
        Set the feature weight vector for this classifier.
        :param new_weights: The new feature weight vector.
        :type new_weights: list of float
        """
        self._weights = new_weights
        assert self._encoding.length() == len(new_weights)

    def weights(self):
        """
        :return: The feature weight vector for this classifier.
        :rtype: list of float
        """
        return self._weights

    def classify(self, featureset):
        return self.prob_classify(featureset).max()

    def prob_classify(self, featureset):
        prob_dict = {}
        for label in self._encoding.labels():
            feature_vector = self._encoding.encode(featureset, label)

            if self._logarithmic:
                total = 0.0
                for (f_id, f_val) in feature_vector:
                    total += self._weights[f_id] * f_val
                prob_dict[label] = total
            else:
                prod = 1.0
                for (f_id, f_val) in feature_vector:
                    prod *= self._weights[f_id] ** f_val
                prob_dict[label] = prod

        # Normalize the dictionary to give a probability distribution.
        return DictionaryProbDist(prob_dict, log=self._logarithmic, normalize=True)

    def explain(self, featureset, columns=4):
        """
        Print a table showing the effect of each of the features in
        the given feature set, and how they combine to determine the
        probabilities of each label for that featureset.
        """
        descr_width = 50
        TEMPLATE = "  %-" + str(descr_width - 2) + "s%s%8.3f"

        pdist = self.prob_classify(featureset)
        labels = sorted(pdist.samples(), key=pdist.prob, reverse=True)
        labels = labels[:columns]
        print(
            "  Feature".ljust(descr_width)
            + "".join("%8s" % (("%s" % l)[:7]) for l in labels)
        )
        print("  " + "-" * (descr_width - 2 + 8 * len(labels)))
        sums = defaultdict(int)
        for i, label in enumerate(labels):
            feature_vector = self._encoding.encode(featureset, label)
            feature_vector.sort(
                key=lambda fid__: abs(self._weights[fid__[0]]), reverse=True
            )
            for (f_id, f_val) in feature_vector:
                if self._logarithmic:
                    score = self._weights[f_id] * f_val
                else:
                    score = self._weights[f_id] ** f_val
                descr = self._encoding.describe(f_id)
                descr = descr.split(" and label is ")[0]  # hack
                descr += " (%s)" % f_val  # hack
                if len(descr) > 47:
                    descr = descr[:44] + "..."
                print(TEMPLATE % (descr, i * 8 * " ", score))
                sums[label] += score
        print("  " + "-" * (descr_width - 2 + 8 * len(labels)))
        print(
            "  TOTAL:".ljust(descr_width) + "".join("%8.3f" % sums[l] for l in labels)
        )
        print(
            "  PROBS:".ljust(descr_width)
            + "".join("%8.3f" % pdist.prob(l) for l in labels)
        )

    def most_informative_features(self, n=10):
        """
        Generates the ranked list of informative features from most to least.
        """
        if hasattr(self, "_most_informative_features"):
            return self._most_informative_features[:n]
        else:
            self._most_informative_features = sorted(
                list(range(len(self._weights))),
                key=lambda fid: abs(self._weights[fid]),
                reverse=True,
            )
            return self._most_informative_features[:n]

    def show_most_informative_features(self, n=10, show="all"):
        """
        :param show: all, neg, or pos (for negative-only or positive-only)
        :type show: str
        :param n: The no. of top features
        :type n: int
        """
        # Pass None to get the full ranked list of feature ids.
        fids = self.most_informative_features(None)
        if show == "pos":
            fids = [fid for fid in fids if self._weights[fid] > 0]
        elif show == "neg":
            fids = [fid for fid in fids if self._weights[fid] < 0]
        for fid in fids[:n]:
            print(f"{self._weights[fid]:8.3f} {self._encoding.describe(fid)}")

    def __repr__(self):
        return "<ConditionalExponentialClassifier: %d labels, %d features>" % (
            len(self._encoding.labels()),
            self._encoding.length(),
        )

    # The algorithm names accepted by train().
    ALGORITHMS = ["GIS", "IIS", "MEGAM", "TADM"]

    @classmethod
    def train(
        cls,
        train_toks,
        algorithm=None,
        trace=3,
        encoding=None,
        labels=None,
        gaussian_prior_sigma=0,
        **cutoffs,
    ):
        """
        Train a new maxent classifier based on the given corpus of
        training samples.  This classifier will have its weights
        chosen to maximize entropy while remaining empirically
        consistent with the training corpus.

        :rtype: MaxentClassifier
        :return: The new maxent classifier

        :type train_toks: list
        :param train_toks: Training data, represented as a list of
            pairs, the first member of which is a featureset,
            and the second of which is a classification label.

        :type algorithm: str
        :param algorithm: A case-insensitive string, specifying which
            algorithm should be used to train the classifier.  The
            following algorithms are currently available.

            - Iterative Scaling Methods: Generalized Iterative Scaling (``'GIS'``),
              Improved Iterative Scaling (``'IIS'``)
            - External Libraries (requiring megam):
              LM-BFGS algorithm, with training performed by Megam (``'megam'``)

            The default algorithm is ``'IIS'``.

        :type trace: int
        :param trace: The level of diagnostic tracing output to produce.
            Higher values produce more verbose output.
        :type encoding: MaxentFeatureEncodingI
        :param encoding: A feature encoding, used to convert featuresets
            into feature vectors.  If none is specified, then a
            ``BinaryMaxentFeatureEncoding`` will be built based on the
            features that are attested in the training corpus.
        :type labels: list(str)
        :param labels: The set of possible labels.  If none is given, then
            the set of all labels attested in the training data will be
            used instead.
        :param gaussian_prior_sigma: The sigma value for a gaussian
            prior on model weights.  Currently, this is supported by
            ``megam``. For other algorithms, its value is ignored.
        :param cutoffs: Arguments specifying various conditions under
            which the training should be halted.  (Some of the cutoff
            conditions are not supported by some algorithms.)

            - ``max_iter=v``: Terminate after ``v`` iterations.
            - ``min_ll=v``: Terminate after the negative average
              log-likelihood drops under ``v``.
            - ``min_lldelta=v``: Terminate if a single iteration improves
              log likelihood by less than ``v``.
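
        For example, the following call (illustrative) trains with IIS
        for at most 10 iterations::

            classifier = MaxentClassifier.train(train_toks, 'IIS', max_iter=10)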
        NZiis)	max_iterZmin_llmin_lldeltaZmax_accZmin_accdeltacount_cutoffZnormexplicit	bernoullizUnexpected keyword arg %rZgisZmegamZtadmtracer   r   gaussian_prior_sigmazUnknown algorithm %s)	TypeErrorlower train_maxent_classifier_with_iis train_maxent_classifier_with_gis"train_maxent_classifier_with_megamTadmMaxentClassifiertrain
ValueError)
cls
train_toks	algorithmru   r   r   rv   cutoffsr6   kwargsr   r   r   r}      sB   >
zMaxentClassifier.train)T)r2   )r]   )r]   re   )Nro   NNr   )__name__
__module____qualname____doc__r   r   r    r   r&   r#   r\   rd   rm   rn   Z
ALGORITHMSclassmethodr}   r   r   r   r   r   N   s(    
	

,
r   c                   @   8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )MaxentFeatureEncodingIa  
    A mapping that converts a set of input-feature values to a vector
    of joint-feature values, given a label.  This conversion is
    necessary to translate featuresets into a format that can be used
    by maximum entropy models.

    The set of joint-features used by a given encoding is fixed, and
    each index in the generated joint-feature vectors corresponds to a
    single joint-feature.  The length of the generated joint-feature
    vectors is therefore constant (for a given encoding).

    Because the joint-feature vectors generated by
    ``MaxentFeatureEncodingI`` are typically very sparse, they are
    represented as a list of ``(index, value)`` tuples, specifying the
    value of each non-zero joint-feature.

    Feature encodings are generally created using the ``train()``
    method, which generates an appropriate encoding based on the
    input-feature values and labels that are present in a given
    corpus.
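
    For example, for an encoding with ``length() == 5``, the sparse
    joint-feature vector ``[(1, 1), (4, 1)]`` stands for the dense
    vector ``[0, 1, 0, 0, 1]`` (an illustrative sketch).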
    """

    def encode(self, featureset, label):
        """
        Given a (featureset, label) pair, return the corresponding
        vector of joint-feature values.  This vector is represented as
        a list of ``(index, value)`` tuples, specifying the value of
        each non-zero joint-feature.

        :type featureset: dict
        :rtype: list(tuple(int, int))
        """
        raise NotImplementedError

    def length(self):
        """
        :return: The size of the fixed-length joint-feature vectors
            that are generated by this encoding.
        :rtype: int
        """
        raise NotImplementedError

    def labels(self):
        """
        :return: A list of the "known labels" -- i.e., all labels
            ``l`` such that ``self.encode(fs,l)`` can be a nonzero
            joint-feature vector for some value of ``fs``.
        :rtype: list
        """
        raise NotImplementedError

    def describe(self, fid):
        """
        :return: A string describing the value of the joint-feature
            whose index in the generated feature vectors is ``fid``.
        :rtype: str
        """
        raise NotImplementedError

    def train(cls, train_toks):
        """
        Construct and return new feature encoding, based on a given
        training corpus ``train_toks``.

        :type train_toks: list(tuple(dict, str))
        :param train_toks: Training data, represented as a list of
            pairs, the first member of which is a feature dictionary,
            and the second of which is a classification label.
        """
        raise NotImplementedError


class FunctionBackedMaxentFeatureEncoding(MaxentFeatureEncodingI):
    """
    A feature encoding that calls a user-supplied function to map a
    given featureset/label pair to a sparse joint-feature vector.
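
    An illustrative sketch (one always-on joint-feature per label)::

        labels = ['x', 'y']
        def func(featureset, label):
            return [(labels.index(label), 1)]
        encoding = FunctionBackedMaxentFeatureEncoding(func, len(labels), labels)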
    """

    def __init__(self, func, length, labels):
        """
        Construct a new feature encoding based on the given function.

        :type func: (callable)
        :param func: A function that takes two arguments, a featureset
             and a label, and returns the sparse joint feature vector
             that encodes them::

                 func(featureset, label) -> feature_vector

             This sparse joint feature vector (``feature_vector``) is a
             list of ``(index,value)`` tuples.

        :type length: int
        :param length: The size of the fixed-length joint-feature
            vectors that are generated by this encoding.

        :type labels: list
        :param labels: A list of the "known labels" for this
            encoding -- i.e., all labels ``l`` such that
            ``self.encode(fs,l)`` can be a nonzero joint-feature vector
            for some value of ``fs``.
        """
        self._length = length
        self._func = func
        self._labels = labels

    def encode(self, featureset, label):
        return self._func(featureset, label)

    def length(self):
        return self._length

    def labels(self):
        return self._labels

    def describe(self, fid):
        return "no description available"


class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
    """
    A feature encoding that generates vectors containing a binary
    joint-features of the form:

    |  joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label)
    |                      {
    |                      { 0 otherwise

    Where ``fname`` is the name of an input-feature, ``fval`` is a value
    for that input-feature, and ``label`` is a label.

    Typically, these features are constructed based on a training
    corpus, using the ``train()`` method.  This method will create one
    feature for each combination of ``fname``, ``fval``, and ``label``
    that occurs at least once in the training corpus.

    The ``unseen_features`` parameter can be used to add "unseen-value
    features", which are used whenever an input feature has a value
    that was not encountered in the training corpus.  These features
    have the form:

    |  joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname])
    |                      {      and l == label
    |                      {
    |                      { 0 otherwise

    Where ``is_unseen(fname, fval)`` is true if the encoding does not
    contain any joint features that are true when ``fs[fname]==fval``.

    The ``alwayson_features`` parameter can be used to add "always-on
    features", which have the form::

    |  joint_feat(fs, l) = { 1 if (l == label)
    |                      {
    |                      { 0 otherwise

    These always-on features allow the maxent model to directly model
    the prior probabilities of each label.
    Fc                       t | t tt|krtdt| _	 | _	 t| _	 d _		 d _
	 |rB fddt|D  _	  jt j	7  _|rbdd |D } fddt|D  _
  jt|7  _dS dS )a  
        :param labels: A list of the "known labels" for this encoding.

        :param mapping: A dictionary mapping from ``(fname,fval,label)``
            tuples to corresponding joint-feature indexes.  These
            indexes must be the set of integers from 0...len(mapping).
            If ``mapping[fname,fval,label]=id``, then
            ``self.encode(..., fname:fval, ..., label)[id]`` is 1;
            otherwise, it is 0.

        :param unseen_features: If true, then include unseen value
           features in the generated joint-feature vectors.

        :param alwayson_features: If true, then include always-on
           features in the generated joint-feature vectors.
        """
        if set(mapping.values()) != set(range(len(mapping))):
            raise ValueError(
                "Mapping values must be exactly the "
                "set of integers from 0...len(mapping)"
            )

        self._labels = list(labels)
        """A list of attested labels."""

        self._mapping = mapping
        """dict mapping from (fname,fval,label) -> fid"""

        self._length = len(mapping)
        """The length of generated joint feature vectors."""

        self._alwayson = None
        """dict mapping from label -> fid"""

        self._unseen = None
        """dict mapping from fname -> fid"""

        if alwayson_features:
            self._alwayson = {
                label: i + self._length for (i, label) in enumerate(labels)
            }
            self._length += len(self._alwayson)

        if unseen_features:
            fnames = {fname for (fname, fval, label) in mapping}
            self._unseen = {
                fname: i + self._length for (i, fname) in enumerate(fnames)
            }
            self._length += len(fnames)

    def encode(self, featureset, label):
        # Inherit docs.
        encoding = []

        # Convert input-features to joint-features:
        for fname, fval in featureset.items():
            # Known feature name & value:
            if (fname, fval, label) in self._mapping:
                encoding.append((self._mapping[fname, fval, label], 1))

            # Otherwise, we might want to fire the unseen-value feature.
            elif self._unseen:
                # Have we seen this fname/fval combination with any label?
                for label2 in self._labels:
                    if (fname, fval, label2) in self._mapping:
                        break  # we've seen this fname/fval combo
                # We haven't -- fire the unseen-value feature.
                else:
                    if fname in self._unseen:
                        encoding.append((self._unseen[fname], 1))

        # Add always-on features:
        if self._alwayson and label in self._alwayson:
            encoding.append((self._alwayson[label], 1))

        return encoding

    def describe(self, f_id):
        # Inherit docs.
        if not isinstance(f_id, int):
            raise TypeError("describe() expected an int")
        try:
            self._inv_mapping
        except AttributeError:
            self._inv_mapping = [-1] * len(self._mapping)
            for (info, i) in self._mapping.items():
                self._inv_mapping[i] = info

        if f_id < len(self._mapping):
            (fname, fval, label) = self._inv_mapping[f_id]
            return f"{fname}=={fval!r} and label is {label!r}"
        elif self._alwayson and f_id in self._alwayson.values():
            for (label, f_id2) in self._alwayson.items():
                if f_id == f_id2:
                    return "label is %r" % label
        elif self._unseen and f_id in self._unseen.values():
            for (fname, f_id2) in self._unseen.items():
                if f_id == f_id2:
                    return "%s is unseen" % fname
        else:
            raise ValueError("Bad feature id")

    def labels(self):
        # Inherit docs.
        return self._labels

    def length(self):
        # Inherit docs.
        return self._length

    @classmethod
    def train(cls, train_toks, count_cutoff=0, labels=None, **options):
        """
        Construct and return new feature encoding, based on a given
        training corpus ``train_toks``.  See the class description
        ``BinaryMaxentFeatureEncoding`` for a description of the
        joint-features that will be included in this encoding.

        :type train_toks: list(tuple(dict, str))
        :param train_toks: Training data, represented as a list of
            pairs, the first member of which is a feature dictionary,
            and the second of which is a classification label.

        :type count_cutoff: int
        :param count_cutoff: A cutoff value that is used to discard
            rare joint-features.  If a joint-feature's value is 1
            fewer than ``count_cutoff`` times in the training corpus,
            then that joint-feature is not included in the generated
            encoding.

        :type labels: list
        :param labels: A list of labels that should be used by the
            classifier.  If not specified, then the set of labels
            attested in ``train_toks`` will be used.

        :param options: Extra parameters for the constructor, such as
            ``unseen_features`` and ``alwayson_features``.
        """
        mapping = {}  # maps (fname, fval, label) -> fid
        seen_labels = set()  # the set of labels we've encountered
        count = defaultdict(int)  # maps (fname, fval) -> count

        for (tok, label) in train_toks:
            if labels and (label not in labels):
                raise ValueError("Unexpected label %s" % label)
            seen_labels.add(label)

            # Record each of the features.
            for (fname, fval) in tok.items():
                # If a count cutoff is given, then only add a joint
                # feature once the corresponding (fname, fval) pair
                # has been seen at least count_cutoff times.
                count[fname, fval] += 1
                if count[fname, fval] >= count_cutoff:
                    if (fname, fval, label) not in mapping:
                        mapping[fname, fval, label] = len(mapping)

        if labels is None:
            labels = seen_labels
        return cls(labels, mapping, **options)


class GISEncoding(BinaryMaxentFeatureEncoding):
    """
    A binary feature encoding which adds one new joint-feature to the
    joint-features defined by ``BinaryMaxentFeatureEncoding``: a
    correction feature, whose value is chosen to ensure that the
    sparse vector always sums to a constant non-negative number.  This
    new feature is used to ensure two preconditions for the GIS
    training algorithm:

      - At least one feature vector index must be nonzero for every
        token.
      - The feature vector must sum to a constant non-negative number
        for every token.
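
    An illustrative sketch of the invariant (toy data)::

        enc = GISEncoding.train([({'a': 1}, 'x'), ({'b': 1}, 'y')])
        vec = enc.encode({'a': 1}, 'x')
        assert sum(v for (f, v) in vec) == enc.C  # correction feature pads the sum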
    """

    def __init__(
        self, labels, mapping, unseen_features=False, alwayson_features=False, C=None
    ):
        """
        :param C: The correction constant.  The value of the correction
            feature is based on this value.  In particular, its value is
            ``C - sum([v for (f,v) in encoding])``.
        :seealso: ``BinaryMaxentFeatureEncoding.__init__``
        """
        BinaryMaxentFeatureEncoding.__init__(
            self, labels, mapping, unseen_features, alwayson_features
        )
        if C is None:
            C = len({fname for (fname, fval, label) in mapping}) + 1
        self._C = C

    @property
    def C(self):
        """The non-negative constant that all encoded feature vectors
        will sum to."""
        return self._C

    def encode(self, featureset, label):
        # Get the basic encoding.
        encoding = BinaryMaxentFeatureEncoding.encode(self, featureset, label)
        base_length = BinaryMaxentFeatureEncoding.length(self)

        # Add a correction feature.
        total = sum(v for (f, v) in encoding)
        if total >= self._C:
            raise ValueError("Correction feature is not high enough!")
        encoding.append((base_length, self._C - total))

        # Return the result.
        return encoding

    def length(self):
        # Add one for the correction feature.
        return BinaryMaxentFeatureEncoding.length(self) + 1

    def describe(self, f_id):
        if f_id == BinaryMaxentFeatureEncoding.length(self):
            return "Correction feature (%s)" % self._C
        else:
            return BinaryMaxentFeatureEncoding.describe(self, f_id)


class TadmEventMaxentFeatureEncoding(BinaryMaxentFeatureEncoding):
    def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False):
        self._mapping = OrderedDict(mapping)
        self._label_mapping = OrderedDict()
        BinaryMaxentFeatureEncoding.__init__(
            self, labels, self._mapping, unseen_features, alwayson_features
        )

    def encode(self, featureset, label):
        encoding = []
        for feature, value in featureset.items():
            if (feature, label) not in self._mapping:
                self._mapping[(feature, label)] = len(self._mapping)
            if value not in self._label_mapping:
                if not isinstance(value, int):
                    self._label_mapping[value] = len(self._label_mapping)
                else:
                    self._label_mapping[value] = value
            encoding.append(
                (self._mapping[(feature, label)], self._label_mapping[value])
            )
        return encoding

    def labels(self):
        return self._labels

    def describe(self, fid):
        for (feature, label) in self._mapping:
            if self._mapping[(feature, label)] == fid:
                return (feature, label)

    def length(self):
        return len(self._mapping)

    @classmethod
    def train(cls, train_toks, count_cutoff=0, labels=None, **options):
        mapping = OrderedDict()
        if not labels:
            labels = []

        # This gets read twice, so compute the values in case it's lazy.
        train_toks = list(train_toks)

        for (featureset, label) in train_toks:
            if label not in labels:
                labels.append(label)

        for (featureset, label) in train_toks:
            for label in labels:
                for feature in featureset:
                    if (feature, label) not in mapping:
                        mapping[(feature, label)] = len(mapping)

        return cls(labels, mapping, **options)


class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
    """
    A feature encoding that generates vectors containing integer,
    float and binary joint-features of the form:

    Binary (for string and boolean features):

    |  joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label)
    |                      {
    |                      { 0 otherwise

    Value (for integer and float features):

    |  joint_feat(fs, l) = { fval if     (fs[fname] == type(fval))
    |                      {         and (l == label)
    |                      {
    |                      { not encoded otherwise

    Where ``fname`` is the name of an input-feature, ``fval`` is a value
    for that input-feature, and ``label`` is a label.

    Typically, these features are constructed based on a training
    corpus, using the ``train()`` method.

    For string and boolean features [type(fval) not in (int, float)]
    this method will create one feature for each combination of
    ``fname``, ``fval``, and ``label`` that occurs at least once in the
    training corpus.

    For integer and float features [type(fval) in (int, float)] this
    method will create one feature for each combination of ``fname``
    and ``label`` that occurs at least once in the training corpus.

    For binary features the ``unseen_features`` parameter can be used
    to add "unseen-value features", which are used whenever an input
    feature has a value that was not encountered in the training
    corpus.  These features have the form:

    |  joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname])
    |                      {      and l == label
    |                      {
    |                      { 0 otherwise

    Where ``is_unseen(fname, fval)`` is true if the encoding does not
    contain any joint features that are true when ``fs[fname]==fval``.

    The ``alwayson_features`` parameter can be used to add "always-on
    features", which have the form:

    |  joint_feat(fs, l) = { 1 if (l == label)
    |                      {
    |                      { 0 otherwise

    These always-on features allow the maxent model to directly model
    the prior probabilities of each label.
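
    An illustrative sketch mixing binary and numeric input-features::

        train_toks = [({'word': 'the', 'freq': 2.0}, 'x'),
                      ({'word': 'dog', 'freq': 1.0}, 'y')]
        encoding = TypedMaxentFeatureEncoding.train(train_toks)
        encoding.encode({'word': 'the', 'freq': 3.5}, 'x')  # 'freq' contributes 3.5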
    """

    def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False):
        """
        :param labels: A list of the "known labels" for this encoding.

        :param mapping: A dictionary mapping from ``(fname,fval,label)``
            tuples to corresponding joint-feature indexes.  These
            indexes must be the set of integers from 0...len(mapping).
            If ``mapping[fname,fval,label]=id``, then
            ``self.encode(..., fname:fval, ..., label)[id]`` is 1;
            otherwise, it is 0.

        :param unseen_features: If true, then include unseen value
           features in the generated joint-feature vectors.

        :param alwayson_features: If true, then include always-on
           features in the generated joint-feature vectors.
        """
        if set(mapping.values()) != set(range(len(mapping))):
            raise ValueError(
                "Mapping values must be exactly the "
                "set of integers from 0...len(mapping)"
            )

        self._labels = list(labels)
        """A list of attested labels."""

        self._mapping = mapping
        """dict mapping from (fname,fval,label) -> fid"""

        self._length = len(mapping)
        """The length of generated joint feature vectors."""

        self._alwayson = None
        """dict mapping from label -> fid"""

        self._unseen = None
        """dict mapping from fname -> fid"""

        if alwayson_features:
            self._alwayson = {
                label: i + self._length for (i, label) in enumerate(labels)
            }
            self._length += len(self._alwayson)

        if unseen_features:
            fnames = {fname for (fname, fval, label) in mapping}
            self._unseen = {
                fname: i + self._length for (i, fname) in enumerate(fnames)
            }
            self._length += len(fnames)

    def encode(self, featureset, label):
        # Inherit docs.
        encoding = []

        # Convert input-features to joint-features:
        for fname, fval in featureset.items():
            if isinstance(fval, (int, float)):
                # Known feature name & numeric type:
                if (fname, type(fval), label) in self._mapping:
                    encoding.append((self._mapping[fname, type(fval), label], fval))
            else:
                # Known feature name & value:
                if (fname, fval, label) in self._mapping:
                    encoding.append((self._mapping[fname, fval, label], 1))

                # Otherwise, we might want to fire the unseen-value feature.
                elif self._unseen:
                    # Have we seen this fname/fval combination with any label?
                    for label2 in self._labels:
                        if (fname, fval, label2) in self._mapping:
                            break  # we've seen this fname/fval combo
                    # We haven't -- fire the unseen-value feature.
                    else:
                        if fname in self._unseen:
                            encoding.append((self._unseen[fname], 1))

        # Add always-on features:
        if self._alwayson and label in self._alwayson:
            encoding.append((self._alwayson[label], 1))

        return encoding

    def describe(self, f_id):
        # Inherit docs.
        if not isinstance(f_id, int):
            raise TypeError("describe() expected an int")
        try:
            self._inv_mapping
        except AttributeError:
            self._inv_mapping = [-1] * len(self._mapping)
            for (info, i) in self._mapping.items():
                self._inv_mapping[i] = info

        if f_id < len(self._mapping):
            (fname, fval, label) = self._inv_mapping[f_id]
            return f"{fname}=={fval!r} and label is {label!r}"
        elif self._alwayson and f_id in self._alwayson.values():
            for (label, f_id2) in self._alwayson.items():
                if f_id == f_id2:
                    return "label is %r" % label
        elif self._unseen and f_id in self._unseen.values():
            for (fname, f_id2) in self._unseen.items():
                if f_id == f_id2:
                    return "%s is unseen" % fname
        else:
            raise ValueError("Bad feature id")

    def labels(self):
        # Inherit docs.
        return self._labels

    def length(self):
        # Inherit docs.
        return self._length

    @classmethod
    def train(cls, train_toks, count_cutoff=0, labels=None, **options):
        """
        Construct and return new feature encoding, based on a given
        training corpus ``train_toks``.  See the class description
        ``TypedMaxentFeatureEncoding`` for a description of the
        joint-features that will be included in this encoding.

        Note: the recognized feature value types are (int, float); other
        types are interpreted as regular binary features.

        :type train_toks: list(tuple(dict, str))
        :param train_toks: Training data, represented as a list of
            pairs, the first member of which is a feature dictionary,
            and the second of which is a classification label.

        :type count_cutoff: int
        :param count_cutoff: A cutoff value that is used to discard
            rare joint-features.  If a joint-feature's value is 1
            fewer than ``count_cutoff`` times in the training corpus,
            then that joint-feature is not included in the generated
            encoding.

        :type labels: list
        :param labels: A list of labels that should be used by the
            classifier.  If not specified, then the set of labels
            attested in ``train_toks`` will be used.

        :param options: Extra parameters for the constructor, such as
            ``unseen_features`` and ``alwayson_features``.
        """
        mapping = {}  # maps (fname, fval, label) -> fid
        seen_labels = set()  # the set of labels we've encountered
        count = defaultdict(int)  # maps (fname, fval) -> count

        for (tok, label) in train_toks:
            if labels and (label not in labels):
                raise ValueError("Unexpected label %s" % label)
            seen_labels.add(label)

            # Record each of the features.
            for (fname, fval) in tok.items():
                if type(fval) in (int, float):
                    fval = type(fval)
                # If a count cutoff is given, then only add a joint
                # feature once the corresponding (fname, fval) pair
                # has been seen at least count_cutoff times.
                count[fname, fval] += 1
                if count[fname, fval] >= count_cutoff:
                    if (fname, fval, label) not in mapping:
                        mapping[fname, fval, label] = len(mapping)

        if labels is None:
            labels = seen_labels
        return cls(labels, mapping, **options)


def train_maxent_classifier_with_gis(
    train_toks, trace=3, encoding=None, labels=None, **cutoffs
):
    """
    Train a new ``ConditionalExponentialClassifier``, using the given
    training samples, using the Generalized Iterative Scaling
    algorithm.  This ``ConditionalExponentialClassifier`` will encode
    the model that maximizes entropy from all the models that are
    empirically consistent with ``train_toks``.

    :see: ``train_maxent_classifier()`` for parameter descriptions.
    """
    cutoffs.setdefault("max_iter", 100)
    cutoffchecker = CutoffChecker(cutoffs)

    # Construct an encoding from the training data.
    if encoding is None:
        encoding = GISEncoding.train(train_toks, labels=labels)

    if not hasattr(encoding, "C"):
        raise TypeError(
            "The GIS algorithm requires an encoding that "
            "defines C (e.g., GISEncoding)."
        )

    # Cinv is the inverse of the sum of each joint feature vector.
    # This controls the learning rate: higher Cinv (or lower C) gives
    # faster learning.
    Cinv = 1.0 / encoding.C

    # Count how many times each feature occurs in the training data.
    empirical_fcount = calculate_empirical_fcount(train_toks, encoding)

    # Check for any features that are not attested in train_toks.
    unattested = set(numpy.nonzero(empirical_fcount == 0)[0])

    # Build the classifier.  Start with weight=0 for each attested
    # feature, and weight=-infinity for each unattested feature.
    weights = numpy.zeros(len(empirical_fcount), "d")
    for fid in unattested:
        weights[fid] = numpy.NINF
    classifier = ConditionalExponentialClassifier(encoding, weights)

    # Take the log of the empirical fcount.
    log_empirical_fcount = numpy.log2(empirical_fcount)
    del empirical_fcount

    if trace > 0:
        print("  ==> Training (%d iterations)" % cutoffs["max_iter"])
    if trace > 2:
        print()
        print("      Iteration    Log Likelihood    Accuracy")
        print("      ---------------------------------------")

    # Train the classifier.
    try:
        while True:
            if trace > 2:
                ll = cutoffchecker.ll or log_likelihood(classifier, train_toks)
                acc = cutoffchecker.acc or accuracy(classifier, train_toks)
                iternum = cutoffchecker.iter
                print("     %9d    %14.5f    %9.3f" % (iternum, ll, acc))

            # Use the model to estimate the number of times each
            # feature should occur in the training data.
            estimated_fcount = calculate_estimated_fcount(
                classifier, train_toks, encoding
            )

            # Take the log of estimated fcount (avoid taking log(0)).
            for fid in unattested:
                estimated_fcount[fid] += 1
            log_estimated_fcount = numpy.log2(estimated_fcount)
            del estimated_fcount

            # Update the classifier weights.
            weights = classifier.weights()
            weights += (log_empirical_fcount - log_estimated_fcount) * Cinv
            classifier.set_weights(weights)

            # Check the log-likelihood & accuracy cutoffs.
            if cutoffchecker.check(classifier, train_toks):
                break

    except KeyboardInterrupt:
        print("      Training stopped: keyboard interrupt")
    except:
        raise

    if trace > 2:
        ll = log_likelihood(classifier, train_toks)
        acc = accuracy(classifier, train_toks)
        print(f"         Final    {ll:14.5f}    {acc:9.3f}")

    # Return the classifier.
    return classifier


def calculate_empirical_fcount(train_toks, encoding):
    fcount = numpy.zeros(encoding.length(), "d")

    for tok, label in train_toks:
        for (index, val) in encoding.encode(tok, label):
            fcount[index] += val

    return fcount


def calculate_estimated_fcount(classifier, train_toks, encoding):
    fcount = numpy.zeros(encoding.length(), "d")

    for tok, label in train_toks:
        pdist = classifier.prob_classify(tok)
        for label in pdist.samples():
            prob = pdist.prob(label)
            for (fid, fval) in encoding.encode(tok, label):
                fcount[fid] += prob * fval

    return fcount


def train_maxent_classifier_with_iis(
    train_toks, trace=3, encoding=None, labels=None, **cutoffs
):
    """
    Train a new ``ConditionalExponentialClassifier``, using the given
    training samples, using the Improved Iterative Scaling algorithm.
    This ``ConditionalExponentialClassifier`` will encode the model
    that maximizes entropy from all the models that are empirically
    consistent with ``train_toks``.

    :see: ``train_maxent_classifier()`` for parameter descriptions.
    """
    cutoffs.setdefault("max_iter", 100)
    cutoffchecker = CutoffChecker(cutoffs)

    # Construct an encoding from the training data.
    if encoding is None:
        encoding = BinaryMaxentFeatureEncoding.train(train_toks, labels=labels)

    # Count how many times each feature occurs in the training data.
    empirical_ffreq = calculate_empirical_fcount(train_toks, encoding) / len(train_toks)

    # Find the nf map, and related variables nfarray and nftranspose.
    # nf is the sum of the features for a given labeled text; nfmap
    # compresses this sparse set of values to a dense list, and
    # nfarray performs the reverse operation.
    nfmap = calculate_nfmap(train_toks, encoding)
    nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), "d")
    nftranspose = numpy.reshape(nfarray, (len(nfarray), 1))

    # Check for any features that are not attested in train_toks.
    unattested = set(numpy.nonzero(empirical_ffreq == 0)[0])

    # Build the classifier.  Start with weight=0 for each attested
    # feature, and weight=-infinity for each unattested feature.
    weights = numpy.zeros(len(empirical_ffreq), "d")
    for fid in unattested:
        weights[fid] = numpy.NINF
    classifier = ConditionalExponentialClassifier(encoding, weights)

    if trace > 0:
        print("  ==> Training (%d iterations)" % cutoffs["max_iter"])
    if trace > 2:
        print()
        print("      Iteration    Log Likelihood    Accuracy")
        print("      ---------------------------------------")

    # Train the classifier.
    try:
        while True:
            if trace > 2:
                ll = cutoffchecker.ll or log_likelihood(classifier, train_toks)
                acc = cutoffchecker.acc or accuracy(classifier, train_toks)
                iternum = cutoffchecker.iter
                print("     %9d    %14.5f    %9.3f" % (iternum, ll, acc))

            # Calculate the deltas for this iteration, using Newton's method.
            deltas = calculate_deltas(
                train_toks,
                classifier,
                unattested,
                empirical_ffreq,
                nfmap,
                nfarray,
                nftranspose,
                encoding,
            )

            # Use the deltas to update our weights.
            weights = classifier.weights()
            weights += deltas
            classifier.set_weights(weights)

            # Check the log-likelihood & accuracy cutoffs.
            if cutoffchecker.check(classifier, train_toks):
                break

    except KeyboardInterrupt:
        print("      Training stopped: keyboard interrupt")
    except:
        raise

    if trace > 2:
        ll = log_likelihood(classifier, train_toks)
        acc = accuracy(classifier, train_toks)
        print(f"         Final    {ll:14.5f}    {acc:9.3f}")

    # Return the classifier.
    return classifier


def calculate_nfmap(train_toks, encoding):
    """
    Construct a map that can be used to compress ``nf`` (which is
    typically sparse).

    *nf(feature_vector)* is the sum of the feature values for
    *feature_vector*.

    This represents the number of features that are active for a
    given labeled text.  This method finds all values of *nf(t)*
    that are attested for at least one token in the given list of
    training tokens; and constructs a dictionary mapping these
    attested values to a continuous range *0...N*.  For example,
    if the only values of *nf()* that were attested were 3, 5, and
    7, then ``_nfmap`` might return the dictionary ``{3:0, 5:1, 7:2}``.

    :return: A map that can be used to compress ``nf`` to a dense
        vector.
    :rtype: dict(int -> int)
    """
    # Map from nf to indices.  This allows us to use smaller arrays.
    nfset = set()
    for tok, _ in train_toks:
        for label in encoding.labels():
            nfset.add(sum(val for (id, val) in encoding.encode(tok, label)))
    return {nf: i for (i, nf) in enumerate(nfset)}


def calculate_deltas(
    train_toks,
    classifier,
    unattested,
    ffreq_empirical,
    nfmap,
    nfarray,
    nftranspose,
    encoding,
):
    """
    Calculate the update values for the classifier weights for
    this iteration of IIS.  These update weights are the value of
    ``delta`` that solves the equation::

      ffreq_empirical[i]
             =
      SUM[fs,l] (classifier.prob_classify(fs).prob(l) *
                 feature_vector(fs,l)[i] *
                 exp(delta[i] * nf(feature_vector(fs,l))))

    Where:
        - *(fs,l)* is a (featureset, label) tuple from ``train_toks``
        - *feature_vector(fs,l)* = ``encoding.encode(fs,l)``
        - *nf(vector)* = ``sum([val for (id,val) in vector])``

    This method uses Newton's method to solve this equation for
    *delta[i]*.  In particular, it starts with a guess of
    ``delta[i]`` = 1; and iteratively updates ``delta`` with:

    | delta[i] -= (ffreq_empirical[i] - sum1[i])/(-sum2[i])

    until convergence, where *sum1* and *sum2* are defined as:

    |    sum1[i](delta) = SUM[fs,l] f[i](fs,l,delta)
    |    sum2[i](delta) = SUM[fs,l] (f[i](fs,l,delta).nf(feature_vector(fs,l)))
    |    f[i](fs,l,delta) = (classifier.prob_classify(fs).prob(l) .
    |                        feature_vector(fs,l)[i] .
    |                        exp(delta[i] . nf(feature_vector(fs,l))))

    Note that *sum1* and *sum2* depend on ``delta``; so they need
    to be re-computed each iteration.

    The variables ``nfmap``, ``nfarray``, and ``nftranspose`` are
    used to generate a dense encoding for *nf(ltext)*.  This
    allows ``_deltas`` to calculate *sum1* and *sum2* using
    matrices, which yields a significant performance improvement.

    :param train_toks: The set of training tokens.
    :type train_toks: list(tuple(dict, str))
    :param classifier: The current classifier.
    :type classifier: ClassifierI
    :param ffreq_empirical: An array containing the empirical
        frequency for each feature.  The *i*\ th element of this
        array is the empirical frequency for feature *i*.
    :type ffreq_empirical: sequence of float
    :param unattested: An array that is 1 for features that are
        not attested in the training data; and 0 for features that
        are attested.  In other words, ``unattested[i]==0`` iff
        ``ffreq_empirical[i]==0``.
    :type unattested: sequence of int
    :param nfmap: A map that can be used to compress ``nf`` to a dense
        vector.
    :type nfmap: dict(int -> int)
    :param nfarray: An array that can be used to uncompress ``nf``
        from a dense vector.
    :type nfarray: array(float)
    :param nftranspose: The transpose of ``nfarray``
    :type nftranspose: array(float)
    """
    # These parameters control when we decide that we've converged.
    NEWTON_CONVERGE = 1e-12
    MAX_NEWTON = 300

    deltas = numpy.ones(encoding.length(), "d")

    # Precompute the A matrix:
    # A[nf][id] = sum ( p(fs) * p(label|fs) * f(fs,label) )
    # over all label,fs s.t. num_features[label,fs]=nf
    A = numpy.zeros((len(nfmap), encoding.length()), "d")

    for tok, label in train_toks:
        dist = classifier.prob_classify(tok)

        for label in encoding.labels():
            # Generate the feature vector.
            feature_vector = encoding.encode(tok, label)
            # Find the number of active features.
            nf = sum(val for (id, val) in feature_vector)
            # Update the A matrix.
            for (id, val) in feature_vector:
                A[nfmap[nf], id] += dist.prob(label) * val
    A /= len(train_toks)

    # Iteratively solve for delta with Newton's method.
    for rangenum in range(MAX_NEWTON):
        nf_delta = numpy.outer(nfarray, deltas)
        exp_nf_delta = 2**nf_delta
        nf_exp_nf_delta = nftranspose * exp_nf_delta
        sum1 = numpy.sum(exp_nf_delta * A, axis=0)
        sum2 = numpy.sum(nf_exp_nf_delta * A, axis=0)

        # Avoid division by zero.
        for fid in unattested:
            sum2[fid] += 1

        # Update the deltas.
        deltas -= (ffreq_empirical - sum1) / -sum2

        # We can stop once we converge.
        n_error = numpy.sum(abs(ffreq_empirical - sum1)) / numpy.sum(abs(deltas))
        if n_error < NEWTON_CONVERGE:
            return deltas

    return deltas


def train_maxent_classifier_with_megam(
    train_toks, trace=3, encoding=None, labels=None, gaussian_prior_sigma=0, **kwargs
):
    """
    Train a new ``ConditionalExponentialClassifier``, using the given
    training samples, using the external ``megam`` library.  This
    ``ConditionalExponentialClassifier`` will encode the model that
    maximizes entropy from all the models that are empirically
    consistent with ``train_toks``.

    :see: ``train_maxent_classifier()`` for parameter descriptions.
    :see: ``nltk.classify.megam``
    Trs   rt   Nrr   r   )r   r   z$Specify encoding or labels, not bothznltk-prefixw)rs   rt   z,Error while creating megam training file: %s)z-nobiasz-repeat10z	-explicitz-fvalsr(   r4   z-lambdaz%.2fz-tunero   z-quietrp   z-maxir9   ll_deltaz-dppZcostz-multilabelZ
multiclasszWarning: unable to delete z: )getr   r}   r~   tempfilemkstempopenr   oscloseOSErrorrB   r`   r   removerP   r   r   r   r   er   )r   ru   r   r   rv   r   rs   rt   rr   fdtrainfile_name	trainfiler  r   Zinv_variancestdoutr   r   r   r   r{     sl   





 
r{   c                   @   s   e Zd Zedd ZdS )r|   c                 K   s  | dd}| dd}| dd }| dd }| dd}| d	d}| d
}	| d}
|s8tj|||d}tjddd\}}tjdd\}}t|d}t||| |  g }|dg |d|g |rt|dd|d  g |	r|dd|	 g |
r|ddt	|
 g |d|g |d|g |dk r|dg n|dg t
| t|}t|}W d    n1 sw   Y  t| t| |ttj9 }| ||S )Nr   Ztao_lmvmru   ro   r   r   rv   r   rr   rp   rq   r   znltk-tadm-events-z.gz)r  suffixznltk-tadm-weights-r  r  z-monitorz-methodz-l2z%.6fr4   z-max_itz%dz-fatolz
-events_inz-params_outz2>&1z-summary)r  r   r}   r  r  r   r	   r  extendrB   r   r  r   r  r  r   r   r  )r   r   r   r   ru   r   r   sigmarr   rp   r  Ztrainfile_fdr  Zweightfile_fdZweightfile_namer  r   Z
weightfiler   r   r   r   r}     sR   








zTadmMaxentClassifier.trainN)r   r   r   r   r}   r   r   r   r   r|     s    r|   c           	   	   C   s  dd l }ddlm} | }t|  d}|tt|j||}W d    n1 s-w   Y  t|  d}|	|}W d    n1 sIw   Y  t|  d}||}W d    n1 sew   Y  t|  d}|
|}W d    n1 sw   Y  ||||fS )Nr   )MaxentDecoder/weights.txt/mapping.tab/labels.txt/alwayson.tab)r   nltk.tabdatar$  r  r  ra   mapZfloat64Ztxt2listZtupkey2dictZ
tab2ivdict)	tab_dirr   r$  Zmdecr   wgtmpglabaonr   r   r   load_maxent_params  s    r0  /tmpc           
   	   C   s`  ddl m} ddlm} ddlm} | }||s|| td|  t| dd}	|	|	t
t|    W d    n1 sEw   Y  t| dd}	|	||  W d    n1 sfw   Y  t| d	d}	|	|	|  W d    n1 sw   Y  t| d
d}	|	||  W d    d S 1 sw   Y  d S )Nr   )mkdir)isdir)MaxentEncoderzSaving Maxent parameters in r%  r  r&  r'  r(  )r  r2  Zos.pathr3  r)  r4  rP   r  writeZlist2txtr*  reprtolistZtupdict2tabZ
ivdict2tab)
r,  r-  r.  r/  r+  r2  r3  r4  Zmencr   r   r   r   save_maxent_params2  s&   "r8  c                  C   sN   ddl m}  ddlm} | d}t|\}}}}tt|||d|}||dS )Nr   )find)ClassifierBasedPOSTaggerz.taggers/maxent_treebank_pos_tagger_tab/english)r   )r   )	nltk.datar9  Znltk.tag.sequentialr:  r0  r   r   )r9  r:  r+  r,  r-  r.  r/  Zmcr   r   r   maxent_pos_taggerI  s   
r<  c                  C   s   ddl m}  | tj}d S )Nr   )
names_demo)nltk.classify.utilr=  r   r}   )r=  r   r   r   r   demoX  s   r?  __main__)ro   NN)ro   NNr   )r1  )1r   r   ImportErrorr  r  collectionsr   Znltk.classify.apir   Znltk.classify.megamr   r   r   Znltk.classify.tadmr   r   r	   r>  r
   r   r   r;  r   Znltk.probabilityr   Z	nltk.utilr   __docformat__r   r   r   r   r   r   r   r   rz   r   r   ry   r   r  r{   r|   r0  r8  r<  r?  r   r   r   r   r   <module>   s`   -  I/ K=8 k
b

\ 	
\?

