o
    rZh1                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZ ddlmZ dag dZddd	Zd
d ZG dd de
ZG dd dZedkriddlmZmZ dd ZeeeZdS dS )z;
Classifiers that make use of the external 'Weka' package.
    N)stdin)ClassifierI)config_javajava)DictionaryProbDist).z/usr/share/wekaz/usr/local/share/wekaz/usr/lib/wekaz/usr/local/lib/wekac                 C   s   t   | d ur	| atd u rPt}dtjv r|dtjd  |D ]0}tjtj|drOtj|dat	t}|rEt
dt d| d nt
dt  t	t qtd u rXtdd S )	NZWEKAHOMEr   zweka.jarz[Found Weka: z
 (version z)]z[Found Weka: %s]zUnable to find weka.jar!  Use config_weka() or set the WEKAHOME environment variable. For more information about Weka, please see https://www.cs.waikato.ac.nz/ml/weka/)r   _weka_classpath_weka_searchosenvironinsertpathexistsjoin_check_weka_versionprintLookupError)	classpathZ
searchpathr   version r   A/var/www/auris/lib/python3.10/site-packages/nltk/classify/weka.pyconfig_weka"   s*   
r   c              	   C   sp   zt | }W n ttfy       Y d S zz|dW W |  S  ty2   Y W |  d S w |  w )Nzweka/core/version.txt)zipfileZipFile
SystemExitKeyboardInterruptreadcloseKeyError)jarzfr   r   r   r   C   s   

r   c                   @   sb   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdddddddZ	e
dg dfddZdS )WekaClassifierc                 C   s   || _ || _d S N)
_formatter_model)self	formattermodel_filenamer   r   r   __init__T   s   
zWekaClassifier.__init__c                 C   s   |  |g dS )N)-p0z-distribution_classify_manyr%   featuresetsr   r   r   prob_classify_manyX      z!WekaClassifier.prob_classify_manyc                 C   s   |  |ddgS )Nr)   r*   r+   r-   r   r   r   classify_many[   r0   zWekaClassifier.classify_manyc           	      C   s   t   t }zZtj|d}| j|| dd| jd|g| }t	|t
tjtjd\}}|r>|s>d|v r8tdtd| | |tjd	W t|D ]}ttj|| qOt| S t|D ]}ttj|| qgt| w )
Nz	test.arff!weka.classifiers.bayes.NaiveBayesz-lz-T)r   stdoutstderrzIllegal options: -distributionzOThe installed version of weka does not support probability distribution output.z"Weka failed to generate output:
%s
)r   tempfilemkdtempr
   r   r   r#   writer$   r   r   
subprocessPIPE
ValueErrorparse_weka_outputdecoder   encodingsplitlistdirremovermdir)	r%   r.   optionstemp_dirZtest_filenamecmdr3   r4   fr   r   r   r,   ^   s@   
zWekaClassifier._classify_manyc                 C   s2   dd t d|D }tt| j |}t|S )Nc                 S   s   g | ]
}|  rt|qS r   )stripfloat).0vr   r   r   
<listcomp>   s    z:WekaClassifier.parse_weka_distribution.<locals>.<listcomp>z[*,]+)rer?   dictzipr#   labelsr   )r%   sZprobsr   r   r   parse_weka_distribution   s   z&WekaClassifier.parse_weka_distributionc                    s   t |D ]\}}| dr||d  } nq|d  g dkr-dd |dd  D S |d  g dkrD fdd|dd  D S td	|d rSd
d |D S |d d D ]}t| qYtd|d  )Ninst#r   )rR   actual	predictederrorZ
predictionc                 S   s*   g | ]}|  r| d  dd qS )   :   rG   r?   rI   liner   r   r   rK      s   * z4WekaClassifier.parse_weka_output.<locals>.<listcomp>rX   )rR   rS   rT   rU   distributionc                    s&   g | ]}|  r | d  qS ))rG   rQ   r?   rZ   r%   r   r   rK      s    z^0 \w+ [01]\.[0-9]* \?\s*$c                 S   s    g | ]}|  r| d  qS )rX   rY   rZ   r   r   r   rK      s     
   zRUnhandled output format -- your version of weka may not be supported.
  Header: %s)	enumeraterG   
startswithr?   rL   matchr   r;   )r%   linesir[   r   r^   r   r<      s(   


z WekaClassifier.parse_weka_outputr2   zweka.classifiers.trees.J48z#weka.classifiers.functions.Logisticzweka.classifiers.functions.SMOzweka.classifiers.lazy.KStarzweka.classifiers.rules.JRip)
naivebayesC4.5Zlog_regressionZsvmZkstarZripperre   Tc                 C   s  t   t|}t }zatj|d}||| || j	v r%| j	| }	n|| j	
 v r/|}	ntd| |	d|d|g}
|
t|7 }
|rHtj}nd }t|
t|d t||W t|D ]}ttj|| q[t| S t|D ]}ttj|| qst| w )Nz
train.arffzUnknown classifier %sz-dz-t)r   r3   )r   ARFF_Formatter
from_trainr6   r7   r
   r   r   r8   _CLASSIFIER_CLASSvaluesr;   listr9   r:   r   r   r!   r@   rA   rB   )clsr'   r.   
classifierrC   quietr&   rD   Ztrain_filenameZ	javaclassrE   r3   rF   r   r   r   train   s0   



zWekaClassifier.trainN)__name__
__module____qualname__r(   r/   r1   r,   rQ   r<   ri   classmethodro   r   r   r   r   r!   S   s&    ,1	r!   c                   @   sV   e Zd ZdZdd Zdd Zdd Zdd	 Zed
d Z	dd Z
dddZdd ZdS )rg   z
    Converts featuresets and labeled featuresets to ARFF-formatted
    strings, appropriate for input into Weka.

    Features and classes can be specified manually in the constructor, or may
    be determined from data using ``from_train``.
    c                 C   s   || _ || _dS )a)  
        :param labels: A list of all class labels that can be generated.
        :param features: A list of feature specifications, where
            each feature specification is a tuple (fname, ftype);
            and ftype is an ARFF type string such as NUMERIC or
            STRING.
        N)_labels	_features)r%   rO   featuresr   r   r   r(      s   
zARFF_Formatter.__init__c                 C   s   |   | | S )zBReturns a string representation of ARFF output for the given data.)header_sectiondata_section)r%   tokensr   r   r   format
  s   zARFF_Formatter.formatc                 C   s
   t | jS )zReturns the list of classes.)rk   rt   r^   r   r   r   rO     s   
zARFF_Formatter.labelsc                 C   s0   t |ds
t|d}|| | |  dS )z.Writes ARFF data to a file for the given data.r8   wN)hasattropenr8   rz   r   )r%   outfilery   r   r   r   r8     s   

zARFF_Formatter.writec                 C   s   dd | D }i }| D ]K\}}|  D ]B\}}tt|tr!d}n"tt|tttfr.d}ntt|tr8d}n|du r=qtd| ||||krQtd| |||< qqt	|  }t
||S )	z
        Constructs an ARFF_Formatter instance with class labels and feature
        types determined from the given data. Handles boolean, numeric and
        string (note: not nominal) types.
        c                 S   s   h | ]\}}|qS r   r   )rI   toklabelr   r   r   	<setcomp>!      z,ARFF_Formatter.from_train.<locals>.<setcomp>z{True, False}ZNUMERICSTRINGNzUnsupported value type %rzInconsistent type for %s)items
issubclasstypeboolintrH   strr;   getsortedrg   )ry   rO   rv   r   r   fnamefvalftyper   r   r   rh     s&   

zARFF_Formatter.from_trainc                 C   sT   ddt    }|d7 }| jD ]\}}|d||f 7 }q|ddd| jf 7 }|S )z#Returns an ARFF header as a string.z3% Weka ARFF file
% Generated automatically by NLTK
z%% %s

z@RELATION rel

z@ATTRIBUTE %-30r %s
z@ATTRIBUTE %-30r {%s}
z-label-,)timectimeru   r   rt   )r%   rP   r   r   r   r   r   rw   9  s   
zARFF_Formatter.header_sectionNc              	   C   s   |du r|ot |d ttf}|sdd |D }d}|D ]!\}}| jD ]\}}|d| || 7 }q#|d| | 7 }q|S )a  
        Returns the ARFF data section for the given data.

        :param tokens: a list of featuresets (dicts) or labelled featuresets
            which are tuples (featureset, label).
        :param labeled: Indicates whether the given tokens are labeled
            or not.  If None, then the tokens will be assumed to be
            labeled if the first token's value is a tuple or list.
        Nr   c                 S   s   g | ]}|d fqS r"   r   )rI   r   r   r   r   rK   ]  r   z/ARFF_Formatter.data_section.<locals>.<listcomp>z
@DATA
z%s,z%s
)
isinstancetuplerk   ru   _fmt_arff_valr   )r%   ry   ZlabeledrP   r   r   r   r   r   r   r   rx   N  s   zARFF_Formatter.data_sectionc                 C   s<   |d u rdS t |ttfrd| S t |trd| S d| S )N?z%sz%r)r   r   r   rH   )r%   r   r   r   r   r   h  s   
zARFF_Formatter._fmt_arff_valr"   )rp   rq   rr   __doc__r(   rz   rO   r8   staticmethodrh   rw   rx   r   r   r   r   r   rg      s    

rg   __main__)binary_names_demo_features
names_democ                 C   s   t d| dS )Nz/tmp/name.modelrf   )r!   ro   )r.   r   r   r   make_classifierv  s   r   r"   )r   r
   rL   r9   r6   r   r   sysr   Znltk.classify.apir   Znltk.internalsr   r   Znltk.probabilityr   r   r	   r   r   r!   rg   rp   Znltk.classify.utilr   r   r   rm   r   r   r   r   <module>   s.   
	! $}