
    /h1                         S r SSKrSSKrSSKrSSKrSSKrSSKrSSKJr  SSK	J
r
  SSKJrJr  SSKJr  Sq/ SQrSS jrS	 r " S
 S\
5      r " S S5      r\S:X  a  SSKJrJr  S r\" \\5      rgg)z;
Classifiers that make use of the external 'Weka' package.
    N)stdin)ClassifierI)config_javajava)DictionaryProbDist).z/usr/share/wekaz/usr/local/share/wekaz/usr/lib/wekaz/usr/local/lib/wekac                 D   [        5         U b  U q[        c  [        nS[        R                  ;   a#  UR                  S[        R                  S   5        U H  n[        R                  R                  [        R                  R                  US5      5      (       d  MG  [        R                  R                  US5      q[        [        5      nU(       a  [        S[         SU S35        O[        S[        -  5        [        [        5        M     [        c  [        S5      eg )	NWEKAHOMEr   zweka.jarz[Found Weka: z
 (version z)]z[Found Weka: %s]zUnable to find weka.jar!  Use config_weka() or set the WEKAHOME environment variable. For more information about Weka, please see https://www.cs.waikato.ac.nz/ml/weka/)r   _weka_classpath_weka_searchosenvironinsertpathexistsjoin_check_weka_versionprintLookupError)	classpath
searchpathr   versions       J/var/www/auris/envauris/lib/python3.13/site-packages/nltk/classify/weka.pyconfig_wekar   "   s     M#!
#aJ!78Dww~~bggll4<=="$'',,tZ"@-o>M/):*WIRPQ,>?#O4  4
 	
     c                     [         R                  " U 5      n  UR	                  S5      UR                  5         $ ! [        [        4 a    e    g = f! [         a     UR                  5         g f = f! UR                  5         f = f)Nzweka/core/version.txt)zipfileZipFile
SystemExitKeyboardInterruptreadcloseKeyError)jarzfs     r   r   r   C   s|    __S!
	7723 	
 )*   	

	 	
s,   ; A A
A/A2 .A//A2 2Bc                   d    \ rS rSrS rS rS rS rS rS r	SS	S
SSSS.r
\S/ S4S j5       rSrg)WekaClassifierS   c                     Xl         X l        g N
_formatter_model)self	formattermodel_filenames      r   __init__WekaClassifier.__init__T   s    #$r   c                 *    U R                  U/ SQ5      $ )N)-p0z-distribution_classify_manyr.   featuresetss     r   prob_classify_many!WekaClassifier.prob_classify_manyX   s    "";0LMMr   c                 *    U R                  USS/5      $ )Nr4   r5   r6   r8   s     r   classify_manyWekaClassifier.classify_many[   s    "";s<<r   c           	         [        5         [        R                  " 5       n [        R                  R                  US5      nU R                  R                  XA5        SSU R                  SU/U-   n[        U[        [        R                  [        R                  S9u  pgU(       a&  U(       d  SU;   a  [        S5      e[        SU-  5      eU R                  UR                  [         R"                  5      R%                  S	5      5      [        R&                  " U5       H6  n[        R(                  " [        R                  R                  X85      5        M8     [        R*                  " U5        $ ! [        R&                  " U5       H6  n[        R(                  " [        R                  R                  X85      5        M8     [        R*                  " U5        f = f)
Nz	test.arff!weka.classifiers.bayes.NaiveBayesz-lz-T)r   stdoutstderrzIllegal options: -distributionzOThe installed version of weka does not support probability distribution output.z"Weka failed to generate output:
%s
)r   tempfilemkdtempr   r   r   r,   writer-   r   r   
subprocessPIPE
ValueErrorparse_weka_outputdecoder   encodingsplitlistdirremovermdir)	r.   r9   optionstemp_dirtest_filenamecmdrA   rB   fs	            r   r7   WekaClassifier._classify_many^   s[   ##%%	GGLL;?MOO!!-= 4 C  $)!!	 V f3v=$"  %%JV%STT ))&--*G*M*Md*ST ZZ)		"'',,x34 *HHX ZZ)		"'',,x34 *HHXs   C&E. .A(Gc                    [         R                  " SU5       Vs/ s H$  o"R                  5       (       d  M  [        U5      PM&     nn[	        [        U R                  R                  5       U5      5      n[        U5      $ s  snf )Nz[*,]+)	rerM   stripfloatdictzipr,   labelsr   )r.   svprobss       r   parse_weka_distribution&WekaClassifier.parse_weka_distribution   s^    #%88GQ#7E#7a779q#7ES//159:!%(( Fs
   A?A?c                 :   [        U5       H/  u  p#UR                  5       R                  S5      (       d  M+  XS  n  O   US   R                  5       / SQ:X  aO  USS   Vs/ s H>  o3R                  5       (       d  M  UR                  5       S   R                  S5      S   PM@     sn$ US   R                  5       / SQ:X  aM  USS   Vs/ s H<  nUR                  5       (       d  M  U R	                  UR                  5       S   5      PM>     sn$ [
        R                  " S	US   5      (       a:  U Vs/ s H,  o3R                  5       (       d  M  UR                  5       S   PM.     sn$ US S
  H  n[        U5        M     [        SUS   -  5      es  snf s  snf s  snf )Ninst#r   )rd   actual	predictederror
prediction      :)rd   re   rf   rg   distributionz^0 \w+ [01]\.[0-9]* \?\s*$
   zRUnhandled output format -- your version of weka may not be supported.
  Header: %s)		enumeraterY   
startswithrM   ra   rX   matchr   rI   )r.   linesilines       r   rJ    WekaClassifier.parse_weka_output   sr    'GAzz|&&w//b	 (
 8>>VV>CABiXid::<1DJJLO))#.q1iXX1X^^ "
 
 "!"I%D::< ?,,TZZ\"-=>%  XX3U1X>>05FODJJLOFF cr
d #!&q* + Y Gs$   F;)FF$&F0FFr@   zweka.classifiers.trees.J48z#weka.classifiers.functions.Logisticzweka.classifiers.functions.SMOzweka.classifiers.lazy.KStarzweka.classifiers.rules.JRip)
naivebayesC4.5log_regressionsvmkstarripperrv   Tc           	         [        5         [        R                  U5      n[        R                  " 5       n [
        R                  R                  US5      nUR                  X5        X0R                  ;   a  U R                  U   n	O.X0R                  R                  5       ;   a  Un	O[        SU-  5      eU	SUSU/n
U
[        U5      -  n
U(       a  [        R                  nOS n[        U
[         US9  [#        Xa5      [
        R$                  " U5       H6  n[
        R&                  " [
        R                  R                  X|5      5        M8     [
        R(                  " U5        $ ! [
        R$                  " U5       H6  n[
        R&                  " [
        R                  R                  X|5      5        M8     [
        R(                  " U5        f = f)Nz
train.arffzUnknown classifier %sz-dz-t)r   rA   )r   ARFF_Formatter
from_trainrD   rE   r   r   r   rF   _CLASSIFIER_CLASSvaluesrI   listrG   rH   r   r   r'   rN   rO   rP   )clsr0   r9   
classifierrQ   quietr/   rR   train_filename	javaclassrT   rA   rU   s                r   trainWekaClassifier.train   sR    	 #--k:	##%	WW\\(LANOON822211*=	44;;==&	 !8:!EFF dND.IC4= C#? ")< ZZ)		"'',,x34 *HHX ZZ)		"'',,x34 *HHXs   CE# #A(Gr+   N)__name__
__module____qualname____firstlineno__r1   r:   r=   r7   ra   rJ   r   classmethodr   __static_attributes__ r   r   r'   r'   S   s^    %N=*X)
!b :,?/./ 
  * *r   r'   c                   V    \ rS rSrSrS rS rS rS r\	S 5       r
S rSS
 jrS rSrg	)r}      z
Converts featuresets and labeled featuresets to ARFF-formatted
strings, appropriate for input into Weka.

Features and classes can be specified manually in the constructor, or may
be determined from data using ``from_train``.
c                     Xl         X l        g)z
:param labels: A list of all class labels that can be generated.
:param features: A list of feature specifications, where
    each feature specification is a tuple (fname, ftype);
    and ftype is an ARFF type string such as NUMERIC or
    STRING.
N)_labels	_features)r.   r]   featuress      r   r1   ARFF_Formatter.__init__   s     !r   c                 F    U R                  5       U R                  U5      -   $ )zBReturns a string representation of ARFF output for the given data.)header_sectiondata_section)r.   tokenss     r   formatARFF_Formatter.format
  s!    ""$t'8'8'@@@r   c                 ,    [        U R                  5      $ )zReturns the list of classes.)r   r   )r.   s    r   r]   ARFF_Formatter.labels  s    DLL!!r   c                     [        US5      (       d  [        US5      nUR                  U R                  U5      5        UR	                  5         g)z.Writes ARFF data to a file for the given data.rF   wN)hasattropenrF   r   r"   )r.   outfiler   s      r   rF   ARFF_Formatter.write  s9    w((7C(Gdkk&)*r   c                    U  VVs1 s H  u  pUiM	     nnn0 nU  H  u  pUR                  5        H  u  pV[        [        U5      [        5      (       a  SnO`[        [        U5      [        [
        [        45      (       a  SnO4[        [        U5      [        5      (       a  SnOUc  Mx  [        SW-  5      eUR                  XW5      U:w  a  [        SU-  5      eXtU'   M     M     [        UR                  5       5      n[        X45      $ s  snnf )z
Constructs an ARFF_Formatter instance with class labels and feature
types determined from the given data. Handles boolean, numeric and
string (note: not nominal) types.
z{True, False}NUMERICSTRINGzUnsupported value type %rzInconsistent type for %s)items
issubclasstypeboolintrZ   strrI   getsortedr}   )r   toklabelr]   r   fnamefvalftypes           r   r~   ARFF_Formatter.from_train  s     -33FLS%F3  JC"yy{d4j$//+ET
S%,>??%ET
C00$E\$%@5%HII<<-6$%?%%GHH"'  + !  (..*+f//- 4s   D	c                     SS[         R                  " 5       -  -   nUS-  nU R                   H  u  p#USU<S SU< S3-  nM     USS<S S	S
R                  U R                  5      < S3-  nU$ )z#Returns an ARFF header as a string.z3% Weka ARFF file
% Generated automatically by NLTK
z%% %s

z@RELATION rel

z@ATTRIBUTE 30 rC   z-label-z {,z}
)timectimer   r   r   )r.   r^   r   r   s       r   r   ARFF_Formatter.header_section9  sp    4DJJL() 	
 	
   !NNLEE599A + 	
)SXXdll5KLLr   Nc           	      L   Uc"  U=(       a    [        US   [        [        45      nU(       d  U Vs/ s H  o3S4PM     nnSnU HW  u  p5U R                   H+  u  pgUSU R	                  UR                  U5      5      -  -  nM-     USU R	                  U5      -  -  nMY     U$ s  snf )aY  
Returns the ARFF data section for the given data.

:param tokens: a list of featuresets (dicts) or labelled featuresets
    which are tuples (featureset, label).
:param labeled: Indicates whether the given tokens are labeled
    or not.  If None, then the tokens will be assumed to be
    labeled if the first token's value is a tuple or list.
Nr   z
@DATA
z%s,z%s
)
isinstancetupler   r   _fmt_arff_valr   )r.   r   labeledr   r^   r   r   r   s           r   r   ARFF_Formatter.data_sectionN  s     ?EF1It}!EG-34VcDkVF4  JC $UT//??? !/$,,U333A !
  5s   B!c                     Uc  g[        U[        [        45      (       a  SU-  $ [        U[        5      (       a  SU-  $ SU-  $ )N?z%sz%r)r   r   r   rZ   )r.   r   s     r   r   ARFF_Formatter._fmt_arff_valh  sF    <tSk**$;e$$$;$;r   )r   r   r*   )r   r   r   r   __doc__r1   r   r]   rF   staticmethodr~   r   r   r   r   r   r   r   r}   r}      s?    	"A" 0 0>*4r   r}   __main__)binary_names_demo_features
names_democ                 0    [         R                  SU S5      $ )Nz/tmp/name.modelrw   )r'   r   )r9   s    r   make_classifierr   v  s    ##$5{FKKr   r*   )r   r   rX   rG   rD   r   r   sysr   nltk.classify.apir   nltk.internalsr   r   nltk.probabilityr   r   r   r   r   r'   r}   r   nltk.classify.utilr   r   r   r   r   r   r   <module>r      s    
 	      ) , /
B `[ `Fz zz zIL O-GHJ r   