o
    rZh                     @   s   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z" G dd dZ#dd Z$e%dkrre$  dgZ&dS )zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    N)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)
ChunkScoreRegexpChunkParser)RegexpChunkRule)	conll2000treebank_chunk)ShowText)Tree)in_idlec                   @   s  e Zd ZdZi dddddddd	d
dddddddddddddddddddddd d!d"d#i d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQZg dRZdSedTdUfdVedWdUfdXedYdZfd[ed\d]fd^ed\d]fd_ed`d`dafdbedcdddafdeedfdUfdgedhdUfdiedjdUfg
ZdkZ	 dlZ		 dmZ
	 dnZ	 doZ	 edpdqdrdrdkdsdtdudvZedldldrdrdwdkdsdtdudx	Zedydzd{d{dkdsdtdud|d}	Zed~dsdtdZedddZeddtdtddZed{d{dkdsdtdddZeddddZdZdrZedrdZZeddZZdZdd Z					dddZdd Zdd Zdd Zdd ZdZ dZ!dd Z"dZ#dd Z$dd Z%dd Z&dZ'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0dddZ1dd Z2dd Z3dd Z4dd Z5dddÄZ6ddń Z7ddǄ Z8ddɄ Z9dZ:ddd̄Z;ddd΄Z<dddЄZ=dd҄ Z>dddԄZ?dddքZ@dd؄ ZAdS )RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    CCzCoordinating conjunctionzPRP$zPossessive pronounZCDzCardinal numberZRBZAdverbZDTZ
DeterminerZRBRzAdverb, comparativeZEXzExistential thereZRBSzAdverb, superlativeZFWzForeign wordZRPZParticleZJJZ	AdjectiveZTOtoZJJRzAdjective, comparativeZUHZInterjectionZJJSzAdjective, superlativeZVBzVerb, base formZLSzList item markerZVBDzVerb, past tenseZMDZModalZNNSzNoun, pluralZNNzNoun, singular or massZVBNzVerb, past participleZVBZzVerb,3rd ps. sing. presentZNNPzProper noun, singularZNNPSzProper noun pluralZWDTzwh-determinerPDTZPredeterminerZWPz
wh-pronounZPOSzPossessive endingzWP$zPossessive wh-pronounZPRPzPersonal pronounZWRBz	wh-adverb(zopen parenthesis)zclose parenthesisz
open quotecommazclose quoteZperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentcolon)z``,z''.#$INZSYMZVBGZVBP:))HelpZ20a-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.)ZRules10a  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Strip rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
)ZRegexps10 60aZ  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
\t<regexp><\#><CD> # This is a comment...</regexp>\n		Matches <match>"#/# 100/CD"</match>
</hangindent>)ZTagsr'   zB<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
redz#a00
foregroundgreenz#080	highlightz#ddd
background	underlineT)r/   Zh1indent   Zlmargin1lmargin2
hangindentr   <   varz#88fregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efeZgroove   word)widthheightr.   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	r?   r@   r.   rA   r*   rB   rC   rD   rE   F   
   z#eef)   )	r?   r@   r.   rA   rB   rC   rD   rE   tabsz#9bb)r.   rC   rD   	helveticaifamilysizez#777   )r.   padxpadyrD   i,  i  )r.   rA   rB   rC   rD   r?   r@   )r.   ZactivebackgroundrA   z#aba   c                 C   sD   t dd|}t dd|}t dd|}| }t dd|}|S )	N((\\.|[^#])*)(#.*)?\1z + z\n\s+z\nz	([^\\])\$z\1\\$)resubstrip)selfgrammar rZ   G/var/www/auris/lib/python3.10/site-packages/nltk/app/chunkparser_app.pynormalize_grammar4  s   z RegexpChunkApp.normalize_grammarr   N NPc                 C   sz  || _ |du r
| j}|| _|du r*|dkrtd}n|dkr$t }ntd| d| _	 || _	 d| _		 d| _
	 || _	 || _	 d| _	 d| _	 g | _	 d| _	 d| _	 d| _	 d| _	 t|d| _	 t  }| _|d	 |d
 |d| j t|| _| jd | | |  | | !| | "| | j#$  |r| j#%d|d  | j#&dd | 'd | (  dS )a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txtZtreebankzUnknown development set %sr   chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   end
insert1.0))_chunk_labelTAGSETtagsetr   Zchunked_sentsr   
ValueErrorchunkerrY   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topZgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxZfocusre   mark_setshow_devsetupdate)rX   ro   rn   rY   ra   ri   rx   rZ   rZ   r[   __init__?  sl   









zRegexpChunkApp.__init__c                    s   | d j | d j | d j | d j | d fdd | d fd	d  j d j  j d j  j d j  j d
 j d S )Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                          S N)save_grammarerX   rZ   r[   <lambda>      z/RegexpChunkApp._init_bindings.<locals>.<lambda>z<Control-o>c                    r   r   )load_grammarr   r   rZ   r[   r     r   z<Configure>)rz   _devset_next_devset_prevtoggle_show_tracer   r   evalbox
_eval_plotrX   rx   rZ   r   r[   r     s   zRegexpChunkApp._init_bindingsc                 C   sR   t || _| jd td| j  d| _tdt| j d d  d| _d S )Nr1   rJ   rK      )r   _sizer}   r   get_fontint
_smallfontr   rZ   rZ   r[   r~     s   
zRegexpChunkApp._init_fontsc                 C   s  t |}t |dd}|jdd| jd |jddd| jd |jddd	| jd |jd
d| jd |jdd| jdd |jdd|d t |dd}|jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jdd|d t |dd}|jd| jd| jd  |jd!| jd"| jd  |jd#| jd$| jd  |jd%| jd&| jd  |jd'd|d t |dd}|jd(d| jd |jd)d|d |j|d* d S )+Nr   )ZtearoffzReset Application)labelr/   commandzSave Current GrammarzCtrl-s)r   r/   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   ZExitr9   zCtrl-q)r   r/   r   r   File)r   r/   menuZTinyrG   )r   variabler/   valuer   ZSmall   ZMediumr1   ZLarge   ZHuge"   ZViewz50 sentences2   )r   r   r   r   z100 sentencesrb   z200 sentences   z500 sentencesi  zDevelopment-SetZAboutr%   )r   )r   Zadd_commandresetr   r   save_historyr{   Zadd_cascadeZadd_radiobuttonr   resizer|   set_devset_sizeaboutconfig)rX   parentZmenubarZfilemenuZviewmenuZ
devsetmenuZhelpmenurZ   rZ   r[   r     s   
zRegexpChunkApp._init_menubarc                 G   s   | j r	|   dS |   dS )Nbreak)_showing_tracer   
show_tracerX   r   rZ   rZ   r[   r   #  s
   z RegexpChunkApp.toggle_show_trace   Fc                 O   s\  | d| j }| d| j }| jd | jjd|d d dddd	}| j|d d
 |d }}| jj||| d  |d dddd}d| j|d d }}	| jd }
| j| jj	dd|d d|
|
d | j| jj	d|	d dd|
|
d | j
  rt| jdkrd }}d }}tdtt| j| jd D ] }| j|  \}}}}t||}t||}t||}t||}qt|d d}t|d d}t|d d}t|d d}nd }}d }}tdD ]T}||| |d | ||    }|	|	| |d | ||    }||  k r|k r&n n| jj||||	dd ||  k r2|	k r?n q| jj||||dd q| j||||	 | j||	||	 | jj|d |	dddd|  d	 | jj|d |dddd|  d	 | jj||	d dddd|  d	 | jj||	d dddd|  d	 d  }}t| jD ]\}\}}}}||| || ||    }|	|	| || ||    }|| jkr| jj|d |d |d |d d d!d d"|d  d#|d   d$|d   | jd%< n| j| jj|d |d |d |d d&d'd |d ur%| j  r%| j| jj||||d'd ||}}qd S )(Nr?   r@   allrG   r=   leftwZ	Precision)justifyanchortextr   sZRecallcenter)r   r   r   r9   r.   r   i  )filloutlineg{Gz?   g      $@z#888)r   rN   rightsez%d%%rb   nenwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r   #afaz#8c8)r   r   Zwinfo_widthZwinfo_heightdeleteZcreate_textZbbox_EVALBOX_PARAMSlowerZcreate_rectangle
_autoscalelenrr   rangemin_SCALE_NmaxZcreate_line	enumeraters   Zcreate_ovalstatus_eval_lines)rX   r   r   r?   r@   tagr   r   rx   ZbotbgZmax_precisionZ
max_recallZmin_precisionZ
min_recallirY   	precisionrecallZfmeasurexyZprev_xZprev_y_fscorerZ   rZ   r[   r   -  s   
 






	



zRegexpChunkApp._eval_plotc           	      C   s"  | j d u rd S | jd u rd| _d S t }t | j | jk r5| j| jkr5d| _| j t	| j
d | jS | j| jkr~| jD ]-\}}}}| j| |krk| j||||f t| jd | _|   d| _d | _ d S q>d| _t| jd| _| j| _| j| _| j dkrd| _d S | j| jt| j| j | j  D ]}| | }| j || q|  j| j7  _| j| j kr| j| j| j! | j" | j# f t| jd | _|   d| _d | _d S d| j | j  }d	| | j$d
< d| _| %t |  | j t	| j
d | j d S )NFTi  r9   r   r`   r]   rb   z$Evaluating on Development Set (%d%%)r   )&rx   rk   _eval_demon_runningtimerq   _EVAL_DELAYrl   ru   afterr   
_EVAL_FREQ_eval_demonrr   r\   appendr   rs   r   rv   r   rg   rw   rY   rt   rW   rn   r   _EVAL_CHUNKr|   r   _chunkparseleavesZscorer   r   Z	f_measurer   _adaptively_modify_eval_chunk)	rX   t0gprfZgoldguessprogressrZ   rZ   r[   r     sh   


zRegexpChunkApp._eval_demonc                 C   s   || j kr#| jdkr#t| jd tt| j| j |  | jd | _dS || jk rAt| jd tt| j| j|  | jd | _dS dS )z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   r9   rG   N)_EVAL_DEMON_MAXr   r   r   r   _EVAL_DEMON_MIN)rX   trZ   rZ   r[   r     s"   


z,RegexpChunkApp._adaptively_modify_eval_chunkc                    s  t |fi  j}|jddd |jddd |jddd |jddd t|fd ji j _t| jd	d
 jd d _	 j	j
dddd  jj
dddd t| jjd}|j
dddd  jj|jd  jd }t ||d}|j
dddd t|fd jd jjdd t|fd jd jjdd t|fd ji j _ jj
dddd i  _ jd }t ||d}|j
dddd t jD ]<\}\}}	}
t|| jd}|j
|d ddd |d|f fdd	 | j|< t |d j|dj
|d d dd  qЈ j jd d  j jd!  jjd"d#d$  jD ]\}} jjd%| fi | q'  jd d  t| jjd} jj|jd |j
dddd t | jd d}t|fd ji j  _! j!jd#d&d' t| jd(d) j d d* _" j"j
dddd |j
dddd t| j#d _$ j$j
dddd t| j!j%d+d, _& j&j j!d-<  j&jd.d/d0  jd }t ||d}|j
dd1dd t|fd2 j'd jjdd t|fd3 j(d jjdd t|fd4 j)d5d6 j _* j*jd)d t|fd7 j+d j _, j,jd)d t-|fi  j. _/t| jd8d) j.d d*}|j
dddd  j/j
ddddd9  jd }t ||d}|j
dd1dd t0 j1 _2 j2d: t3|f j2 j4d;d< jjdd t0 j1 _5 j5d: t3|f j5 j4d=d< jjdd t|fd>d?i jjd)d t|fd ji j6 _7 j7j
dd@dAddddB d5 jdC< d5 j!dC<  jd }t |dDd|dj
ddd  t |ddD|dj
ddd  t |dEd|dj
ddFd  |jd&d#dG  j!jdHdIdJdK  j!jdLdJdMdN  j!jdOdPd  j!jdQdRdSdT  j!jdUdVdSdW  j!jdXdMdY  jjdXdZd  jjd[d\dY  jjd]d^dY  jjd_d`dY  jjdaddbdc d S )dNr      )weightrN   r=   r9   r   fontGrammar:blackr.   )r   r   Zhighlightcolorr.   ZSW)columnrowstickyZNEWS)r   ZNWS)Zyscrollcommandr-   ZEWzPrev Grammar)r   r   r   )sidezNext Grammar)r   r   Sz<ButtonPress>c                    s
     |S r   )	show_help)r   tabr   rZ   r[   r   N  s   
 z.RegexpChunkApp._init_widgets.<locals>.<lambda>)r@   r?   r.   )r   r   )r   elideT)r   tag-%sZboth)expandr   zDevelopment Set:r   )r   r   r   r.   Zhoriz)r   ZorientZxscrollcommandbottomr   )r   r      zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r   r   statez
Show tracezEvaluation:)r   r   r   
columnspanFZZoom)r   r   r   Linesr   ZHistory	   ZNEW)r   r   r   rO   rP   r  r  rG   rQ      )r   r   true-posr   True)r.   r/   	false-negz#800)r/   r*   	false-posz#faatracez#666none)r*   rE   
wrapindentrH   )r3   rE   errorr)   z#feccommentz#840anglez#00fbracez#0a0r4   r;   r2   )8r   _FRAME_PARAMSZgrid_columnconfigureZgrid_rowconfigurer
   r   _GRAMMARBOX_PARAMSr   r   grammarlabelgridr	   Zyviewr   r}   r   _history_prev_BUTTON_PARAMSpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr   HELPrz   _HELPTAB_SPACER	configureZ
tag_configHELP_AUTOTAGr   _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollZxviewdevset_xscrollr   r   r   devset_buttonr   trace_buttonr   r   r   r   rx   r   r   r   r   _STATUS_PARAMSr   )rX   rx   Zframe0Zgrammar_scrollbarr   Zframe3Zhelptab_framer   r   tabstopsr   r   r   paramsZhelp_scrollbarZframe4Zframe1Zframe2rZ   r   r[   r     sr  









zRegexpChunkApp._init_widgetsc                    sX  d| _ d| jd< d| jd< d| jd< | jdd d| jd | j f | jd	< | j	d u r>| j
dd
 | jddd d S | j| j }| j	 }d}dg t| D ]\}\}}|d| 7 } t| qT fddtt|d D | _dd tt|d D | _tt|d D ]}|dkr| j
dd | jddd n| j
dd||d    | jddd | j
d|d  | jddd t|d | }	| | }
| |}| |
}||D ]	}| ||d q|| D ]	}| ||d q|| D ]
}| ||d qq| j
dd | jddd | jd| jjdd d S )NTr   r  normalrf   rc   Development Set (%d/%d)r9   r   z#Trace: waiting for a valid grammar.r  	z%s c                    s,   i | ]}t t D ]	}||f | q
qS rZ   )r   r   ).0r   jcharnumrZ   r[   
<dictcomp>  s    
z-RegexpChunkApp.show_trace.<locals>.<dictcomp>c                 S   s   i | ]	}||d  d  qS )r=   rZ   )r.  r   rZ   rZ   r[   r2    s    r   zStart:
r
  zend -2c linestartzend -2cz
Apply %s:
rd   r  r  r  r	  z
Finished.
rb   g333333?)r   r'  r&  r!  r   rp   r|   r   r"  rk   re   tag_addrn   rulesr   r   r   r   r   r1  linenumr   r   _chunksintersection_color_chunkrx   r   r%  r}   )rX   r   	gold_treer4  Ztagseqwordnumr>   posr   rk   	test_treegold_chunkstest_chunkschunkrZ   r0  r[   r     sZ   








zRegexpChunkApp.show_tracec           
      C   sl  d| j d< | j dd | jD ]\}}}||kr|dddd tt| j d	d
 dD }| j	| j
di | j | j j
|d | j d|d  d}| jD ]N\}}d| d| d}t||D ]:}	| j d||	d ||	d  | j d| ||	d ||	d  | j d||	d ||	d  qeqRq| j	| j
di | j qd| j d< d S )Nr+  r  rf   rc   z
<<TAGSET>>rd   c                 s       | ]}d | V  qdS )z	%s	%sNrZ   )r.  itemrZ   rZ   r[   	<genexpr>  s
    
z+RegexpChunkApp.show_help.<locals>.<genexpr>c                 S   s(   t d| d rd| d fpd| d fS )Nz\w+r   r9   )rU   r8   )Zt_wrZ   rZ   r[   r   "  s    
z*RegexpChunkApp.show_help.<locals>.<lambda>)key)rI   z



















z1.0 + %d charsz(?s)(<z
>)(.*?)(</z>)r   r9   r   r=   rN   r   rZ   )r  r   r  replacejoinsortedlistri   itemsr  r   _HELPTAB_FG_PARAMSre   r  rU   finditerr3  startrc   _HELPTAB_BG_PARAMS)
rX   r   namer)  r   Cr   r*  patternmrZ   rZ   r[   r     s:   

$&	zRegexpChunkApp.show_helpc                 G   s   |  | jd  dS Nr9   r   _view_historyrs   r   rZ   rZ   r[   r  9     zRegexpChunkApp._history_prevc                 G   s   |  | jd  dS rQ  rR  r   rZ   rZ   r[   r  =  rT  zRegexpChunkApp._history_nextc                 C   s.  t dtt| jd |}| jsd S || jkrd S d| jd< | jdd | jd| j| d  | jdd || _| 	| j| d  | 
| j| d | _| jr_dd	 | jd
D }ng }t|| _|   |   | jru|   | jt| jd k rd| jd t| j| jd< d S d| jd< d S )Nr   r9   r+  r  rf   rc   re   c                 S      g | ]}t |qS rZ   r   
fromstringr.  linerZ   rZ   r[   
<listcomp>T      z0RegexpChunkApp._view_history.<locals>.<listcomp>rd   zGrammar {}/{}:r   r   )r   r   r   rr   rs   r   r   re   r   _syntax_highlight_grammarr\   rl   splitr   rk   r   _highlight_devsetr   r   formatr  )rX   indexr4  rZ   rZ   r[   rS  A  s8   



zRegexpChunkApp._view_historyc                 G      |  ddd dS )Nscrollr9   pager   r#  r   rZ   rZ   r[   r   j     zRegexpChunkApp._devset_nextc                 G   ra  )Nrb  r_   rc  r   rd  r   rZ   rZ   r[   r   n  re  zRegexpChunkApp._devset_prevc                 G   s"   | j d u rd S | j   d | _ d S r   )rx   r{   r   rZ   rZ   r[   r{   r  s   


zRegexpChunkApp.destroyc                 G   s   d}| j }|dkr|d dr| | jt|d   n9|dkr7|d dr7| | j|t|d    n|dkrL| tt|d | j   n
J d| d| |r^|   d S d S )	Nr9   rb  unitr   rc  Zmovetozbad scroll command rT   )	r   
startswithr   rp   r   floatr|   r   r   )rX   r   argsNZshowing_tracerZ   rZ   r[   r#  x  s   "zRegexpChunkApp._devset_scrollc                 C   s  |d u r| j }ttd|| j d }|| j kr| jsd S || _ d| _d| jd< d| jd< d| jd< d| jd< | j	d	d
 d| j d | j f | j
d< | j| j | j d  }i | _ddi| _t|D ]>\}}d}t| D ]$\}\}}t|| j||f< || d| d7 }t|| j||d f< qq| jd
|d d d  qe| jd ur|   d| jd< | j | j  }	| j d | j  }
| j|	|
 d S )Nr   r9   Fr+  r  r   r>   rE   rf   rc   r,  r   r]   /rT   r_   z

r=   )rp   r   r   r|   r   r   r'  r&  r!  r   r"  rn   r1  r5  r   r   r   re   rk   r^  r$  r}   )rX   r`  samplesentnumsentZlinestrr:  r>   r;  firstlastrZ   rZ   r[   r     s>   







zRegexpChunkApp.show_devsetc                 C   s\   t  }d}|D ]$}t|tr'| | jkr |||t| f |t|7 }q|d7 }q|S )Nr   r9   )r}   
isinstancer   r   rg   addr   )rX   treechunksr:  childrZ   rZ   r[   r6    s   

zRegexpChunkApp._chunksc                 C   sT  | j d u rd S | jddd | jddd | jddd | jddd t|dD ]y\}}| s7q.td|}d }|	d	rg|
d	}d
|d |
d	f }d
|d |d	f }| jd|| td|D ]9}|d ur{|
 |kr{ n,d
|d |
 f }d
|d | f }|	 dv r| jd|| qm| jd|| qmq.d S )Nr  rf   rc   r  r  r4   rd   z(\\.|[^#])*(#.*)?r=   z%d.%dr9   z[<>{}]z<>)rx   r   
tag_remover3  r   r]  rW   rU   r8   grouprK  rc   rJ  )rX   rY   linenorY  rP  comment_startr   r   rZ   rZ   r[   r\    s4   


z(RegexpChunkApp._syntax_highlight_grammarc                 C   s   | j d u rd S | jddd g | _t|dD ];\}}tdd|}| }|rTzt	
| W q tyS } z| jdd|d  d	|d   W Y d }~qd }~ww qd
| jd< d S )Nr  rf   rc   rd   rR   rS   z%s.0r9   z%s.0 lineendr]   r   )rx   r   rv  Z_grammarcheck_errsr   r]  rU   rV   rW   r   rW  rj   r3  r   )rX   rY   rx  rY  r   rZ   rZ   r[   _grammarcheck  s$   
zRegexpChunkApp._grammarcheckc              
   G   s  |rt   | _| jdd | _}| |}|| jkrd S || _| jt| j	d k r/d| j
d< | | z|rBdd |dD }ng }W n ty` } z| | d | _W Y d }~d S d }~ww t|| _| jd	dd t   | _| jr{|   n|   | js|   d S d S )
Nrf   rc   r9   r   r   c                 S   rU  rZ   rV  rX  rZ   rZ   r[   rZ    r[  z)RegexpChunkApp.update.<locals>.<listcomp>rd   r  )r   rq   r   r   rY   r\   rl   rs   r   rr   r  r\  r]  rj   rz  rk   r   rv  rm   r   r   r^  r   r   )rX   eventrY   rl   r4  r   rZ   rZ   r[   r     s@   








zRegexpChunkApp.updatec                 C   s   |d u r| j | j| jd  }| jddd | jddd | jddd t|D ]@\}}| | }| |}| |}||D ]	}| 	||d qE|| D ]	}| 	||d qS|| D ]	}| 	||d qaq+d S )Nr9   r  rf   rc   r  r	  )
rn   rp   r!  rv  r   r   r   r6  r7  r8  )rX   rl  rm  r9  r<  r=  r>  r?  rZ   rZ   r[   r^    s"   

z RegexpChunkApp._highlight_devsetc              
   C   sL   z| j |W S  ttfy% } z| jddd |W  Y d }~S d }~ww )Nr  rf   rc   )rk   parserj   
IndexErrorr   r3  )rX   wordsr   rZ   rZ   r[   r   5  s   zRegexpChunkApp._chunkparsec              	   C   sT   |\}}| j || j|  d| j||f  | j|  d| j||f d   d S )Nr    r9   )r!  r3  r5  r1  )rX   rm  r?  r   rK  rc   rZ   rZ   r[   r8  @  s    zRegexpChunkApp._color_chunkc                 C   sH   d | _ d | _d | _d| _g | _d| _| jdd | d | 	  d S )Nr   rf   rc   )
rk   rY   rl   rm   rr   rs   r   r   r   r   r   rZ   rZ   r[   r   H  s   
zRegexpChunkApp.resetz# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c                 C   s   |sddg}t |dd}|sd S | jr2| j| | jd d kr2dd | jd d	d  D \}}}n| jd u r>d
 } }}nd } }}t|d}|| jtt	
 | j|||| j d  W d    d S 1 skw   Y  d S )NzChunk Gramamr.chunkz	All files*r  	filetypesZdefaultextensionr_   r   c                 s   s    | ]	}d d|  V  qdS )z%.2f%%rb   NrZ   )r.  vrZ   rZ   r[   rB  j  s    
z.RegexpChunkApp.save_grammar.<locals>.<genexpr>r9   zGrammar not well formedzNot finished evaluation yetr   )datern   r   r   r   rY   )r   rr   rl   r\   rk   openwriteSAVE_GRAMMAR_TEMPLATEdictr   ctimero   rY   rW   )rX   filenameftypesr   r   r   outfilerZ   rZ   r[   r   a  s8   
"zRegexpChunkApp.save_grammarc                 C   s   |sddg}t |dd}|sd S | jdd |   t|}| }W d    n1 s.w   Y  tdd| }| j	d| |   d S )	Nr  r  r  r  rf   rc   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
r]   )
r   r   r   r   r  readrU   rV   lstripre   )rX   r  r  infilerY   rZ   rZ   r[   r     s"   

zRegexpChunkApp.load_grammarc           
   	   C   sd  |sddg}t |dd}|sd S t|d}|d |dt   |d| j  t| jD ]4\}\}}}}d	|d
 t| j|d |d |d f }	|d|	  |d	dd |
  D  q1| jrv| j| | jd d ks| jd u r|d n|d |d	dd | j
  D  W d    d S W d    d S 1 sw   Y  d S )N)zChunk Gramamr History.txtr  r  r  r   z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)r9   rb   z
%s
r]   c                 s   r@  z  %s
NrZ   rX  rZ   rZ   r[   rB        z.RegexpChunkApp.save_history.<locals>.<genexpr>r_   r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c                 s   r@  r  rZ   rX  rZ   rZ   r[   rB    r  )r   r  r  r   r  ro   r   rr   r   rE  rW   r]  rl   r\   rk   rY   )
rX   r  r  r  r   r   r   r   r   hdrrZ   rZ   r[   r     s<   
"$

"zRegexpChunkApp.save_historyc                 G   sF   d}d}zddl m} |||d  W d S    t| j|| Y d S )Nz<NLTK RegExp Chunk Parser Application
Written by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messagery   )Ztkinter.messageboxr  showr   rx   )rX   r   ZABOUTZTITLEr  rZ   rZ   r[   r     s   zRegexpChunkApp.aboutc                 C   sJ   |d ur
| j | | j tt| j| j   | d | d d S )Nr9   r   )r|   r}   r   r   rn   r   r   rX   rM   rZ   rZ   r[   r     s
   
zRegexpChunkApp.set_devset_sizec                 C   sX   |d ur
| j | | j  }| jjt| d | jjtdt| d d d d S )N)rM   ir   r1   )r   r}   r   r   r  absr   r   r  rZ   rZ   r[   r     s
   
&zRegexpChunkApp.resizec                 O   s    t  rdS | jj|i | dS )z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   rx   mainloop)rX   ri  kwargsrZ   rZ   r[   r    s   zRegexpChunkApp.mainloop)r   Nr]   r^   Nr   )B__name__
__module____qualname____doc__rh   r  r  r  r   r   r   r   r   r  r  r   r(  Z_FONT_PARAMSr  r   r  Z_HELPTAB_BG_COLORZ_HELPTAB_FG_COLORrI  rL  r  r\   r   r   r~   r   r   r   Z_DRAW_LINESr   r   r   r   r   r   r   r   r  r  rS  r   r   r{   r#  r   r6  r\  rz  r   r^  r   r8  r   r  r   r   r   r   r   r   r  rZ   rZ   rZ   r[   r   -   s   	
 !"#7h
	


k	b K >?")
-
3


!


r   c                   C   s   t    d S r   )r   r  rZ   rZ   rZ   r[   app  s   r  __main__)'r  randomrU   textwrapr   tkinterr   r   r   r   r   r   r   r	   r
   r   Ztkinter.filedialogr   r   Ztkinter.fontr   Z
nltk.chunkr   r   Znltk.chunk.regexpr   Znltk.corpusr   r   Znltk.draw.utilr   Z	nltk.treer   Z	nltk.utilr   r   r  r  __all__rZ   rZ   rZ   r[   <module>   s<   
0           3
