o
    rZhm^                  	   @   s  d dl Zd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dl m!Z! d dl"m#Z# dZ$dZ%d	Z&d
Z'dZ(dZ)dZ*dZ+i ddd ddd ddd ddd ddd ddd ddd ddd dd d d!d"d d#d$d d%d&d d'd(d d)d*d d+d,d d-d.d d/d0d d1d d2d d3d d4d d5d d6d d7d d8Z,G d9d: d:Z-G d;d< d<Z.d=d> Z/e0d?kre/  d>gZ1dS )@    N)ENDLEFTSUNKENButtonEntryFrameIntVarLabelMenu
OptionMenu	Scrollbar	StringVarTextTk)Font)
alpinobrowncess_catcess_espflorestaindian
mac_morphonps_chatsinica_treebanktreebank)ShowText)in_idlez[^/ ]+z\bz<<CL_EVENT>>z<<ST_EVENT>>z<<SE_EVENT>>z<<ELC_EVENT>>2   z)English: Brown Corpus (Humor, simplified)z%Catalan: CESS-CAT Corpus (simplified)c                   C      t jddS N	universal)tagset)r   tagged_sents r#   r#   G/var/www/auris/lib/python3.10/site-packages/nltk/app/concordance_app.py<lambda>;       r%   zEnglish: Brown Corpusc                   C      t  S Nr   r"   r#   r#   r#   r$   r%   >       z"English: Brown Corpus (simplified)c                   C   r   r   r)   r#   r#   r#   r$   r%   ?   r&   z)English: Brown Corpus (Press, simplified)c                   C   s   t jg dddS )N)newsZ	editorialZreviewsr    
categoriesr!   r)   r#   r#   r#   r$   r%   B   s    z,English: Brown Corpus (Religion, simplified)c                   C      t jdddS )NZreligionr    r,   r)   r#   r#   r#   r$   r%   E       z+English: Brown Corpus (Learned, simplified)c                   C   r.   )NZlearnedr    r,   r)   r#   r#   r#   r$   r%   H   r/   z3English: Brown Corpus (Science Fiction, simplified)c                   C   r.   )NZscience_fictionr    r,   r)   r#   r#   r#   r$   r%   K   r/   z+English: Brown Corpus (Romance, simplified)c                   C   r.   )NZromancer    r,   r)   r#   r#   r#   r$   r%   N   r/   c                   C   r.   )NZhumorr    r,   r)   r#   r#   r#   r$   r%   Q   r/   zEnglish: NPS Chat Corpusc                   C   r'   r(   r   Ztagged_postsr#   r#   r#   r$   r%   T   r*   z%English: NPS Chat Corpus (simplified)c                   C   r   r   r0   r#   r#   r#   r$   r%   U   r&   z#English: Wall Street Journal Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   X   r*   z0English: Wall Street Journal Corpus (simplified)c                   C   r   r   r1   r#   r#   r#   r$   r%   Y   r&   zChinese: Sinica Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   \   r*   z#Chinese: Sinica Corpus (simplified)c                   C   r   r   r2   r#   r#   r#   r$   r%   ]   r&   zDutch: Alpino Corpusc                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   `   r*   z!Dutch: Alpino Corpus (simplified)c                   C   r   r   r3   r#   r#   r#   r$   r%   a   r&   c                   C   r   )N	hindi.pos)filesr   r"   r#   r#   r#   r$   r%   d   s    c                   C   r.   )Nr4   r    )r5   r!   r6   r#   r#   r#   r$   r%   e   r/   c                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   h   r*   c                   C   r   r   r7   r#   r#   r#   r$   r%   i   r&   c                   C   r'   r(   r   r"   r#   r#   r#   r$   r%   l   r*   c                   C   r   r   r8   r#   r#   r#   r$   r%   m   r&   c                   C   r   r   )r   r"   r#   r#   r#   r$   r%   p   r&   )zHindi: Indian Languages Corpusz+Hindi: Indian Languages Corpus (simplified)z&Portuguese: Floresta Corpus (Portugal)z2Portuguese: Floresta Corpus (Portugal, simplified)z&Portuguese: MAC-MORPHO Corpus (Brazil)z2Portuguese: MAC-MORPHO Corpus (Brazil, simplified)z%Spanish: CESS-ESP Corpus (simplified)c                   @   sD  e Zd ZdZdZdZdZdZdZdd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Z d7d8 Z!d9d: Z"d;d< Z#d=d> Z$d?d@ Z%dAdB Z&dCdD Z'dEdF Z(dGdH Z)dIdJ Z*dKdL Z+dMdN Z,dOS )PConcordanceSearchViewz#FFFz#F00Z
HL_WRD_TAGz#C0C0C0Z
HL_LBL_TAGg333333?c                 C   sb   t  | _t| j| _t | _| | j |   | 	| j | 
| jj | jt| j| _d S r(   )qQueuequeueConcordanceSearchModelmodelr   top	_init_top_init_menubar_init_widgetsload_corpusDEFAULT_CORPUSafterPOLL_INTERVAL_pollselfr#   r#   r$   __init__   s   
zConcordanceSearchView.__init__c                 C   s@   | d |d |d| j |d| j |dd d S )Nz950x680+50+50zNLTK Concordance Searchz<Control-q>ZWM_DELETE_WINDOWi  i  )ZgeometrytitlebinddestroyprotocolZminsize)rI   r?   r#   r#   r$   r@      s
   

zConcordanceSearchView._init_topc                 C   sj   t |t| jdddd| _| | j | | j | | j | | j | | j | jj	ddd d S )N   )
backgroundpadxpadyborderbothT)fillexpand)
r   dict_BACKGROUND_COLOUR
main_frame_init_corpus_select_init_query_box_init_results_box_init_paging_init_statuspackrI   parentr#   r#   r$   rB      s   z#ConcordanceSearchView._init_widgetsc                 C   s  t | j| _t | j| _t | j| _t| j}t|ddd}|jdd| jdd |jdd|d t|dd	}t|dd	}|j	d
| jdd| j
d |j	d| jdd| j
d |j	d| jdd| j
d |d |jdd|d t|dd	}t|dd	}|j	d| jdd| jd |j	d| jdd| jd |j	d| jdd| jd |d |jdd|d t|dd	}|j	d| jdd| jd |j	d| jdd| jd |j	d| jdd| jd |d |jdd|d |jdd|d |jd d|d | jj|d! d S )"Nr   )tearoffborderwidthZExitrO   zCtrl-q)label	underlinecommandZacceleratorFile)rd   re   menu)rb   20   )rd   variablere   valuerf   Z50r   100d   zResult Countz60 characters<   z80 charactersP   z100 charactersZBeforez70 charactersF   z90 charactersZ   z110 charactersn   ZAfterContextZEdit)rh   )r   r?   _result_size_cntx_bf_len_cntx_af_lenr
   Zadd_commandrM   Zadd_cascadeZadd_radiobuttonset_result_sizeZinvokeset_cntx_bf_lenset_cntx_af_lenconfig)rI   ZmenubarZfilemenuZeditmenuZ
rescntmenuZcntxmenuZ
cntxbfmenuZ
cntxafmenur#   r#   r$   rA      s   




z#ConcordanceSearchView._init_menubarc                 K   s   | j  | j_d S r(   )ru   getr>   result_countrI   kwargsr#   r#   r$   rx      s   z%ConcordanceSearchView.set_result_sizec                 K      | j  | _d S r(   )rw   r|   _char_afterr~   r#   r#   r$   rz        z%ConcordanceSearchView.set_cntx_af_lenc                 K   r   r(   )rv   r|   _char_beforer~   r#   r#   r$   ry     r   z%ConcordanceSearchView.set_cntx_bf_lenc              	   C   s   t || jd}t|| _| j| jj t|td| jddddj	dd t
| jj | jj}t|| j| jjg| j R d	| ji}d|d
< d|d< |j	dd |j	dddd d S )NrP   z	 Corpus:    rO   r   )justifytextrP   rQ   rR   rS   left)siderf   rc   highlightthicknessr?   xnr   rU   anchor)r   rX   r   varsetr>   rD   r	   r   r_   listCORPORAkeysremover   non_default_corporacorpus_selected)rI   ra   
innerframeZother_corporaomr#   r#   r$   rZ     s<   

z)ConcordanceSearchView._init_corpus_selectc              	   C   s.   t |tt| jdddd| _| jjddd d S )Nr   rO   )r   ZreliefrP   rS   rQ   rR   r?   swr   r   )r	   r   r   rX   statusr_   r`   r#   r#   r$   r^   $  s   	z"ConcordanceSearchView._init_statusc                 C   s   t || jd}t || jd}t|dd| _| jjddddd t|d	| jd
d
d| _| jjddddd | jd| j	 |  |jdddd d S )Nr   ro   )widthr   r      center)r   rU   rR   r   ZSearchrO   )r   rf   rc   r   z<KeyPress-Return>r?   r   r   )
r   rX   r   	query_boxr_   r   searchsearch_buttonrL   search_enter_keypress_handler)rI   ra   r   Zanotherr#   r#   r$   r[   0  s   z%ConcordanceSearchView._init_query_boxc                 G   s   |    d S r(   )r   rI   eventr#   r#   r$   r   A  s   z3ConcordanceSearchView.search_enter_keypress_handlerc                 C   s(  t |}t |}t |}t|dd}t|ddd}t|tddddd|j|jd	d
ddd
| _| jjdddd | jj| j| j	d | jj| j
| jd |jdddd |j| jjd |jddddd |j| jjd t|d| jdjddd |jddddd |jdddd |jdddd d S ) NrO   )rc   Zhoriz)rc   ZorientZcourierZ16)familysizedisablednoneZ40ri   )	Zfontstaterc   ZyscrollcommandZxscrollcommandwrapr   heightZexportselectionr   rT   T)r   rU   rV   )
foregroundyer   )rf   r   w)r   rU   rV   r   z   )r   rP   r   r?   r   bottoms)r   r   r   r   r   results_boxr_   Z
tag_config_HIGHLIGHT_WORD_TAG_HIGHLIGHT_WORD_COLOUR_HIGHLIGHT_LABEL_TAG_HIGHLIGHT_LABEL_COLOURr{   ZyviewZxviewr	   rX   )rI   ra   r   i1i2Z
vscrollbarZ
hscrollbarr#   r#   r$   r\   D  sD   
z'ConcordanceSearchView._init_results_boxc              	   C   s~   t || jd}t|d| jddddd | _}|jddd	 t|d
| jddddd | _}|jddd	 |jddd d| _d S )Nr   ZPrevious10rO   r   )r   rf   r   rc   r   r   r   r   r   ZNextrightr?   r   )r   rU   r   )	r   rX   r   previousprevr_   __next__nextcurrent_page)rI   ra   r   r   r   r#   r#   r$   r]   i  s.   		
z"ConcordanceSearchView._init_pagingc                 C   s&   |    |   | j| jd  d S NrO   )clear_results_boxfreeze_editabler>   r   r   rH   r#   r#   r$   r        zConcordanceSearchView.previousc                 C   s&   |    |   | j| jd  d S r   )r   r   r>   r   r   rH   r#   r#   r$   r     r   zConcordanceSearchView.__next__c                 G   sJ   d}d}zddl m} |||| jd  W d S    t| j|| Y d S )NzNLTK Concordance Search Demo
z#About: NLTK Concordance Search Demor   )Message)messagerK   ra   )Ztkinter.messageboxr   rY   showr   r?   )rI   r   ZABOUTZTITLEr   r#   r#   r$   about  s   zConcordanceSearchView.aboutc                 C   sD   | j t| j | j t| j | j t| j | j t| j	 d S r(   )
r?   rL   CORPUS_LOADED_EVENThandle_corpus_loadedSEARCH_TERMINATED_EVENThandle_search_terminatedSEARCH_ERROR_EVENThandle_search_errorERROR_LOADING_CORPUS_EVENThandle_error_loading_corpusrH   r#   r#   r$   _bind_event_handlers  s   z*ConcordanceSearchView._bind_event_handlersc                 C   s   z	| j jdd}W n
 tjy   Y n(w |tkr| | n|tkr(| | n|tkr2| 	| n	|t
kr;| | | jt| j| _d S )NF)block)r<   r|   r:   Emptyr   r   r   r   r   r   r   r   r?   rE   rF   rG   r   r#   r#   r$   rG     s   
zConcordanceSearchView._pollc                 C   s0   d| j   | jd< |   |   |   d S )NzError in loading r   )r   r|   r   unfreeze_editable	clear_allr   r   r#   r#   r$   r     s   z1ConcordanceSearchView.handle_error_loading_corpusc                 C   s2   | j  d | jd< |   |   | j  d S )Nz
 is loadedr   )r   r|   r   r   r   r   Z	focus_setr   r#   r#   r$   r     s   z*ConcordanceSearchView.handle_corpus_loadedc                 C   sb   | j  }| | d| jd< t|dkrd| j j | jd< n| j j| _|   | j	
| j d S )N r   r   zNo results found for )r>   get_resultswrite_resultsr   lenquerylast_requested_pager   r   r   Zxview_moveto_FRACTION_LEFT_TEXT)rI   r   resultsr#   r#   r$   r     s   



z.ConcordanceSearchView.handle_search_terminatedc                 C   s   d| j j | jd< |   d S )NzError in query r   )r>   r   r   r   r   r#   r#   r$   r     s   z)ConcordanceSearchView.handle_search_errorc                 G   s   | j  }| | d S r(   )r   r|   rC   )rI   argsZnew_selectionr#   r#   r$   r     s   
z%ConcordanceSearchView.corpus_selectedc                 C   s:   | j j|krd| d | jd< |   | j | d S d S )NzLoading z...r   )r>   selected_corpusr   r   rC   )rI   Z	selectionr#   r#   r$   rC     s
   z!ConcordanceSearchView.load_corpusc                 C   sd   d| _ |   | j  | j }t| dkrd S d| | jd< | 	  | j
|| j d  d S )Nr   zSearching for r   rO   )r   r   r>   reset_resultsr   r|   r   stripr   r   r   )rI   r   r#   r#   r$   r     s   

zConcordanceSearchView.searchc              
   C   sT  d| j d< d}|D ]}|d  |d |d }}}t|dkr|| jk r0| |||\}}}||| j || j  }|t|ksF|d7 }| j t|d | | |||\}}	|D ]}
| j 	| j
t|d t|
d  t|d t|
d   q\|	D ]}
| j 	| jt|d t|
d  t|d t|
d   q~|d7 }q	d	| j d< d S )
Nnormalr   rO   r   r   
z.0.r   )r   r   r   r   padr   insertstrwords_and_labelsZtag_addr   r   )rI   r   roweachsentpos1pos2sentenceZword_markersZlabel_markersmarkerr#   r#   r$   r     s6   
 
z#ConcordanceSearchView.write_resultsc                 C   s   ||| }g g }}| d}d}|D ]F}	|	dkr|d7 }n7|	 d\}
}|| j| | j| t|
 f |t|
d 7 }|| j| | j| t| f |t|7 }|d7 }q||fS )N r   r   rO   /)splitappendr   r   )rI   r   r   r   Z
search_expwordslabelsZlabeled_wordsindexr   wordrd   r#   r#   r$   r     s$   



z&ConcordanceSearchView.words_and_labelsc                 C   sD   || j kr
|||fS | j | }ddg| | }||| || fS )Nr   r   )r   join)rI   r   ZhstartZhenddr#   r#   r$   r     s
   


zConcordanceSearchView.padc                 G   s0   | j d u rd S | j | j | j   d | _ d S r(   )r?   Zafter_cancelrE   rM   )rI   r   r#   r#   r$   rM     s
   


zConcordanceSearchView.destroyc                 C   s$   | j dt | j  |   d S Nr   )r   deleter   r>   reset_queryr   rH   r#   r#   r$   r     s   
zConcordanceSearchView.clear_allc                 C   s&   d| j d< | j dt d| j d< d S )Nr   r   z1.0r   )r   r   r   rH   r#   r#   r$   r   !  s   
z'ConcordanceSearchView.clear_results_boxc                 C   s,   d| j d< d| jd< d| jd< d| jd< d S )Nr   r   )r   r   r   r   rH   r#   r#   r$   r   &  s   


z%ConcordanceSearchView.freeze_editablec                 C   s    d| j d< d| jd< |   d S )Nr   r   )r   r   set_paging_button_statesrH   r#   r#   r$   r   ,  s   

z'ConcordanceSearchView.unfreeze_editablec                 C   sT   | j dks
| j dkrd| jd< nd| jd< | j| j r#d| jd< d S d| jd< d S )Nr   rO   r   r   r   )r   r   r>   has_more_pagesr   rH   r#   r#   r$   r   1  s   
z.ConcordanceSearchView.set_paging_button_statesc                 C   s   | j j|dd d S )Ntail)when)r?   Zevent_generater   r#   r#   r$   
fire_event;  s   z ConcordanceSearchView.fire_eventc                 O   s    t  rd S | jj|i | d S r(   )r   r?   mainloop)rI   r   r   r#   r#   r$   r  ?  s   zConcordanceSearchView.mainloopN)-__name__
__module____qualname__rX   r   r   r   r   r   rJ   r@   rB   rA   rx   rz   ry   rZ   r^   r[   r   r\   r]   r   r   r   r   rG   r   r   r   r   r   rC   r   r   r   r   rM   r   r   r   r   r   r   r  r#   r#   r#   r$   r9   v   sV    
_%

r9   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZG dd dejZG dd dejZdS )r=   c                 C   s8   || _ t| _t| _d | _|   |   d | _d| _	d S r   )
r<   _CORPORAr   _DEFAULTrD   r   r   r   r}   last_sent_searched)rI   r<   r#   r#   r$   rJ   F  s   
zConcordanceSearchModel.__init__c                 C   s0   g }| t| j  || j |  |S r(   )extendr   r   r   r   rD   sort)rI   copyr#   r#   r$   r   P  s
   z*ConcordanceSearchModel.non_default_corporac                 C   s$   || _ g | _| || }|  d S r(   )r   r"   
LoadCorpusstart)rI   nameZrunner_threadr#   r#   r$   rC   W  s   z"ConcordanceSearchModel.load_corpusc                 C   s$   || _ || _| | || j  d S r(   )r   r   SearchCorpusr}   r  )rI   r   pager#   r#   r$   r   ]  s   zConcordanceSearchModel.searchc                 C   s6   || _ t| j|k r| | j| d S | jt d S r(   )r   r   r   r   r   r<   putr   rI   r  r#   r#   r$   r   b  s   zConcordanceSearchModel.nextc                 C   s   || _ | jt d S r(   )r   r<   r  r   r  r#   r#   r$   r   i  s   zConcordanceSearchModel.prevc                 C   s   d| _ g | _d | _d S r   )r  r   	last_pagerH   r#   r#   r$   r   m  s   
z$ConcordanceSearchModel.reset_resultsc                 C   s
   d | _ d S r(   )r   rH   r#   r#   r$   r   r  s   
z"ConcordanceSearchModel.reset_queryc                 C   s   | j |d | d S r   )r   r   )rI   r  Z	resultsetr#   r#   r$   set_resultsu  s   z"ConcordanceSearchModel.set_resultsc                 C   s   | j | jd  S r   )r   r   rH   r#   r#   r$   r   x  r   z"ConcordanceSearchModel.get_resultsc                 C   s4   | j g ks| j d g krdS | jd u rdS || jk S )Nr   FT)r   r  r  r#   r#   r$   r   {  s
   

z%ConcordanceSearchModel.has_more_pagesc                   @   s   e Zd Zdd Zdd ZdS )z!ConcordanceSearchModel.LoadCorpusc                 C   s   t j|  ||| _| _d S r(   )	threadingThreadrJ   r>   r  )rI   r  r>   r#   r#   r$   rJ     s   z*ConcordanceSearchModel.LoadCorpus.__init__c              
   C   st   z| j j| j  }dd |D | j _| j jt W d S  ty9 } zt| | j jt	 W Y d }~d S d }~ww )Nc                 S   s    g | ]}d  dd |D qS )r   c                 s   s     | ]\}}|d  | V  qdS )r   Nr#   ).0r   tr#   r#   r$   	<genexpr>  s    zCConcordanceSearchModel.LoadCorpus.run.<locals>.<listcomp>.<genexpr>)r   )r  r   r#   r#   r$   
<listcomp>  s    z9ConcordanceSearchModel.LoadCorpus.run.<locals>.<listcomp>)
r>   r   r  r"   r<   r  r   	Exceptionprintr   )rI   tsr   r#   r#   r$   run  s   
z%ConcordanceSearchModel.LoadCorpus.runN)r  r  r  rJ   r  r#   r#   r#   r$   r    s    r  c                   @   s$   e Zd Zdd Zdd Zdd ZdS )z#ConcordanceSearchModel.SearchCorpusc                 C   s&   |||| _ | _| _tj|  d S r(   )r>   countr  r  r  rJ   )rI   r>   r  r  r#   r#   r$   rJ     s   z,ConcordanceSearchModel.SearchCorpus.__init__c              	   C   s*  |   }g dd}}}| jj| jjd  D ]J}zt||}W n tjy8   | j  | jj	t
 Y  d S w |r\||| | f |d7 }|| jkr\| j j|d 7  _ n|d7 }q| jt|kr| j j|d 7  _| j| j_| j| j| n| j| j|d d  | jj	t d S )Nr   rO   )processed_queryr>   r"   r  rer   errorr   r<   r  r   r   r  endr  r   r  r  r  r   )rI   r:   Zsent_posiZ
sent_countr   mr#   r#   r$   r    s.   



z'ConcordanceSearchModel.SearchCorpus.runc                 C   s   g }| j j D ]8}tdd|}td|r%|tt d | t  qd|v r3|t| t  q|t| d t t  qd	|S )Nz\.z[^/ ]z[A-Z]+$r   r   )
r>   r   r   r!  submatchr   BOUNDARYWORD_OR_TAGr   )rI   newtermr#   r#   r$   r     s   
z3ConcordanceSearchModel.SearchCorpus.processed_queryN)r  r  r  rJ   r  r   r#   r#   r#   r$   r    s    r  N)r  r  r  rJ   r   rC   r   r   r   r   r   r  r   r   r  r  r  r  r#   r#   r#   r$   r=   E  s    
r=   c                  C   s   t  } |   d S r(   )r9   r  )r   r#   r#   r$   app  s   r,  __main__)2r<   r:   r!  r  tkinterr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   Ztkinter.fontr   Znltk.corpusr   r   r   r   r   r   r   r   r   r   Znltk.draw.utilr   Z	nltk.utilr   r)  r(  r   r   r   r   rF   r  r  r9   r=   r,  r  __all__r#   r#   r#   r$   <module>   s   @0"#&'*<   Rx
