o
    rZh                     @   sp   d dl Z d dlmZ d dlT d dlT G dd deZG dd deZdd	 Z	d
d Z
edkr6e	  e
  dS dS )    N)util)*c                   @   sR   e Zd ZdddZdddZdddZdd	d
ZdddZdddZdddZ	dS )ChasenCorpusReaderutf8Nc                 C   s   || _ t| ||| d S N)_sent_splitterCorpusReader__init__)selfrootfileidsencodingsent_splitter r   H/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/chasen.pyr	      s   zChasenCorpusReader.__init__c                       t  fdd |dD S )Nc              	      $   g | ]\}}t ||d d d  jqS )FChasenCorpusViewr   .0Zfileidencr
   r   r   
<listcomp>       z,ChasenCorpusReader.words.<locals>.<listcomp>TconcatZabspathsr
   r   r   r   r   words   
   

zChasenCorpusReader.wordsc                    r   )Nc              	      $   g | ]\}}t ||d dd jqS TFr   r   r   r   r   r      r   z3ChasenCorpusReader.tagged_words.<locals>.<listcomp>Tr   r   r   r   r   tagged_words   r   zChasenCorpusReader.tagged_wordsc                    r   )Nc              	      s$   g | ]\}}t ||d dd  jqS FTr   r   r   r   r   r   %   r   z,ChasenCorpusReader.sents.<locals>.<listcomp>Tr   r   r   r   r   sents#   r   zChasenCorpusReader.sentsc                    r   )Nc              	      s$   g | ]\}}t ||d d d jqS r!   r   r   r   r   r   r   -   r   z3ChasenCorpusReader.tagged_sents.<locals>.<listcomp>Tr   r   r   r   r   tagged_sents+   r   zChasenCorpusReader.tagged_sentsc                    r   )Nc              	      r    r#   r   r   r   r   r   r   5   r   z,ChasenCorpusReader.paras.<locals>.<listcomp>Tr   r   r   r   r   paras3   r   zChasenCorpusReader.parasc                    r   )Nc              	      r   )Tr   r   r   r   r   r   =   r   z3ChasenCorpusReader.tagged_paras.<locals>.<listcomp>Tr   r   r   r   r   tagged_paras;   r   zChasenCorpusReader.tagged_paras)r   Nr   )
__name__
__module____qualname__r	   r   r"   r$   r%   r&   r'   r   r   r   r   r      s    





r   c                   @   s$   e Zd ZdZ	dddZdd ZdS )r   z
    A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``,
    but this'll use fixed sets of word and sentence tokenizer.
    Nc                 C   s,   || _ || _|| _|| _tj| ||d d S )Nr   )_tagged_group_by_sent_group_by_parar   StreamBackedCorpusViewr	   )r
   Zcorpus_filer   ZtaggedZgroup_by_sentZgroup_by_parar   r   r   r   r	   J   s
   	zChasenCorpusView.__init__c           
      C   s  g }t |ddD ]|}g }g }| D ]E}| dk}|d}|d d|dd f}	|s3||	 |s=| jrW| |	rW| jsGdd	 |D }| jrP|| n|	| g }qt
|dkrv| jshd
d	 |D }| jrq|| n|	| | jr|| q|	| q|S )zReads one paragraph at a time..z^EOS\nZEOS	r      Nc                 S      g | ]\}}|qS r   r   r   wtr   r   r   r   i       z/ChasenCorpusView.read_block.<locals>.<listcomp>c                 S   r3   r   r   r4   r   r   r   r   r   r7   )Zread_regexp_block
splitlinesstripsplitjoinappendr   r,   r-   extendlenr.   )
r
   streamblockZpara_strparasentlineZ_eos_cellsr5   r   r   r   
read_blockY   s8   



zChasenCorpusView.read_blockr   )r(   r)   r*   __doc__r	   rE   r   r   r   r   r   D   s
    
r   c                  C   sf   dd l } ddlm} |dtddd}td| dd	  td
dd | dd D  d S )Nr   LazyCorpusLoaderjeita.*chasenutf-8r+   /iTV  i|V  z
EOS
c                 s   s$    | ]}d  dd |D V  qdS )
c                 s   s.    | ]}d  |d |d dd V  qdS )z{}/{}r   r2   r1      N)formatr:   )r   r5   r   r   r   	<genexpr>   s   , z!demo.<locals>.<genexpr>.<genexpr>N)r;   )r   rB   r   r   r   rP      s
    
zdemo.<locals>.<genexpr>iz  i}  )nltknltk.corpus.utilrH   r   printr;   r   r%   )rQ   rH   rI   r   r   r   demo   s   
rT   c                  C   s:   ddl m}  | dtddd}t| d d tsJ d S )Nr   rG   rI   rJ   rK   r+   r2   )rR   rH   r   
isinstancer"   str)rH   rI   r   r   r   test   s   rW   __main__)sysZnltk.corpus.readerr   Znltk.corpus.reader.apiZnltk.corpus.reader.utilr   r   r/   r   rT   rW   r(   r   r   r   r   <module>   s   6=
