o
    rZhf                     @   sP   d Z ddlZddlT dddddd	d
ZeeZG dd dZG dd deZdS )a)  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
https://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: ``/ie_er_99/english/devtest/newswire/*.ref.nwt``
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
    N)*z&Associated Press Weekly, 14 March 1998z&Associated Press Weekly, 24 April 1998z&Associated Press Weekly, 29 April 1998zNew York Times, 15 March 1998zNew York Times, 3 April 1998zNew York Times, 7 April 1998)ZAPW_19980314ZAPW_19980424ZAPW_19980429ZNYT_19980315ZNYT_19980403ZNYT_19980407c                   @   s   e Zd ZdddZdd ZdS )IEERDocumentN c                 C   s"   || _ || _|| _|| _|| _d S N)textdocnodoctype	date_timeheadline)selfr   r   r   r	   r
    r   F/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/ieer.py__init__+   s
   
zIEERDocument.__init__c                 C   sd   | j rd| j  }nddd | j D d d d }| jd ur.d| j d|dS d	| S )
N c                 S   s    g | ]}|d d dkr|qS )N   <r   ).0wr   r   r   
<listcomp>7   s     z)IEERDocument.__repr__.<locals>.<listcomp>   z...z<IEERDocument z: >z<IEERDocument: %r>)r
   joinleavesr   r   )r   r
   r   r   r   __repr__2   s   $
zIEERDocument.__repr__)NNNr   )__name__
__module____qualname__r   r   r   r   r   r   r   *   s    
r   c                   @   s<   e Zd ZdZdddZdddZdd Zd	d
 Zdd ZdS )IEERCorpusReaderr   Nc                       t  fdd |dD S )Nc                        g | ]\}}t | j|d qS )encoding)StreamBackedCorpusView_read_blockr   Zfileidencr   r   r   r   D       z)IEERCorpusReader.docs.<locals>.<listcomp>TconcatZabspathsr   Zfileidsr   r&   r   docsB   
   

zIEERCorpusReader.docsc                    r   )Nc                    r   r    )r"   _read_parsed_blockr$   r&   r   r   r   L   r'   z0IEERCorpusReader.parsed_docs.<locals>.<listcomp>Tr(   r*   r   r&   r   parsed_docsJ   r,   zIEERCorpusReader.parsed_docsc                    s    fdd  |D S )Nc                    s&   g | ]}  |jd ur  |qS r   )_parser   )r   docr&   r   r   r   T   s
    z7IEERCorpusReader._read_parsed_block.<locals>.<listcomp>)r#   )r   streamr   r&   r   r-   R   s   
z#IEERCorpusReader._read_parsed_blockc                 C   s0   t jj|dd}t|trtdi |S t|S )NZDOCUMENT)Z
root_labelr   )nltkchunkZieerstr2tree
isinstancedictr   )r   r0   valr   r   r   r/   Z   s   
zIEERCorpusReader._parsec                 C   sd   g }	 |  }|s
n| dkrnq|| 	 |  }|sn|| | dkr+nqd|gS )NTz<DOC>z</DOC>
)readlinestripappendr   )r   r1   outliner   r   r   r#   a   s$   

zIEERCorpusReader._read_blockr   )	r   r   r   __doc__r+   r.   r-   r/   r#   r   r   r   r   r   ?   s    

r   )	r=   r2   Znltk.corpus.reader.apititlessortedZ	documentsr   ZCorpusReaderr   r   r   r   r   <module>   s   
