o
    rZh=                     @   s   d dl mZ d dlmZ d dlT d dlT d dlmZ d dlm	Z	 G dd de
ZG dd	 d	ZG d
d dZG dd deZG dd deZeG dd deZdS )    )total_ordering)ElementTree)*)raise_unorderable_types)Treec                   @   s`   e Zd ZdZ					dddZdddZd	d
 Zdd ZdddZdd Z	dd fddZ
dS )NombankCorpusReadera  
    Corpus reader for the nombank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every noun instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of "frameset
    files" which define the argument labels used by the annotations,
    on a per-noun basis.  Each "frameset file" contains one or more
    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
    divided into coarse-grained word senses called "rolesets".  For
    each "roleset", the frameset file provides descriptions of the
    argument roles, along with examples.
     Nutf8c                 C   sL   t |trt||| _t|| _t| ||| || _|| _|| _	|| _
dS )a  
        :param root: The root directory for this corpus.
        :param nomfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by nombank.
        N)
isinstancestrZfind_corpus_fileidsZ_fileidslistCorpusReader__init___nomfile
_nounsfile_parse_fileid_xform_parse_corpus)selfrootZnomfile
framefilesZ	nounsfileparse_fileid_xformparse_corpusencoding r   I/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/nombank.pyr       s   


zNombankCorpusReader.__init__c                    sD   i  dur fddd< t jfddjdS )z
        :return: a corpus view that acts as a list of
            ``NombankInstance`` objects, one for each noun in the corpus.
        Nc                    s
   | j  kS Nbaseforminstr   r   r   <lambda>M   s   
 z/NombankCorpusReader.instances.<locals>.<lambda>instance_filterc                    s   j | fi  S r   )_read_instance_block)stream)kwargsr   r   r   r    P   s    r   )StreamBackedCorpusViewabspathr   r   )r   r   r   )r   r$   r   r   	instancesF   s   

zNombankCorpusReader.instancesc                 C      t | | jt| | jdS )z
        :return: a corpus view that acts as a list of strings, one for
            each line in the predicate-argument annotation file.
        r%   )r&   r'   r   read_line_blockr   r   r   r   r   linesT   
   

zNombankCorpusReader.linesc                 C   s   | dd }|dd}|dddd}d| }||  vr'td	| | | }t| }W d
   n1 s@w   Y  |	dD ]}|j
d |krW|  S qJtd| d| )zE
        :return: the xml description for the given roleset.
        .r   	perc-sign%oneslashonezero1/10
1-slash-10frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )splitreplacefileids
ValueErrorr'   openr   parsegetrootfindallZattrib)r   Z
roleset_idr   	framefilefpetreerolesetr   r   r   rC   _   s    zNombankCorpusReader.rolesetc              	   C   s   |durd| }||   vrtd| |g}n|   }g }|D ](}| | }t| }W d   n1 s;w   Y  ||d q t	|S )zA
        :return: list of xml descriptions for rolesets.
        Nr4   r5   r6   )
r:   r;   r'   r<   r   r=   r>   appendr?   ZLazyConcatenation)r   r   r@   r   ZrsetsrA   rB   r   r   r   rolesetsu   s   zNombankCorpusReader.rolesetsc                 C   r)   )z
        :return: a corpus view that acts as a list of all noun lemmas
            in this corpus (from the nombank.1.0.words file).
        r%   )r&   r'   r   r*   r   r+   r   r   r   nouns   r-   zNombankCorpusReader.nounsc                 C   s   dS )NTr   r   r   r   r   r       s    zNombankCorpusReader.<lambda>c                 C   sJ   g }t dD ]}|  }|r"t|| j| j}||r"|| q|S )Nd   )rangereadlinestripNombankInstancer=   r   r   rD   )r   r#   r!   blockiliner   r   r   r   r"      s   

z(NombankCorpusReader._read_instance_block)r   NNNr	   r   )__name__
__module____qualname____doc__r   r(   r,   rC   rE   rF   r"   r   r   r   r   r      s    

&
r   c                   @   sV   e Zd Z	dddZedd Zdd Zdd	 Zd
d ZeeddZ	e
dddZdS )rK   Nc
           
      C   sN   || _ 	 || _	 || _	 || _	 || _	 || _	 || _	 t|| _	 |	| _	d S r   )
fileidsentnumwordnumr   sensenumber	predicatepredidtuple	argumentsr   )
r   rS   rT   rU   r   rV   rW   rX   rZ   r   r   r   r   r      s$   
zNombankInstance.__init__c                 C   s2   | j dd}|dddd}| d| j S )zThe name of the roleset used by this instance's predicate.
        Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
        look up information about the roleset.r0   r/   r2   r3   r1   r.   )r   r9   rV   )r   rr   r   r   rC      s   zNombankInstance.rolesetc                 C   s   d | j| j| jS )Nz'<NombankInstance: {}, sent {}, word {}>)formatrS   rT   rU   r+   r   r   r   __repr__   s
   zNombankInstance.__repr__c                 C   sX   d | j| j| j| j| j}| j| jdff }t|D ]\}}|d| d| 7 }q|S )Nz{} {} {} {} {}rel -)	r\   rS   rT   rU   r   rV   rZ   rW   sorted)r   sitemsarglocargidr   r   r   __str__   s   zNombankInstance.__str__c                 C   s6   | j d u rd S | j| j  vrd S | j | j| j S r   )r   rS   r:   Zparsed_sentsrT   r+   r   r   r   	_get_tree   s
   
zNombankInstance._get_treezs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc              
      s   |   }t|dk rtd|  |d d \}}}}}|dd    fddt D }	t|	dkr8td|  |d ur@||}t|}t|}|	d  dd\}
}t|
}g } D ]}| dd\}}|t||f q[t|||||||||	S )	N   z Badly formatted nombank line: %r   c                    s"   g | ]\}}d |v r  |qS )z-rel)pop).0rM   pargsr   r   
<listcomp>  s   " z)NombankInstance.parse.<locals>.<listcomp>   r   r`   )	r8   lenr;   	enumerateintNombankTreePointerr=   rD   rK   )rb   r   r   piecesrS   rT   rU   r   rV   r^   ZpredlocrX   rW   rZ   argrd   re   r   rn   r   r=     s:   
zNombankInstance.parser   )NN)rO   rP   rQ   r   propertyrC   r]   rf   rg   treestaticmethodr=   r   r   r   r   rK      s    
1
rK   c                   @   s   e Zd ZdZdd ZdS )NombankPointeran  
    A pointer used by nombank to identify one or more constituents in
    a parse tree.  ``NombankPointer`` is an abstract base class with
    three concrete subclasses:

    - ``NombankTreePointer`` is used to point to single constituents.
    - ``NombankSplitTreePointer`` is used to point to 'split'
      constituents, which consist of a sequence of two or more
      ``NombankTreePointer`` pointers.
    - ``NombankChainTreePointer`` is used to point to entire trace
      chains in a tree.  It consists of a sequence of pieces, which
      can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
    c                 C   s   | j tkrt d S r   )	__class__r{   NotImplementedErrorr+   r   r   r   r   E  s   
zNombankPointer.__init__N)rO   rP   rQ   rR   r   r   r   r   r   r{   6  s    r{   c                   @   ,   e Zd Zdd Zdd Zdd Zdd Zd	S )
NombankChainTreePointerc                 C   
   || _ d S r   rv   r   rv   r   r   r   r   K     z NombankChainTreePointer.__init__c                 C      d dd | jD S )Nr   c                 s       | ]}d | V  qdS z%sNr   rl   rm   r   r   r   	<genexpr>R      z2NombankChainTreePointer.__str__.<locals>.<genexpr>joinrv   r+   r   r   r   rf   Q     zNombankChainTreePointer.__str__c                 C      d|  S )Nz<NombankChainTreePointer: %s>r   r+   r   r   r   r]   T     z NombankChainTreePointer.__repr__c                    *    d u rt dtd fdd| jD S )NParse tree not availablez*CHAIN*c                       g | ]}|  qS r   selectr   ry   r   r   rp   Z      z2NombankChainTreePointer.select.<locals>.<listcomp>r;   r   rv   r   ry   r   r   r   r   W     zNombankChainTreePointer.selectNrO   rP   rQ   r   rf   r]   r   r   r   r   r   r   J  s
    r   c                   @   r~   )
NombankSplitTreePointerc                 C   r   r   r   r   r   r   r   r   ^  r   z NombankSplitTreePointer.__init__c                 C   r   )N,c                 s   r   r   r   r   r   r   r   r   d  r   z2NombankSplitTreePointer.__str__.<locals>.<genexpr>r   r+   r   r   r   rf   c  r   zNombankSplitTreePointer.__str__c                 C   r   )Nz<NombankSplitTreePointer: %s>r   r+   r   r   r   r]   f  r   z NombankSplitTreePointer.__repr__c                    r   )Nr   z*SPLIT*c                    r   r   r   r   r   r   r   rp   l  r   z2NombankSplitTreePointer.select.<locals>.<listcomp>r   r   r   r   r   r   i  r   zNombankSplitTreePointer.selectNr   r   r   r   r   r   ]  s
    r   c                   @   s\   e Zd ZdZdd Zedd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd ZdS )ru   z@
    wordnum:height*wordnum:height*...
    wordnum:height,

    c                 C   s   || _ || _d S r   rU   height)r   rU   r   r   r   r   r   w  s   
zNombankTreePointer.__init__c                 C   s   |  d}t|dkrtdd |D S |  d}t|dkr(tdd |D S |  d}t|dkr9td	|  tt|d
 t|d S )Nr   rq   c                 S      g | ]}t |qS r   ru   r=   rl   eltr   r   r   rp     r   z,NombankTreePointer.parse.<locals>.<listcomp>r   c                 S   r   r   r   r   r   r   r   rp     r   :   zbad nombank pointer %rr   )r8   rr   r   r   r;   ru   rt   )rb   rv   r   r   r   r=   {  s   


zNombankTreePointer.parsec                 C   s   | j  d| j S )Nr   r   r+   r   r   r   rf     s   zNombankTreePointer.__str__c                 C   s   d| j | jf S )NzNombankTreePointer(%d, %d)r   r+   r   r   r   r]     s   zNombankTreePointer.__repr__c                 C   sP   t |ttfr|jd }t |ttfst |ts| |u S | j|jko'| j|jkS Nr   )r
   r   r   rv   ru   rU   r   r   otherr   r   r   __eq__  s   

zNombankTreePointer.__eq__c                 C   s
   | |k S r   r   r   r   r   r   __ne__  s   
zNombankTreePointer.__ne__c                 C   s\   t |ttfr|jd }t |ttfst |ts t| t|k S | j| j f|j|j fk S r   )r
   r   r   rv   ru   r7   rU   r   r   r   r   r   __lt__  s   

zNombankTreePointer.__lt__c                 C   s   |d u rt d|| | S )Nr   )r;   treeposr   r   r   r   r     s   zNombankTreePointer.selectc                 C   s   |du rt d|g}g }d}	 t|d trLt|t|k r%|d n|d  d7  < |d t|d k rC||d |d   n%|  |  n|| jkr`t|dt|| j d  S |d7 }|  q)z}
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        Nr   r   Trq   )	r;   r
   r   rr   rD   rk   rU   rY   r   )r   ry   stackr   rU   r   r   r   r     s&   

zNombankTreePointer.treeposN)rO   rP   rQ   rR   r   rz   r=   rf   r]   r   r   r   r   r   r   r   r   r   ru   o  s    
		ru   N)	functoolsr   Z	xml.etreer   Znltk.corpus.reader.apiZnltk.corpus.reader.utilZnltk.internalsr   Z	nltk.treer   r   r   rK   r{   r   r   ru   r   r   r   r   <module>   s     