o
    rZheC                     @   s   d dl Z d dlmZ d dlmZ d dlT d dlT d dlmZ d dl	m
Z
 G dd deZG d	d
 d
ZG dd dZG dd deZG dd deZeG dd deZG dd dZdS )    N)total_ordering)ElementTree)*)raise_unorderable_types)Treec                   @   s`   e Zd ZdZ					dddZdddZd	d
 Zdd ZdddZdd Z	dd fddZ
dS )PropbankCorpusReadera  
    Corpus reader for the propbank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every verb instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of "frameset
    files" which define the argument labels used by the annotations,
    on a per-verb basis.  Each "frameset file" contains one or more
    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
    divided into coarse-grained word senses called "rolesets".  For
    each "roleset", the frameset file provides descriptions of the
    argument roles, along with examples.
     Nutf8c                 C   sV   t |tr
t||}t|}t| |||g| | || _|| _|| _|| _	|| _
dS )a  
        :param root: The root directory for this corpus.
        :param propfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by propbank.
        N)
isinstancestrZfind_corpus_fileidslistCorpusReader__init__	_propfile_framefiles
_verbsfile_parse_fileid_xform_parse_corpus)selfrootZpropfile
framefilesZ	verbsfileparse_fileid_xformparse_corpusencoding r   J/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/propbank.pyr       s   


zPropbankCorpusReader.__init__c                    sD   i  dur fddd< t jfddjdS )z
        :return: a corpus view that acts as a list of
            ``PropBankInstance`` objects, one for each noun in the corpus.
        Nc                    s
   | j  kS Nbaseforminstr   r   r   <lambda>M   s   
 z0PropbankCorpusReader.instances.<locals>.<lambda>instance_filterc                    s   j | fi  S r   )_read_instance_block)stream)kwargsr   r   r   r!   P   s    r   )StreamBackedCorpusViewabspathr   r   )r   r   r   )r   r%   r   r   	instancesF   s   

zPropbankCorpusReader.instancesc                 C      t | | jt| | jdS )z
        :return: a corpus view that acts as a list of strings, one for
            each line in the predicate-argument annotation file.
        r&   )r'   r(   r   read_line_blockr   r   r   r   r   linesT   
   

zPropbankCorpusReader.linesc                 C   s   | dd }d| }|| jvrtd| | | }t| }W d   n1 s/w   Y  |dD ]}|j	d |krF|  S q9td| d	| )
zE
        :return: the xml description for the given roleset.
        .r   frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )
splitr   
ValueErrorr(   openr   parsegetrootfindallZattrib)r   Z
roleset_idr   	framefilefpetreerolesetr   r   r   r=   _   s   
zPropbankCorpusReader.rolesetc              	   C   s   |durd| }|| j vrtd| |g}n| j }g }|D ](}| | }t| }W d   n1 s9w   Y  ||d qt	|S )zA
        :return: list of xml descriptions for rolesets.
        Nr0   r1   r2   )
r   r5   r(   r6   r   r7   r8   appendr9   ZLazyConcatenation)r   r   r:   r   Zrsetsr;   r<   r   r   r   rolesetsq   s   
zPropbankCorpusReader.rolesetsc                 C   r*   )z
        :return: a corpus view that acts as a list of all verb lemmas
            in this corpus (from the verbs.txt file).
        r&   )r'   r(   r   r+   r   r,   r   r   r   verbs   r.   zPropbankCorpusReader.verbsc                 C      dS )NTr   r   r   r   r   r!      s    zPropbankCorpusReader.<lambda>c                 C   sJ   g }t dD ]}|  }|r"t|| j| j}||r"|| q|S )Nd   )rangereadlinestripPropbankInstancer7   r   r   r>   )r   r$   r"   blockiliner    r   r   r   r#      s   

z)PropbankCorpusReader._read_instance_block)r   NNNr	   r   )__name__
__module____qualname____doc__r   r)   r-   r=   r?   r@   r#   r   r   r   r   r      s    

&
r   c                   @   sn   e Zd Z	dddZedd Zedd Zedd	 Zd
d Zdd Z	dd Z
ee
ddZedddZdS )rF   Nc
           
      C   sN   || _ 	 || _	 || _	 || _	 || _	 || _	 || _	 t|| _	 |	| _	d S r   )
fileidsentnumwordnumtaggerr=   
inflection	predicatetuple	argumentsr   )
r   rN   rO   rP   rQ   r=   rR   rS   rU   r   r   r   r   r      s$   
zPropbankInstance.__init__c                 C      | j dd S )zThe baseform of the predicate.r/   r   r=   r4   r,   r   r   r   r         zPropbankInstance.baseformc                 C   rV   )z"The sense number of the predicate.r/      rW   r,   r   r   r   sensenumber   rX   zPropbankInstance.sensenumberc                 C   rA   )zIdentifier of the predicate.relr   r,   r   r   r   predid   s   zPropbankInstance.predidc                 C   s   d | j| j| jS )Nz(<PropbankInstance: {}, sent {}, word {}>)formatrN   rO   rP   r,   r   r   r   __repr__   s
   zPropbankInstance.__repr__c                 C   s\   d | j| j| j| j| j| j}| j| jdff }t	|D ]\}}|d| d| 7 }q|S )Nz{} {} {} {} {} {}r[    -)
r]   rN   rO   rP   rQ   r=   rR   rU   rS   sorted)r   sitemsarglocargidr   r   r   __str__   s   zPropbankInstance.__str__c                 C   s6   | j d u rd S | j| j  vrd S | j | j| j S r   )r   rN   ZfileidsZparsed_sentsrO   r,   r   r   r   	_get_tree   s
   
zPropbankInstance._get_treezs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc              
   C   s  |   }t|dk rtd|  |d d \}}}}}}	dd |dd  D }
dd |dd  D }t|
dkr>td|  |d urF||}t|}t|}t|	}	t|
d d d	 }g }|D ]}| d
d\}}|t||f qbt||||||	|||	S )N   z!Badly formatted propbank line: %r   c                 S   s   g | ]	}| d r|qS z-relendswith.0pr   r   r   
<listcomp>      z*PropbankInstance.parse.<locals>.<listcomp>c                 S   s   g | ]	}| d s|qS rk   rl   rn   r   r   r   rq     rr   rY   r   r`   )	r4   lenr5   intPropbankInflectionr7   PropbankTreePointerr>   rF   )rb   r   r   piecesrN   rO   rP   rQ   r=   rR   r[   argsrS   rU   argrd   re   r   r   r   r7     s:   
zPropbankInstance.parser   )NN)rJ   rK   rL   r   propertyr   rZ   r\   r^   rf   rg   treestaticmethodr7   r   r   r   r   rF      s$    
5


rF   c                   @   s   e Zd ZdZdd ZdS )PropbankPointera  
    A pointer used by propbank to identify one or more constituents in
    a parse tree.  ``PropbankPointer`` is an abstract base class with
    three concrete subclasses:

      - ``PropbankTreePointer`` is used to point to single constituents.
      - ``PropbankSplitTreePointer`` is used to point to 'split'
        constituents, which consist of a sequence of two or more
        ``PropbankTreePointer`` pointers.
      - ``PropbankChainTreePointer`` is used to point to entire trace
        chains in a tree.  It consists of a sequence of pieces, which
        can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers.
    c                 C   s   | j tkrt d S r   )	__class__r~   NotImplementedErrorr,   r   r   r   r   L  s   
zPropbankPointer.__init__N)rJ   rK   rL   rM   r   r   r   r   r   r~   =  s    r~   c                   @   ,   e Zd Zdd Zdd Zdd Zdd Zd	S )
PropbankChainTreePointerc                 C   
   || _ d S r   rx   r   rx   r   r   r   r   R     z!PropbankChainTreePointer.__init__c                 C      d dd | jD S )Nr   c                 s       | ]}d | V  qdS z%sNr   rn   r   r   r   	<genexpr>Y      z3PropbankChainTreePointer.__str__.<locals>.<genexpr>joinrx   r,   r   r   r   rf   X     z PropbankChainTreePointer.__str__c                 C      d|  S )Nz<PropbankChainTreePointer: %s>r   r,   r   r   r   r^   [     z!PropbankChainTreePointer.__repr__c                    *    d u rt dtd fdd| jD S )NParse tree not availablez*CHAIN*c                       g | ]}|  qS r   selectrn   r|   r   r   rq   a      z3PropbankChainTreePointer.select.<locals>.<listcomp>r5   r   rx   r   r|   r   r   r   r   ^     zPropbankChainTreePointer.selectNrJ   rK   rL   r   rf   r^   r   r   r   r   r   r   Q  s
    r   c                   @   r   )
PropbankSplitTreePointerc                 C   r   r   r   r   r   r   r   r   e  r   z!PropbankSplitTreePointer.__init__c                 C   r   )N,c                 s   r   r   r   rn   r   r   r   r   k  r   z3PropbankSplitTreePointer.__str__.<locals>.<genexpr>r   r,   r   r   r   rf   j  r   z PropbankSplitTreePointer.__str__c                 C   r   )Nz<PropbankSplitTreePointer: %s>r   r,   r   r   r   r^   m  r   z!PropbankSplitTreePointer.__repr__c                    r   )Nr   z*SPLIT*c                    r   r   r   rn   r   r   r   rq   s  r   z3PropbankSplitTreePointer.select.<locals>.<listcomp>r   r   r   r   r   r   p  r   zPropbankSplitTreePointer.selectNr   r   r   r   r   r   d  s
    r   c                   @   s\   e Zd ZdZdd Zedd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd ZdS )rw   z@
    wordnum:height*wordnum:height*...
    wordnum:height,

    c                 C   s   || _ || _d S r   rP   height)r   rP   r   r   r   r   r   ~  s   
zPropbankTreePointer.__init__c                 C   s   |  d}t|dkrtdd |D S |  d}t|dkr(tdd |D S |  d}t|dkr9td	|  tt|d
 t|d S )Nr   rY   c                 S      g | ]}t |qS r   rw   r7   ro   eltr   r   r   rq     r   z-PropbankTreePointer.parse.<locals>.<listcomp>r   c                 S   r   r   r   r   r   r   r   rq     r   :   zbad propbank pointer %rr   )r4   rt   r   r   r5   rw   ru   )rb   rx   r   r   r   r7     s   


zPropbankTreePointer.parsec                 C   s   | j  d| j S )Nr   r   r,   r   r   r   rf     s   zPropbankTreePointer.__str__c                 C   s   d| j | jf S )NzPropbankTreePointer(%d, %d)r   r,   r   r   r   r^     s   zPropbankTreePointer.__repr__c                 C   sP   t |ttfr|jd }t |ttfst |ts| |u S | j|jko'| j|jkS Nr   )r
   r   r   rx   rw   rP   r   r   otherr   r   r   __eq__  s   

zPropbankTreePointer.__eq__c                 C   s
   | |k S r   r   r   r   r   r   __ne__  s   
zPropbankTreePointer.__ne__c                 C   s\   t |ttfr|jd }t |ttfst |ts t| t|k S | j| j f|j|j fk S r   )r
   r   r   rx   rw   r3   rP   r   r   r   r   r   __lt__  s   

zPropbankTreePointer.__lt__c                 C   s   |d u rt d|| | S )Nr   )r5   treeposr   r   r   r   r     s   zPropbankTreePointer.selectc                 C   s   |du rt d|g}g }d}	 t|d trLt|t|k r%|d n|d  d7  < |d t|d k rC||d |d   n%|  |  n|| jkr`t|dt|| j d  S |d7 }|  q)z}
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        Nr   r   TrY   )	r5   r
   r   rt   r>   poprP   rT   r   )r   r|   stackr   rP   r   r   r   r     s&   

zPropbankTreePointer.treeposN)rJ   rK   rL   rM   r   r}   r7   rf   r^   r   r   r   r   r   r   r   r   r   rw   v  s    
		rw   c                   @   st   e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZd	Zd
ZdZdZdddZdd Zdd ZedZedd ZdS )rv   rH   grp   vfnob3ar`   c                 C   s"   || _ || _|| _|| _|| _d S r   formtenseaspectpersonvoice)r   r   r   r   r   r   r   r   r   r     s
   
zPropbankInflection.__init__c                 C   s   | j | j | j | j | j S r   r   r,   r   r   r   rf     s   zPropbankInflection.__str__c                 C   r   )Nz<PropbankInflection: %s>r   r,   r   r   r   r^     r   zPropbankInflection.__repr__z"[igpv\-][fpn\-][pob\-][3\-][ap\-]$c                 C   s>   t | ts	tdt| dkstj| std|  t|  S )Nzexpected a string   z!Bad propbank inflection string %r)r
   r   	TypeErrorrt   rv   	_VALIDATEmatchr5   )rb   r   r   r   r7     s
   
zPropbankInflection.parseN)r`   r`   r`   r`   r`   )rJ   rK   rL   Z
INFINITIVEZGERUNDZ
PARTICIPLEZFINITEZFUTUREZPASTZPRESENTZPERFECTZPROGRESSIVEZPERFECT_AND_PROGRESSIVEZTHIRD_PERSONZACTIVEZPASSIVENONEr   rf   r^   recompiler   r}   r7   r   r   r   r   rv     s*    

rv   )r   	functoolsr   Z	xml.etreer   Znltk.corpus.reader.apiZnltk.corpus.reader.utilZnltk.internalsr   Z	nltk.treer   r   r   rF   r~   r   r   rw   rv   r   r   r   r   <module>   s"     d