o
    rZh`                     @   s@   d Z ddlZddlZddlmZ ddlmZ G dd deZdS )z
An NLTK interface to the VerbNet verb lexicon

For details about VerbNet see:
https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    N)defaultdict)XMLCorpusReaderc                   @   s0  e Zd ZdZd@ddZedZ	 edZ	 edZ		 dAd	d
Z
dAddZdBddZdd ZdAddZdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, ZdCd.d/ZdCd0d1ZdCd2d3ZdCd4d5ZdCd6d7Z dCd8d9Z!dCd:d;Z"dCd<d=Z#dCd>d?Z$dS )DVerbnetCorpusReadera  
    An NLTK interface to the VerbNet verb lexicon.

    From the VerbNet site: "VerbNet (VN) (Kipper-Schuler 2006) is the largest
    on-line verb lexicon currently available for English. It is a hierarchical
    domain-independent, broad-coverage verb lexicon with mappings to other
    lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG
    (XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)."

    For details about VerbNet see:
    https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    Fc                 C   sB   t | ||| tt| _	 tt| _	 i | _	 i | _|   d S N)	r   __init__r   list_lemma_to_class_wordnet_to_class_class_to_fileid_shortid_to_longid_quick_index)selfrootfileidsZ
wrap_etree r   I/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/verbnet.pyr   %   s   

zVerbnetCorpusReader.__init__z([^\-\.]*)-([\d+.\-]+)$z
[\d+.\-]+$zH<MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|<VNSUBCLASS ID="([^"]+)"/?>Nc                 C   s>   |du rt | j S t|tr| |}dd |dD S )zx
        Return a list of all verb lemmas that appear in any class, or
        in the ``classid`` if specified.
        Nc                 S      g | ]}| d qS )nameget.0memberr   r   r   
<listcomp>S   s    z.VerbnetCorpusReader.lemmas.<locals>.<listcomp>MEMBERS/MEMBER)sortedr   keys
isinstancestrvnclassfindallr   r   r   r   r   lemmasH   s
   

zVerbnetCorpusReader.lemmasc                 C   sD   |du rt | j S t|tr| |}tdd |dD g S )z|
        Return a list of all wordnet identifiers that appear in any
        class, or in ``classid`` if specified.
        Nc                 s   s     | ]}| d d V  qdS )wn N)r   splitr   r   r   r   	<genexpr>a   s
    
z1VerbnetCorpusReader.wordnetids.<locals>.<genexpr>r   )r   r	   r   r   r   r   sumr    r!   r   r   r   
wordnetidsU   s   

zVerbnetCorpusReader.wordnetidsc                    sx    dur fdd| j  D S |dur| j| S |dur"| j| S |dur5| |}dd |dD S t| j  S )a  
        Return a list of the VerbNet class identifiers.  If a file
        identifier is specified, then return only the VerbNet class
        identifiers for classes (and subclasses) defined by that file.
        If a lemma is specified, then return only VerbNet class
        identifiers for classes that contain that lemma as a member.
        If a wordnetid is specified, then return only identifiers for
        classes that contain that wordnetid as a member.  If a classid
        is specified, then return only identifiers for subclasses of
        the specified VerbNet class.
        If nothing is specified, return all classids within VerbNet
        Nc                    s   g | ]
\}}| kr|qS r   r   )r   cffileidr   r   r   v   s    z0VerbnetCorpusReader.classids.<locals>.<listcomp>c                 S   r   IDr   r   subclassr   r   r   r   }   s    SUBCLASSES/VNSUBCLASS)r
   itemsr   r	   r   r    r   r   )r   ZlemmaZ	wordnetidr,   classidxmltreer   r+   r   classidsh   s   


zVerbnetCorpusReader.classidsc                 C   s   || j v r
| |S | |}|| jv r?| j| | }| |}||dkr*|S |dD ]}||dkr<|  S q/J td| )a  Returns VerbNet class ElementTree

        Return an ElementTree containing the xml for the specified
        VerbNet class.

        :param fileid_or_classid: An identifier specifying which class
            should be returned.  Can be a file identifier (such as
            ``'put-9.1.xml'``), or a VerbNet class identifier (such as
            ``'put-9.1'``) or a short VerbNet class identifier (such as
            ``'9.1'``).
        r.   z.//VNSUBCLASSFzUnknown identifier )_fileidsxmllongidr
   r   r    
ValueError)r   Zfileid_or_classidr3   r,   treer0   r   r   r   r      s   




zVerbnetCorpusReader.vnclassc                    s<   |du r j S t|tr j | gS  fdd|D S )z
        Return a list of fileids that make up this corpus.  If
        ``vnclass_ids`` is specified, then return the fileids that make
        up the specified VerbNet class(es).
        Nc                    s   g | ]
} j  | qS r   )r
   r8   )r   Z
vnclass_idr   r   r   r          z/VerbnetCorpusReader.fileids.<locals>.<listcomp>)r6   r   r   r
   r8   )r   Zvnclass_idsr   r;   r   r      s   

zVerbnetCorpusReader.fileidsc              	   C   s\   t |tr
| |}g }|d}|D ]}|| || || || |d q|S )ap  Given a VerbNet class, this method returns VerbNet frames

        The members returned are:
        1) Example
        2) Description
        3) Syntax
        4) Semantics

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: frames - a list of frame dictionaries
        zFRAMES/FRAME)exampledescriptionsyntax	semantics)	r   r   r   r    append_get_example_within_frame_get_description_within_frame _get_syntactic_list_within_frame_get_semantics_within_frame)r   r   framesZvnframesvnframer   r   r   rF      s   


zVerbnetCorpusReader.framesc                 C   s,   t |tr
| |}dd |dD }|S )aA  Returns subclass ids, if any exist

        Given a VerbNet class, this method returns subclass ids (if they exist)
        in a list of strings.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: list of subclasses
        c                 S   r   r-   r   r/   r   r   r   r      s    
z2VerbnetCorpusReader.subclasses.<locals>.<listcomp>r1   )r   r   r   r    )r   r   
subclassesr   r   r   rH      s   


zVerbnetCorpusReader.subclassesc                 C   sR   t |tr
| |}g }|dD ]}||ddd |dD d q|S )ab  Returns thematic roles participating in a VerbNet class

        Members returned as part of roles are-
        1) Type
        2) Modifiers

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: themroles: A list of thematic roles in the VerbNet class
        zTHEMROLES/THEMROLEtypec                 S   "   g | ]}| d | ddqS ValuerI   valuerI   r   r   restrr   r   r   r          z1VerbnetCorpusReader.themroles.<locals>.<listcomp>SELRESTRS/SELRESTR)rI   	modifiers)r   r   r   r    rA   r   )r   r   	themrolesZtroler   r   r   rT      s   

	zVerbnetCorpusReader.themrolesc                 C   s"   | j D ]}| | || qdS )aC  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This is fast if ElementTree
        uses the C implementation (<0.1 secs), but quite slow (>10 secs)
        if only the python implementation is available.
        N)r6   _index_helperr7   )r   r,   r   r   r   _index  s   
zVerbnetCorpusReader._indexc                 C   s   | d}|| j|< || j| |< |dD ] }| j| d | | dd D ]
}| j| | q,q|dD ]}| 	|| q=dS )zHelper for ``_index()``r.   r   r   r#   r$   r1   N)
r   r
   r   shortidr    r   rA   r%   r	   rU   )r   r4   r,   r   r   r#   r0   r   r   r   rU     s   

z!VerbnetCorpusReader._index_helperc              	   C   s   | j D ]x}|dd }|| j|< || j| |< | |V}| j| D ]E}| }|d durN| j	|d  
| |d  D ]
}| j| 
| qBq&|d durh|| j|d < |d }|| j| |< q&J dW d   n1 svw   Y  qdS )a  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This doesn't do proper xml parsing,
        but is good enough to find everything in the standard VerbNet
        corpus -- and it runs about 30 times faster than xml parsing
        (with the python ElementTree; only 2-3 times faster
        if ElementTree uses the C implementation).
        Nr         Fzunexpected match condition)r6   r
   r   rW   open	_INDEX_REfinditerreadgroupsr   rA   r%   r	   )r   r,   r   fpmr_   r#   r   r   r   r     s*   

z VerbnetCorpusReader._quick_indexc              
   C   s\   | j |r|S | j|std| z| j| W S  ty- } ztd| |d}~ww )zReturns longid of a VerbNet class

        Given a short VerbNet class identifier (eg '37.10'), map it
        to a long id (eg 'confess-37.10').  If ``shortid`` is already a
        long id, then return it as-isvnclass identifier %r not foundN)
_LONGID_REmatch_SHORTID_REr9   r   KeyError)r   rW   er   r   r   r8   ;  s   zVerbnetCorpusReader.longidc                 C   s6   | j |r|S | j|}|r|dS td| )zReturns shortid of a VerbNet class

        Given a long VerbNet class identifier (eg 'confess-37.10'),
        map it to a short id (eg '37.10').  If ``longid`` is already a
        short id, then return it as-is.rZ   rb   )re   rd   rc   groupr9   )r   r8   ra   r   r   r   rW   J  s   
zVerbnetCorpusReader.shortidc                 C   sN   g }| dD ]}dd | dD }||d||ddkd q|S )	an  Returns semantics within a single frame

        A utility function to retrieve semantics within a frame in VerbNet
        Members of the semantics dictionary:
        1) Predicate value
        2) Arguments

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: semantics: semantics dictionary
        zSEMANTICS/PREDc                 S   rJ   )rI   rN   )rI   rN   r   )r   argr   r   r   r   j  rQ   zCVerbnetCorpusReader._get_semantics_within_frame.<locals>.<listcomp>zARGS/ARGrN   bool!)predicate_value	argumentsnegated)r    rA   r   )r   rG   Zsemantics_within_single_framepredrm   r   r   r   rE   \  s   z/VerbnetCorpusReader._get_semantics_within_framec                 C   s$   | d}|dur|j}|S d}|S )a'  Returns example within a frame

        A utility function to retrieve an example within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: example_text: The example sentence for this particular frame
        zEXAMPLES/EXAMPLENr$   )findtext)r   rG   Zexample_elementZexample_textr   r   r   rB   w  s   
	z-VerbnetCorpusReader._get_example_within_framec                 C   s"   | d}|jd |dddS )ab  Returns member description within frame

        A utility function to retrieve a description of participating members
        within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: description: a description dictionary with members - primary and secondary
        ZDESCRIPTIONprimary	secondaryr$   )rr   rs   )rp   attribr   )r   rG   Zdescription_elementr   r   r   rC     s   


z1VerbnetCorpusReader._get_description_within_framec                 C   s   g }| dD ]6}|j}t }d|jv r|dnd|d< dd |dD |d< dd |d	D |d
< |||d q|S )a[  Returns semantics within a frame

        A utility function to retrieve semantics within a frame in VerbNet.
        Members of the syntactic dictionary:
        1) POS Tag
        2) Modifiers

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: syntax_within_single_frame
        ZSYNTAXrN   r$   c                 S   rJ   rK   r   rO   r   r   r   r     rQ   zHVerbnetCorpusReader._get_syntactic_list_within_frame.<locals>.<listcomp>rR   	selrestrsc                 S   rJ   rK   r   rO   r   r   r   r     rQ   zSYNRESTRS/SYNRESTR	synrestrs)pos_tagrS   )rp   tagdictrt   r   r    rA   )r   rG   Zsyntax_within_single_frameeltrw   rS   r   r   r   rD     s   

z4VerbnetCorpusReader._get_syntactic_list_within_framec                 C   s   t |tr
| |}|dd }|| j|ddd 7 }|| j|ddd 7 }|d7 }|| j|ddd 7 }|d7 }|| j|dd7 }|S )a%  Returns pretty printed version of a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r.   
  indentz  Thematic roles:
    z
  Frames:
)r   r   r   r   pprint_subclassespprint_memberspprint_themrolespprint_frames)r   r   sr   r   r   pprint  s   
	
zVerbnetCorpusReader.pprintr$   c                 C   L   t |tr
| |}| |}|sdg}dd| }tj|d||d dS )a>  Returns pretty printed version of subclasses of VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's subclasses.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        (none)zSubclasses:  F   r|   initial_indentsubsequent_indent)r   r   r   rH   jointextwrapfill)r   r   r~   rH   r   r   r   r   r        
	

z%VerbnetCorpusReader.pprint_subclassesc                 C   r   )a?  Returns pretty printed version of members in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's member verbs.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r   z	Members: r   r   r|   r   )r   r   r   r"   r   r   r   )r   r   r~   membersr   r   r   r   r     r   z"VerbnetCorpusReader.pprint_membersc                 C   sx   t |tr
| |}g }| |D ]%}|d |d }dd |d D }|r1|dd|7 }|| qd|S )	aH  Returns pretty printed version of thematic roles in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's thematic roles.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        * rI   c                 S   s   g | ]
}|d  |d  qS rM   r   )r   modifierr   r   r   r     r<   z8VerbnetCorpusReader.pprint_themroles.<locals>.<listcomp>rS   [{}]r   r{   )r   r   r   rT   r   formatr   rA   )r   r   r~   piecesZthemrolepiecerS   r   r   r   r     s   
	

z$VerbnetCorpusReader.pprint_themrolesc                 C   sD   t |tr
| |}g }| |D ]}|| || qd|S )a?  Returns pretty version of all frames in a VerbNet class

        Return a string containing a pretty-printed representation of
        the list of frames within the VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r{   )r   r   r   rF   rA   _pprint_single_framer   )r   r   r~   r   rG   r   r   r   r     s   
	

z!VerbnetCorpusReader.pprint_framesc                 C   sd   |  ||d }|| ||d d 7 }|| ||d d 7 }||d 7 }|| ||d 7 }|S )a  Returns pretty printed version of a single frame in a VerbNet class

        Returns a string containing a pretty-printed representation of
        the given frame.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r{   r   z
  Syntax: z  Semantics:
r   ) _pprint_description_within_frame_pprint_example_within_frame_pprint_syntax_within_frame_pprint_semantics_within_frame)r   rG   r~   Zframe_stringr   r   r   r     s   	z(VerbnetCorpusReader._pprint_single_framec                 C   s   |d r|d |d  S dS )a&  Returns pretty printed version of example within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame example.

        :param vnframe: An ElementTree containing the xml contents of
            a Verbnet frame.
        r=   z
 Example: Nr   )r   rG   r~   r   r   r   r   0  s   	z0VerbnetCorpusReader._pprint_example_within_framec                 C   s6   ||d d  }|d d r|d |d d 7 }|S )a  Returns pretty printed version of a VerbNet frame description

        Return a string containing a pretty-printed representation of
        the given VerbNet frame description.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r>   rr   rs   z ({})r   )r   rG   r~   r>   r   r   r   r   <  s   	z4VerbnetCorpusReader._pprint_description_within_framec                 C   s   g }|d D ]A}|d }g }d|d v r#|d d r#| |d d  |dd |d d |d d  D 7 }|rB|d	d
|7 }| | q|d
| S )a&  Returns pretty printed version of syntax within a frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame syntax.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r?   rw   rN   rS   c                 S   s    g | ]}d  |d |d qS )z{}{}rN   rI   r   rO   r   r   r   r   Y  s    zCVerbnetCorpusReader._pprint_syntax_within_frame.<locals>.<listcomp>ru   rv   r   r   )rA   r   r   )r   rG   r~   r   elementr   Zmodifier_listr   r   r   r   J  s   	

z/VerbnetCorpusReader._pprint_syntax_within_framec              	      sl   g }|d D ]#}dd |d D }| |d rdnd |d  d	d
| d qd fdd|D S )a,  Returns a pretty printed version of semantics within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame semantics.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r@   c                 S   s   g | ]}|d  qS )rN   r   )r   argumentr   r   r   r   q  s    zFVerbnetCorpusReader._pprint_semantics_within_frame.<locals>.<listcomp>rm   rn      ¬r$   rl   (z, )r{   c                 3   s    | ]
}  d | V  qdS )r   Nr   )r   r   r}   r   r   r&   u  s    zEVerbnetCorpusReader._pprint_semantics_within_frame.<locals>.<genexpr>)rA   r   )r   rG   r~   r   	predicaterm   r   r}   r   r   f  s   	(z2VerbnetCorpusReader._pprint_semantics_within_frame)Fr   )NNNN)r$   )%__name__
__module____qualname____doc__r   recompilerc   re   r\   r"   r(   r5   r   r   rF   rH   rT   rV   rU   r   r8   rW   rE   rB   rC   rD   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      sL    






!""







r   )r   r   r   collectionsr   Znltk.corpus.reader.xmldocsr   r   r   r   r   r   <module>   s   