o
    rZh                     @   s   d dl Z G dd dZdS )    Nc                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	DependencyEvaluatora5  
    Class for measuring labelled and unlabelled attachment score for
    dependency parsing. Note that the evaluation ignores punctuation.

    >>> from nltk.parse import DependencyGraph, DependencyEvaluator

    >>> gold_sent = DependencyGraph("""
    ... Pierre  NNP     2       NMOD
    ... Vinken  NNP     8       SUB
    ... ,       ,       2       P
    ... 61      CD      5       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       2       P
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      NMOD
    ... board   NN      9       OBJ
    ... as      IN      9       VMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> parsed_sent = DependencyGraph("""
    ... Pierre  NNP     8       NMOD
    ... Vinken  NNP     1       SUB
    ... ,       ,       3       P
    ... 61      CD      6       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       3       AMOD
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      AMOD
    ... board   NN      9       OBJECT
    ... as      IN      9       NMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> de = DependencyEvaluator([parsed_sent],[gold_sent])
    >>> las, uas = de.eval()
    >>> las
    0.6
    >>> uas
    0.8
    >>> abs(uas - 0.8) < 0.00001
    True
    c                 C   s   || _ || _dS )z
        :param parsed_sents: the list of parsed_sents as the output of parser
        :type parsed_sents: list(DependencyGraph)
        N)_parsed_sents_gold_sents)selfZparsed_sentsZ
gold_sents r   B/var/www/auris/lib/python3.10/site-packages/nltk/parse/evaluate.py__init__G   s   
zDependencyEvaluator.__init__c                    s    h d d  fdd|D S )z
        Function to remove punctuation from Unicode string.
        :param input: the input string
        :return: Unicode string after remove all punctuation
        >   ZPfZPcZPoZPeZPdPiZPs c                 3   s"    | ]}t | vr|V  qd S )N)unicodedatacategory).0xZpunc_catr   r   	<genexpr>V   s     z4DependencyEvaluator._remove_punct.<locals>.<genexpr>)join)r   ZinStrr   r   r   _remove_punctO   s   z!DependencyEvaluator._remove_punctc           
      C   s  t | jt | jkrtdd}d}d}tt | jD ]`}| j| j}| j| j}t |t |kr5td| D ]A\}}|| }	|d du rHq9|d |	d krTtd| |d dkr^q9|d7 }|d	 |	d	 krz|d7 }|d
 |	d
 krz|d7 }q9q|| || fS )z
        Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS)

        :return : tuple(float,float)
        zE Number of parsed sentence is different with number of gold sentence.r   z!Sentences must have equal length.wordNz!Sentence sequence is not matched.r
      headrel)lenr   r   
ValueErrorrangenodesitemsr   )
r   ZcorrZcorrLtotaliZparsed_sent_nodesZgold_sent_nodesZparsed_node_addressZparsed_nodeZ	gold_noder   r   r   evalX   s8   zDependencyEvaluator.evalN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s
    :	r   )r   r   r   r   r   r   <module>   s   