
    /hP                     F   S SK r S SKJr  S SKJr  S SKJr  S SKJ	r	  S r " S S5      r
S	 r SS jr\ R                  " S5      rSS jrS r SS jrS r\ R                  " S\ R&                  5      r\ R                  " S5      rS r/ SQS
4S jrS r\S:X  a  \" 5         gg)    N)accuracy)map_tag)	str2tuple)Treec                     / n/ nU H>  nU R                  UR                  5       5      nU[        U5      -  nU[        U5      -  nM@     [        X#5      $ )aX  
Score the accuracy of the chunker against the gold standard.
Strip the chunk information from the gold standard and rechunk it using
the chunker, then compute the accuracy score.

:type chunker: ChunkParserI
:param chunker: The chunker being evaluated.
:type gold: tree
:param gold: The chunk structures to score the chunker on.
:rtype: float
)parseflattentree2conlltags	_accuracy)chunkergold	gold_tags	test_tags	gold_tree	test_trees         G/var/www/auris/envauris/lib/python3.13/site-packages/nltk/chunk/util.pyr   r      sZ     II	MM)"3"3"56	^I..	^I..	  Y**    c                   p    \ rS rSrSrS rS rS rS rS r	S r
SS	 jrS
 rS rS rS rS rS rS rSrg)
ChunkScore3   ao
  
A utility class for scoring chunk parsers.  ``ChunkScore`` can
evaluate a chunk parser's output, based on a number of statistics
(precision, recall, f-measure, misssed chunks, incorrect chunks).
It can also combine the scores from the parsing of multiple texts;
this makes it significantly easier to evaluate a chunk parser that
operates one sentence at a time.

Texts are evaluated with the ``score`` method.  The results of
evaluation can be accessed via a number of accessor methods, such
as ``precision`` and ``f_measure``.  A typical use of the
``ChunkScore`` class is::

    >>> chunkscore = ChunkScore()           # doctest: +SKIP
    >>> for correct in correct_sentences:   # doctest: +SKIP
    ...     guess = chunkparser.parse(correct.leaves())   # doctest: +SKIP
    ...     chunkscore.score(correct, guess)              # doctest: +SKIP
    >>> print('F Measure:', chunkscore.f_measure())       # doctest: +SKIP
    F Measure: 0.823

:ivar kwargs: Keyword arguments:

    - max_tp_examples: The maximum number actual examples of true
      positives to record.  This affects the ``correct`` member
      function: ``correct`` will not return more than this number
      of true positive examples.  This does *not* affect any of
      the numerical metrics (precision, recall, or f-measure)

    - max_fp_examples: The maximum number actual examples of false
      positives to record.  This affects the ``incorrect`` member
      function and the ``guessed`` member function: ``incorrect``
      will not return more than this number of examples, and
      ``guessed`` will not return more than this number of true
      positive examples.  This does *not* affect any of the
      numerical metrics (precision, recall, or f-measure)

    - max_fn_examples: The maximum number actual examples of false
      negatives to record.  This affects the ``missed`` member
      function and the ``correct`` member function: ``missed``
      will not return more than this number of examples, and
      ``correct`` will not return more than this number of true
      negative examples.  This does *not* affect any of the
      numerical metrics (precision, recall, or f-measure)

    - chunk_label: A regular expression indicating which chunks
      should be compared.  Defaults to ``'.*'`` (i.e., all chunks).

:type _tp: list(Token)
:ivar _tp: List of true positives
:type _fp: list(Token)
:ivar _fp: List of false positives
:type _fn: list(Token)
:ivar _fn: List of false negatives

:type _tp_num: int
:ivar _tp_num: Number of true positives
:type _fp_num: int
:ivar _fp_num: Number of false positives
:type _fn_num: int
:ivar _fn_num: Number of false negatives.
c                    [        5       U l        [        5       U l        [        5       U l        [        5       U l        [        5       U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l	        UR                  SS5      U l
        SU l        SU l        SU l        SU l        SU l        SU l        S	U l        g )
Nmax_tp_examplesd   max_fp_examplesmax_fn_exampleschunk_labelz.*r   g        F)set_correct_guessed_tp_fp_fnget_max_tp_max_fp_max_fn_chunk_label_tp_num_fp_num_fn_num_count_tags_correct_tags_total_measuresNeedUpdate)selfkwargss     r   __init__ChunkScore.__init__r   s    555zz"3S9zz"3S9zz"3S9"JJ}d; #( r   c                    U R                   (       a  U R                  U R                  -  U l        U R                  U R                  -
  U l        U R                  U R                  -
  U l        [        U R                  5      U l        [        U R
                  5      U l        [        U R                  5      U l	        SU l         g g )NF)
r.   r   r   r    r"   r!   lenr(   r)   r*   r/   s    r   _updateMeasuresChunkScore._updateMeasures   s    ##}}t}}4DH}}t}}4DH}}t}}4DHtxx=DLtxx=DLtxx=DL',D$ $r   c           	         U =R                   [        XR                  U R                  5      -  sl         U =R                  [        X R                  U R                  5      -  sl        U =R                  S-  sl        SU l         [        U5      n[        U5      nU =R                  [        U5      -  sl        U =R                  [        S [        XC5       5       5      -  sl
        g! [         a    S=p4 N\f = f)a  
Given a correctly chunked sentence, score another chunked
version of the same sentence.

:type correct: chunk structure
:param correct: The known-correct ("gold standard") chunked
    sentence.
:type guessed: chunk structure
:param guessed: The chunked sentence to be scored.
   T c              3   8   #    U  H  u  pX:X  d  M  S v   M     g7f)r9   Nr:   ).0tgs      r   	<genexpr>#ChunkScore.score.<locals>.<genexpr>   s      "
;&1qvAA;s   	N)r   
_chunksetsr+   r'   r   r.   r
   
ValueErrorr-   r4   r,   sumzip)r/   correctguessedcorrect_tagsguessed_tagss        r   scoreChunkScore.score   s     	G[[$:K:KLLG[[$:K:KLLq#' 	-)'2L)'2L 	C--c "
;"
 
 	
  	- +-,L<		-s   C' 'C76C7c                 V    U R                   S:X  a  gU R                  U R                   -  $ )z
Return the overall tag-based accuracy for all text that have
been scored by this ``ChunkScore``, using the IOB (conll2000)
tag encoding.

:rtype: float
r   r9   )r-   r,   r5   s    r   r   ChunkScore.accuracy   s,     q !!D$4$444r   c                     U R                  5         U R                  U R                  -   nUS:X  a  gU R                  U-  $ )zi
Return the overall precision for all texts that have been
scored by this ``ChunkScore``.

:rtype: float
r   )r6   r(   r)   r/   divs     r   	precisionChunkScore.precision   ;     	llT\\)!8<<#%%r   c                     U R                  5         U R                  U R                  -   nUS:X  a  gU R                  U-  $ )zf
Return the overall recall for all texts that have been
scored by this ``ChunkScore``.

:rtype: float
r   r6   r(   r*   rN   s     r   recallChunkScore.recall   rR   r   c                     U R                  5         U R                  5       nU R                  5       nUS:X  d  US:X  a  gSX-  SU-
  U-  -   -  $ )as  
Return the overall F measure for all texts that have been
scored by this ``ChunkScore``.

:param alpha: the relative weighting of precision and recall.
    Larger alpha biases the score towards the precision value,
    while smaller alpha biases the score towards the recall
    value.  ``alpha`` should have a value in the range [0,1].
:type alpha: float
:rtype: float
r   r9   )r6   rP   rU   )r/   alphaprs       r   	f_measureChunkScore.f_measure   sQ     	NNKKM6Q!VEIUa/00r   c                     U R                  5         [        U R                  5      nU Vs/ s H  o"S   PM	     sn$ s  snf )z
Return the chunks which were included in the
correct chunk structures, but not in the guessed chunk
structures, listed in input order.

:rtype: list of chunks
r9   )r6   listr"   r/   chunkscs      r   missedChunkScore.missed   s7     	dhh$%f!f%%%   ;c                     U R                  5         [        U R                  5      nU Vs/ s H  o"S   PM	     sn$ s  snf )z
Return the chunks which were included in the guessed chunk structures,
but not in the correct chunk structures, listed in input order.

:rtype: list of chunks
r9   )r6   r^   r!   r_   s      r   	incorrectChunkScore.incorrect   s7     	dhh$%f!f%%%rd   c                 `    [        U R                  5      nU Vs/ s H  o"S   PM	     sn$ s  snf )zw
Return the chunks which were included in the correct
chunk structures, listed in input order.

:rtype: list of chunks
r9   )r^   r   r_   s      r   rE   ChunkScore.correct   ,     dmm$$%f!f%%%   +c                 `    [        U R                  5      nU Vs/ s H  o"S   PM	     sn$ s  snf )zw
Return the chunks which were included in the guessed
chunk structures, listed in input order.

:rtype: list of chunks
r9   )r^   r   r_   s      r   rF   ChunkScore.guessed  rj   rk   c                 T    U R                  5         U R                  U R                  -   $ )NrT   r5   s    r   __len__ChunkScore.__len__  s!    ||dll**r   c                 6    S[        [        U 5      5      -   S-   $ )zH
Return a concise representation of this ``ChunkScoring``.

:rtype: str
z<ChunkScoring of z chunks>)reprr4   r5   s    r   __repr__ChunkScore.__repr__  s     #T#d)_4zAAr   c                     SSU R                  5       S-  S S3-   SU R                  5       S-  S S3-   SU R                  5       S-  S S3-   SU R                  5       S-  S S	3-   $ )
z
Return a verbose representation of this ``ChunkScoring``.
This representation includes the precision, recall, and
f-measure scores.  For other information about the score,
use the accessor methods (e.g., ``missed()`` and ``incorrect()``).

:rtype: str
zChunkParse score:
z    IOB Accuracy: r   z5.1fz%%
z    Precision:    z    Recall:       z    F-Measure:    z%%)r   rP   rU   r[   r5   s    r   __str__ChunkScore.__str__  s     "#DMMOc$9$#?tDF#DNN$4s$:4#@EG $DKKMC$7#=TBD $DNN$4s$:4#@C	E	
r   )r'   r   r+   r"   r*   r!   r)   r   r&   r%   r$   r.   r,   r-   r    r(   N)g      ?)__name__
__module____qualname____firstlineno____doc__r1   r6   rI   r   rP   rU   r[   rb   rf   rE   rF   ro   rs   rv   __static_attributes__r:   r   r   r   r   3   sO    <|)&-
:
5&&1&
&	&&&+B
r   r   c                 8   Sn/ nU  H  n[        U[        5      (       ai  [        R                  " X%R	                  5       5      (       a"  UR                  X4UR                  5       45        U[        UR                  5       5      -  nM  US-  nM     [        U5      $ )Nr   r9   )

isinstancer   rematchlabelappendfreezer4   leavesr   )r=   countr   posr`   childs         r   rA   rA   2  s{    
CFeT""xx[[]33|U\\^<=3u||~&&C1HC  v;r   Sc                    [         R                  " S5      n[        U/ 5      /nUR                  U 5       GH  nUR	                  5       n	U	S   S:X  a_  [        U5      S:w  a  [        SUR                  5       S 35      e[        U/ 5      n
US   R                  U
5        UR                  U
5        M|  U	S   S:X  a>  [        U5      S	:w  a  [        S
UR                  5       S 35      eUR                  5         M  Uc  US   R                  U	5        M  [        X5      u  pU(       a  U(       a  [        XEU5      nUS   R                  X45        GM     [        U5      S:w  a  [        S[        U 5      S 35      eUS   $ )a
  
Divide a string of bracketted tagged text into
chunks and unchunked tokens, and produce a Tree.
Chunks are marked by square brackets (``[...]``).  Words are
delimited by whitespace, and each word should have the form
``text/tag``.  Words that do not contain a slash are
assigned a ``tag`` of None.

:param s: The string to be converted
:type s: str
:param chunk_label: The label to use for chunk nodes
:type chunk_label: str
:param root_label: The label to use for the root of the tree
:type root_label: str
:rtype: Tree
z\[|\]|[^\[\]\s]+r   [r9   zUnexpected [ at char d]   zUnexpected ] at char zExpected ] at char )r   compiler   finditergroupr4   rB   startr   popr   r   )sr   
root_labelsepsource_tagsettarget_tagsetWORD_OR_BRACKETstackr   textchunkwordtags                r   tagstr2treer   ?  sP   ( jj!45O*b!"E ))!,{{}7c>5zQ #8q8I!JKKb)E"IU#LL!W^5zQ #8q8I!JKKIIK{b	  &%d0	 ]!-DCb	  $-' -* 5zQ.s1vaj9::8Or   z(\S+)\s+(\S+)\s+([IOB])-?(\S+)?c                 f   [        U/ 5      /n[        U R                  S5      5       GH  u  pEUR                  5       (       d  M  [        R                  U5      nUc  [        SUS 35      eUR                  5       u  pxpUb  X;  a  Sn	U	S:H  =(       a    XS   R                  5       :g  nU	S;   d  U(       a  [        U5      S:X  a  UR                  5         U	S	:X  d  U(       a1  [        U
/ 5      nUS   R                  U5        UR                  U5        US   R                  Xx45        GM     US
   $ )a  
Return a chunk structure for a single sentence
encoded in the given CONLL 2000 style string.
This function converts a CoNLL IOB string into a tree.
It uses the specified chunk types
(defaults to NP, PP and VP), and creates a tree rooted at a node
labeled S (by default).

:param s: The CoNLL string to be converted.
:type s: str
:param chunk_types: The chunk types to be converted.
:type chunk_types: tuple
:param root_label: The node label to use for the root.
:type root_label: str
:rtype: Tree

zError on line r   OIr   BOr   Br   )r   	enumeratesplitstrip_LINE_REr   rB   groupsr   r4   r   r   )r   chunk_typesr   r   linenoliner   r   r   state
chunk_type
mismatch_Ir   s                r   conllstr2treer   u  s   $ *b!"E!!''$-0zz|| t$=~fQZ899).&E "z'DE c\Ej"IOO4E&E
D=J5zQ		 C<:R(E"IU#LL 	b	$%9 1< 8Or   c                 &   / nU  H]  n UR                  5       nSnU HA  n[        U[        5      (       a  [        S5      eUR	                  US   US   XC-   45        SnMC     M_     U$ ! [
         a    UR	                  US   US   S45         M  f = f)z
Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
Convert a tree to the CoNLL IOB tag format.

:param t: The tree to be converted.
:type t: Tree
:rtype: list(tuple)
B-z7Tree is too deeply nested to be printed in CoNLL formatr   r9   I-r   )r   r   r   rB   r   AttributeError)r=   tagsr   categoryprefixcontentss         r   r
   r
     s     D	3{{}HF!h--$Q  Xa[(1+v7HIJ "	  K  	3KKq58S12	3s   AA(($BBc                    [        U/ 5      nU  GH2  u  pVnUc&  U(       a  [        S5      eUR                  XV45        M0  UR                  S5      (       a"  UR                  [        USS XV4/5      5        Mh  UR                  S5      (       a  [	        U5      S:X  d2  [        US   [         5      (       a  US   R                  5       USS :w  a4  U(       a  [        S5      eUR                  [        USS XV4/5      5        M  US   R                  XV45        GM  US:X  a  UR                  XV45        GM&  [        S	U< 35      e   U$ )
z)
Convert the CoNLL IOB format to a tree.
NzBad conll tag sequencer   r   r   r   r   r   zBad conll tag )r   rB   r   
startswithr4   r   r   )sentencer   r   stricttreer   postagchunktags           r   conlltags2treer     s(    
BD"*h !9:: TN+  &&KKXab\TN+;<=  &&D	Q!$r(D118>>#x|3$%=>> KKXab\TN3C DER/_KK'~h\:;;3 #+4 Kr   c                     [        U 5       Vs/ s H  nSR                  U5      PM     nnSR                  U5      $ s  snf )z
Return a multiline string where each line contains a word, tag and IOB tag.
Convert a tree to the CoNLL IOB string format

:param t: The tree to be converted.
:type t: Tree
:rtype: str
 r   )r
   join)r=   tokenliness      r   tree2conllstrr     s;     +9*;<*;SXXe_*;E<99U =s   <a   <DOC>\s*(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?<BODY>\s*(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?<TEXT>(?P<text>.*?)</TEXT>\s*</BODY>\s*</DOC>\s*z#<b_\w+\s+[^>]*?type="(?P<type>\w+)"c                    [        U/ 5      /nU c  / $ [        R                  " SU 5       H  nUR                  5       n UR	                  S5      (       af  [
        R                  U5      nUc  [        SU5        [        UR                  S5      / 5      nUS   R                  U5        UR                  U5        M  UR	                  S5      (       a  UR                  5         M  US   R                  U5        M     [        U5      S
:w  a  [        S5      eUS   $ ! [        [        4 a$  n[        SUR                  5       S S	35      UeS nAff = f)Nz<[^>]+>|[^\s<]+z<b_XXXXtyper   z<e_z$Bad IEER string (error at character r   )r9   zBad IEER stringr   )r   r   r   r   r   _IEER_TYPE_REr   printr   r   
IndexErrorrB   r   r4   )r   r   r   piece_mpiecemr   es           r   _ieer_read_textr     s1   *b!"E 	y	;;115	&&!''.9&%(QWWV_b1b	  'U#!!%((		
 b	  '! 6* 5zQ*++8O J' 	6w}}q6IK	s$   A:D:&D"DE)EE)	LOCATIONORGANIZATIONPERSONDURATIONDATECARDINALPERCENTMONEYMEASUREc           	         [         R                  U 5      nU(       ag  [        UR                  S5      U5      UR                  S5      UR                  S5      UR                  S5      [        UR                  S5      U5      S.$ [        X5      $ )aP  
Return a chunk structure containing the chunked tagged text that is
encoded in the given IEER style string.
Convert a string of chunked tagged text in the IEER named
entity format into a chunk structure.  Chunks are of several
types, LOCATION, ORGANIZATION, PERSON, DURATION, DATE, CARDINAL,
PERCENT, MONEY, and MEASURE.

:rtype: Tree
r   docnodoctype	date_timeheadline)r   r   r   r   r   )_IEER_DOC_REr   r   r   )r   r   r   r   s       r   ieerstr2treer   '  sy    8 	1A#AGGFOZ@WWW%wwy)- (
(;ZH
 	
 q--r   c                  "   Sn SS K nUR                  R                  U SS9nUR                  5         [	        5         Sn [        U SS9nUR                  5         [	        S5        [	        UR                  R                  U5      5        [	        5         g )	Nzd[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./.r   NP)r   av  
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
)r   PP)r   zCoNLL output:)nltkr   r   pprintr   r   r   )r   r   r=   
conll_trees       r   demor   R  sx    nA

qd3AHHJ	G	A< ql;J 
/	$**
"
":
./	Gr   __main__)r   r   /NN)r   r   VPr   )r   r   F)r   nltk.metricsr   r   nltk.tag.mappingr   nltk.tag.utilr   	nltk.treer   r   rA   r   r   r   r   r
   r   r   DOTALLr   r   r   r   r   rx   r:   r   r   <module>r      s    
 . $ # +<z
 z
~
 UY.f ::892j8 FK!H
 zz II
 

ABD
 (.V,^ zF r   