
    eTht                         S r SSKrSSKrSSKrSSKrSSKrSSKJr  SSKJ	r	  \	R                  " \5      rS rS rS rS	 rS
 rS rSS jrS rS rS rS rS rSS jrSS jrS rS rS rS rg)a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc           	      N    S nS nS nS nU" U" U" U" U 5      5      5      5      $ )zALower text and remove punctuation, articles and extra whitespace.c                 |    [         R                  " S[         R                  5      n[         R                  " USU 5      $ )Nz\b(a|an|the)\b )recompileUNICODEsub)textregexs     _/var/www/auris/envauris/lib/python3.13/site-packages/transformers/data/metrics/squad_metrics.pyremove_articles)normalize_answer.<locals>.remove_articles'   s*    

,bjj9vveS$''    c                 @    SR                  U R                  5       5      $ )Nr   )joinsplitr   s    r   white_space_fix)normalize_answer.<locals>.white_space_fix+   s    xx

%%r   c                 l   ^ [        [        R                  5      mSR                  U4S jU  5       5      $ )N c              3   6   >#    U  H  oT;  d  M
  Uv   M     g 7fN ).0chexcludes     r   	<genexpr>8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>0   s     >Dbg,=rrDs   		)setstringpunctuationr   )r   r    s    @r   remove_punc%normalize_answer.<locals>.remove_punc.   s(    f(()ww>D>>>r   c                 "    U R                  5       $ r   )lowerr   s    r   r)   normalize_answer.<locals>.lower2   s    zz|r   r   )sr   r   r&   r)   s        r   normalize_answerr,   $   s1    (&? ?;uQx+@ABBr   c                 F    U (       d  / $ [        U 5      R                  5       $ r   )r,   r   )r+   s    r   
get_tokensr.   8   s    	A$$&&r   c                 B    [        [        U 5      [        U5      :H  5      $ r   )intr,   )a_golda_preds     r   compute_exactr3   >   s    '+;F+CCDDr   c                    [        U 5      n[        U5      n[        R                  " U5      [        R                  " U5      -  n[        UR	                  5       5      n[        U5      S:X  d  [        U5      S:X  a  [        X#:H  5      $ US:X  a  gSU-  [        U5      -  nSU-  [        U5      -  nSU-  U-  Xg-   -  nU$ )Nr         ?   )r.   collectionsCountersumvalueslenr0   )	r1   r2   	gold_toks	pred_tokscommonnum_same	precisionrecallf1s	            r   
compute_f1rC   B   s    6"I6"I  +k.A.A).LLF6==?#H
9~c)n19)**1}hY/I8^c)n,F
i-&
 Y%7	8BIr   c                 P  ^ 0 n0 nU  H  nUR                   nUR                   Vs/ s H  n[        US   5      (       d  M  US   PM     nnU(       d  S/nXQ;  a  [        SU 35        Mb  X   m[	        U4S jU 5       5      X%'   [	        U4S jU 5       5      X5'   M     X#4$ s  snf )zN
Computes the exact and f1 scores from the examples and the model predictions
r   r   zMissing prediction for c              3   <   >#    U  H  n[        UT5      v   M     g 7fr   )r3   r   a
predictions     r   r!   !get_raw_scores.<locals>.<genexpr>f   s     "VA=J#?#?   c              3   <   >#    U  H  n[        UT5      v   M     g 7fr   )rC   rF   s     r   r!   rI   g   s     P<a
1j 9 9<rJ   )qas_idanswersr,   printmax)	examplespredsexact_scores	f1_scoresexamplerL   answergold_answersrH   s	           @r   get_raw_scoresrW   R   s     LI5<__i_6HXY_`fYgHhv_i4L+F845]
""V"VVP<PP	   "" js   B# 	B#c                     0 nU R                  5        H-  u  pVX   U:  nU(       a  [        X%   (       + 5      XE'   M)  XdU'   M/     U$ r   )itemsfloat)scoresna_probsqid_to_has_ansna_prob_thresh
new_scoresqidr+   pred_nas           r   apply_no_ans_thresholdrb   l   sI    J,,.-.0#(;$;<JOsO ! r   c           	        ^ ^ U(       dd  [        T 5      n[        R                  " SS[        T R	                  5       5      -  U-  4SS[        TR	                  5       5      -  U-  4SU4/5      $ [        U5      n[        R                  " SS[        U 4S jU 5       5      -  U-  4SS[        U4S jU 5       5      -  U-  4SU4/5      $ )Nexact      Y@rB   totalc              3   .   >#    U  H
  nTU   v   M     g 7fr   r   )r   krR   s     r   r!   !make_eval_dict.<locals>.<genexpr>   s     %Hx!l1ox   c              3   .   >#    U  H
  nTU   v   M     g 7fr   r   )r   rh   rS   s     r   r!   ri      s     "BA9Q<rj   )r;   r7   OrderedDictr9   r:   )rR   rS   qid_listrf   s   ``  r   make_eval_dictrn   w   s    L!&&%#l&9&9&;"<<uDEus9#3#3#566>?% 
 	
 H&&%#%Hx%H"HH5PQus"B"BBBUJK% 
 	
r   c                 ,    U H  nX   X SU 3'   M     g )N_r   )	main_evalnew_evalprefixrh   s       r   
merge_evalrt      s     %-[	HAaS/" r   c                 x  ^^ [        U4S jT 5       5      nUnUnSn[        TU4S jS9n[        U5       H;  u  pX;  a  M  TU
   (       a  X   nOX
   (       a  SnOSnX[-  nXV:  d  M4  UnTU
   nM=     Su  pU H"  n
TU
   (       d  M  US-  nX;  a  M  XU
   -  nM$     S	U-  [        U5      -  US
U-  U-  4$ )Nc              3   @   >#    U  H  nTU   (       a  M  S v   M     g7f   Nr   r   rh   r]   s     r   r!   &find_best_thresh_v2.<locals>.<genexpr>        H1nQ6GQQ   	        c                    > TU    $ r   r   rh   r\   s    r   <lambda>%find_best_thresh_v2.<locals>.<lambda>   	    hqkr   keyr   )r   r   rx   re   r5   r9   sorted	enumerater;   )rQ   r[   r\   r]   
num_no_ans	cur_score
best_scorebest_threshrm   ir`   diffhas_ans_scorehas_ans_cnts     ``          r   find_best_thresh_v2r      s    HHHJIJKh$9:HH%#;Dz	!"J"3-K & "&Mc"q$  :F+[#:MP[:[[[r   c                 p    [        XXE5      u  pgn[        XXE5      u  pnX`S'   XpS'   XS'   XS'   XS'   XS'   g )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)r   )rq   rQ   	exact_rawf1_rawr\   r]   r   exact_threshr   r   	f1_threshr   s               r   find_all_best_thresh_v2r      sW    .A%T\.m+Jm%8%a"G
(l%1!""i"+!.o(lr   c                   ^^ [        U4S jT 5       5      nUnUnSn[        TU4S jS9n[        U5       H;  u  pX;  a  M  TU
   (       a  X   nOX
   (       a  SnOSnX[-  nXV:  d  M4  UnTU
   nM=     SU-  [        U5      -  U4$ )Nc              3   @   >#    U  H  nTU   (       a  M  S v   M     g7frw   r   ry   s     r   r!   #find_best_thresh.<locals>.<genexpr>   r{   r|   r}   c                    > TU    $ r   r   r   s    r   r   "find_best_thresh.<locals>.<lambda>   r   r   r   r   r   re   r   )rQ   r[   r\   r]   r   r   r   r   rm   rp   r`   r   s     ``        r   find_best_threshr      s    HHHJIJKh$9:HH%#;Dz	!"J"3-K & :F+[88r   c                 \    [        XXE5      u  pg[        XXE5      u  pX`S'   XpS'   XS'   XS'   g )Nr   r   r   r   )r   )
rq   rQ   r   r   r\   r]   r   r   r   r   s
             r   find_all_best_threshr      sB    /([J)%RG(l%1!""i"+r   c                 j   U  Vs0 s H"  oDR                   [        UR                  5      _M$     nnUR                  5        VVs/ s H  u  pgU(       d  M  UPM     nnnUR                  5        VVs/ s H  u  pgU(       a  M  UPM     n	nnUc  [        R                  US5      n[        X5      u  p[        XXS5      n[        XXS5      n[        X5      nU(       a  [        XUS9n[        XS5        U	(       a  [        XU	S9n[        UUS5        U(       a  [        XXX%5        U$ s  snf s  snnf s  snnf )Nr}   )rm   HasAnsNoAns)rL   boolrM   rY   dictfromkeysrW   rb   rn   rt   r   )rP   rQ   no_answer_probsno_answer_probability_thresholdrT   qas_id_to_has_answerrL   
has_answerhas_answer_qidsno_answer_qidsrd   rB   exact_thresholdf1_threshold
evaluationhas_ans_evalno_ans_evals                    r   squad_evaluater      s   QYZQYgNND,AAQYZ8L8R8R8Tc8T"4&Xbv8TOc7K7Q7Q7Sf7S!3[ef7SNf--s3x/IE, 4O *"?SuL>J%oo^:X6$_^\:{G4Z?a7 [cfs   )D$D)D)0D/D/c                    S n[        US9nSR                  UR                  U5      5      nUR                  U 5      nUS:X  a%  U(       a  [        R                  SU  SU S35        U$ U[        U 5      -   S-
  nU" U5      u  pU" U5      u  p[        U	5      [        U5      :w  a%  U(       a  [        R                  S	U	 S
U S35        U$ 0 nUR                  5        H	  u  pXU'   M     SnX};   a  X   nUU
;   a  U
U   nUc  U(       a  [        R                  S5        U$ SnX;   a  X   nUU
;   a  U
U   nUc  U(       a  [        R                  S5        U$ UUUS-    nU$ )z;Project the tokenized prediction back to the original text.c                     / n[         R                  " 5       n[        U 5       H+  u  p4US:X  a  M  X2[        U5      '   UR	                  U5        M-     SR                  U5      nXR4$ )Nr   r   )r7   rl   r   r;   appendr   )r   ns_charsns_to_s_mapr   cns_texts         r   _strip_spaces%get_final_text.<locals>._strip_spaces  s`    !--/dODACx)*H&OOA	 $
 ''(#%%r   )do_lower_caser   r   zUnable to find text: 'z' in ''rx   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor;   rY   )	pred_text	orig_textr   verbose_loggingr   	tokenizertok_textstart_positionend_positionorig_ns_textorig_ns_to_s_maptok_ns_texttok_ns_to_s_maptok_s_to_ns_mapr   	tok_indexorig_start_positionns_start_positionorig_end_positionns_end_positionoutput_texts                        r   get_final_textr      s   8	& ];Ixx	**956H]]9-NKK06)ANO!C	N2Q6L'4Y'?$\%28%<"[
<C,,KKD\NRXYdXeefgh O'--/%&	" 0 (+; 00"23D"E"KK56&)7.. 0 A KK34/3Dq3HJKr   c                     [        [        U 5      S SS9n/ n[        [        U5      5       H!  nXA:  a    U$ UR	                  X$   S   5        M#     U$ )z"Get the n-best logits from a list.c                     U S   $ )Nrx   r   xs    r   r   #_get_best_indexes.<locals>.<lambda>^  s    adr   Tr   reverser   )r   r   ranger;   r   )logitsn_best_sizeindex_and_scorebest_indexesr   s        r   _get_best_indexesr   \  s]    Yv.NDQOL3'( 	O.q12 ) r   c                     U (       d  / $ SnU  H  nUb  X!:  d  M  UnM     / nSnU  H0  n[         R                  " X!-
  5      nUR                  U5        XE-  nM2     / nU H  nUR                  X$-  5        M     U$ )z,Compute softmax probability over raw logits.Nr}   )mathexpr   )r[   	max_scorescore
exp_scores	total_sumr   probss          r   _compute_softmaxr   h  s    	I 1I  JIHHU&'!	 
 EU&' Lr   c                 ~   U(       a  [         R                  SU 35        U(       a  [         R                  SU 35        U(       a  U
(       a  [         R                  SU 35        [        R                  " [        5      nU H   nXR
                     R                  U5        M"     0 nU H  nUUUR                  '   M     [        R                  " S/ SQ5      n[        R                  " 5       n[        R                  " 5       n[        R                  " 5       n[        U 5       GH  u  nnUU   n/ nSnSnSnSn[        U5       GHi  u  nnXR                     n[        UR                  U5      n[        UR                  U5      nU
(       aG  UR                  S   UR                  S   -   n U U:  a"  U nUnUR                  S   nUR                  S   nU H  n!U H  n"U![        UR                  5      :  a  M  U"[        UR                  5      :  a  M9  U!UR                   ;  a  MK  U"UR                   ;  a  M]  UR"                  R%                  U!S5      (       d  M  U"U!:  a  M  U"U!-
  S	-   n#U#U:  a  M  UR                  U" UU!U"UR                  U!   UR                  U"   S95        M     M     GMl     U
(       a  UR                  U" USSUUS95        ['        US
 SS9n[        R                  " S/ SQ5      n$0 n%/ n&U GH>  n'[        U&5      U:  a    GO-UU'R(                     nU'R*                  S:  a  UR                  U'R*                  U'R,                  S	-    n(UR                   U'R*                     n)UR                   U'R,                     n*UR.                  U)U*S	-    n+UR1                  U(5      n,U,R3                  5       n,SR5                  U,R7                  5       5      n,SR5                  U+5      n-[9        U,U-XY5      n.U.U%;   a  GM  SU%U.'   OSn.SU%U.'   U&R                  U$" U.U'R:                  U'R<                  S95        GMA     U
(       aD  SU%;  a  U&R                  U$" SUUS95        [        U&5      S	:X  a  U&R?                  SU$" SSSS95        U&(       d  U&R                  U$" SSSS95        [        U&5      S	:  a  [A        S5      e/ n/Sn0U& HI  n1U/R                  U1R:                  U1R<                  -   5        U0(       a  M4  U1RB                  (       d  MG  U1n0MK     [E        U/5      n2/ n3[        U&5       Ha  u  n4n1[        R                  " 5       n5U1RB                  U5S'   U2U4   U5S'   U1R:                  U5S'   U1R<                  U5S'   U3R                  U55        Mc     [        U35      S	:  a  [A        S5      eU
(       d  U3S   S   UURF                  '   OZUU0R:                  -
  U0R<                  -
  n6U6UURF                  '   U6U:  a  SUURF                  '   OU0RB                  UURF                  '   U3UURF                  '   GM     U(       a<  [I        US5       n7U7RK                  [L        RN                  " USS9S-   5        SSS5        U(       a<  [I        US5       n7U7RK                  [L        RN                  " USS9S-   5        SSS5        U(       aE  U
(       a>  [I        US5       n7U7RK                  [L        RN                  " USS9S-   5        SSS5        U$ U$ ! , (       d  f       N= f! , (       d  f       Nm= f! , (       d  f       U$ = f)zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPrediction)feature_indexstart_index	end_indexstart_logit	end_logit@B r   Frx   c                 4    U R                   U R                  -   $ r   )r   r   r   s    r   r   ,compute_predictions_logits.<locals>.<lambda>  s    q}}WXWbWbGbr   Tr   NbestPrediction)r   r   r   r   r   emptyr}   No valid predictionsNr   probabilityr   r   w   indent
)(r   r   r7   defaultdictlistexample_indexr   	unique_id
namedtuplerl   r   r   start_logits
end_logitsr;   tokenstoken_to_orig_maptoken_is_max_contextgetr   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr   r   rL   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativenull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr  rT   featuresprelim_predictions
score_nullmin_null_feature_indexnull_start_logitnull_end_logitr   start_indexesend_indexesfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonr   output
score_diffwriters8                                                           r   compute_predictions_logitsrC    s     ./E.FGH():(;<= %<01J0KLM + 7 7 =!"7"78??H   06F,,-  $..e "--/O ,,.N"..0"+L"9w,];
!"&/&9"M7():):;F-f.A.A;OM+F,=,={KK&%+%8%8%;f>O>OPQ>R%R"%
2!3J-:*'-':':1'=$%+%6%6q%9N,!,I #c'..&99  C$77 "'*C*CC  (A(AA "77;;KOO  ;. &4q8F 11 &--)*7(3&/(.(;(;K(H&,&7&7	&B' "-  - ':R #%%!"8 ! 0, $$6<cmqr&11C
 &D5z[(t112G!#$^^D,<,<QR@RT
!(!:!:4;K;K!L&88H%00<RSCSU$==jI $>>+88HNN$45HH[1	+Hi`
!11/3 ,
/3 ,LL)ztGWGWcgcqcqrsA 'D #))-2CS_mno 5zQQ 0g3Z] ^_ LL)wCSVWXu:>344"E 1 1EOO CD&&:::*/'	  !.
!%(HAu ,,.F"ZZF6N$)!HF=!$)$5$5F=!"'//F;f% ) z?Q344&.8mF.COGNN+ $&9&E&EEI\IfIfgJ/9W^^,5524/2E2J2J/)3w~~&o #:r (#.&LLOA>EF / #S)VLLN1=DE * !%<+S1VLL$4Q?$FG 2 ? /. *) 21 s$   
(\(\(\-
\
\*-
\<c                    [         R                  " S/ SQ5      n[         R                  " S/ SQ5      n[        R                  SU 35        [         R                  " [
        5      nU H!  nUUR                     R                  U5        M#     0 nU H  nUUUR                  '   M     [         R                  " 5       n[         R                  " 5       n[         R                  " 5       n[        U 5       GH  u  nnUU   n/ nSn[        U5       GH  u  nnUUR                     nUR                  n[        UU5      n[        U5       H  n[        U	5       H  nUR                  U   nUR                  U   n UU	-  U-   n!UR                   U!   n"UR"                  U!   n#U UR$                  S-
  :  a  M\  U#UR$                  S-
  :  a  Mq  UR&                  R)                  U S5      (       d  M  U#U :  a  M  U#U -
  S-   n$U$U:  a  M  UR                  U" UU U#UU"S95        M     M     GM     [+        US	 S
S9n0 n%/ n&U GHQ  n'[-        U&5      U:  a    GO@UU'R.                     nUR0                  U'R2                  U'R4                  S-    n(UR6                  U'R2                     n)UR6                  U'R4                     n*UR8                  U)U*S-    n+UR;                  U(5      n,U,R=                  5       n,SR?                  U,RA                  5       5      n,SR?                  U+5      n-[C        US5      (       a  URD                  n.OURF                  n.[I        U,U-U.U5      n/U/U%;   a  GM!  S
U%U/'   U&R                  U" U/U'RJ                  U'RL                  S95        GMT     U&(       d  U&R                  U" SSSS95        / n0Sn1U& H6  n2U0R                  U2RJ                  U2RL                  -   5        U1(       a  M4  U2n1M8     [O        U05      n3/ n4[        U&5       Ha  u  nn2[         R                  " 5       n5U2RP                  U5S'   U3U   U5S'   U2RJ                  U5S'   U2RL                  U5S'   U4R                  U55        Mc     [-        U45      S:  a  [S        S5      eU1c  [S        S5      eUn6U6UURT                  '   U1RP                  UURT                  '   U4UURT                  '   GM     [W        US5       n7U7RY                  [Z        R\                  " USS9S-   5        SSS5        [W        US5       n7U7RY                  [Z        R\                  " USS9S-   5        SSS5        U
(       a>  [W        US5       n7U7RY                  [Z        R\                  " USS9S-   5        SSS5        U$ U$ ! , (       d  f       N= f! , (       d  f       Nf= f! , (       d  f       U$ = f)z
XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
null if needed.

Requires utils_squad_evaluate.py
r   )r   r   r   start_log_probend_log_probr   )r   rE  rF  r   r   rx   Fc                 4    U R                   U R                  -   $ r   )rE  rF  r   s    r   r   /compute_predictions_log_probs.<locals>.<lambda>  s    q/?/?!../Pr   Tr   r   r   r   g    .Nr   r   rE  rF  r   r   r   r  r  )/r7   r  r   r   r  r  r  r   r  rl   r   
cls_logitsminr   r	  start_top_indexr
  end_top_indexparagraph_lenr  r  r   r;   r   r  r   r   r  r  r  r  r   r   hasattrr   do_lowercase_and_remove_accentr   rE  rF  r   r   r  rL   r  r  r  r  )8r  r  r  r   r  r  r  r  start_n_top	end_n_topr  r   r   r%  r3  r!  r"  r#  r$  r&  r'  r(  r  rT   r)  r*  r+  r   cur_null_scorer   jrE  r   j_indexrF  r   r2  r4  r5  r6  r7  r8  r9  r:  r   r   r   r;  r<  r=  r>  r   r?  r@  rA  rB  s8                                                           r   compute_predictions_log_probsrU  N  s   * $..k #--E KK*+A*BCD + 7 7 =!'"7"78??H   06F,,-  "--/O ,,.N"..0"+L"9w,];
&/&9"M7():):;F#..N Z8J;'y)A%+%8%8%;N"("8"8";K)ma/G#)#4#4W#=L & 4 4W =I
 #g&;&;a&??  G$9$9A$== "77;;KOO  ;. &4q8F 11 &--)*7(3&/+9)53 * ( ':X $$Q[_
 &D5z[(t112G !(8(8DNNQ<NPJ$66t7G7GHN"44T^^DL!,,^|a?OQK 99*EH  ~~'Hxx 01H-Iy/22 ) 7 7 ) H H')]O\J--+/Z(LL jATATcgctctuO 'Z LL)r$UYZ["E 4 4u7I7I IJ&&&+# 
 !.
!%(HAu ,,.F"ZZF6N$)!HF=!',';';F#$%*%7%7F>"f% ) z?Q344&344
+5( +>*B*B')3w~~&M #:P 
$c	*fTZZ:TAB 
+ 
	%TZZq9D@A 
& +S1VLL$4Q?$FG 2 ? 
+	* 
&	% 21 s$   (V(V(V.
V
V+.
V=r   )Nr5   )F)__doc__r7   r  r   r	   r$   models.bertr   utilsr   
get_logger__name__r   r,   r.   r3   rC   rW   rb   rn   rt   r   r   r   r   r   r   r   r   rC  rU  r   r   r   <module>r[     s       	  )  
		H	%C('E #4
*1
\D)9.,>[|	.L^}r   