o
    Zht                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 e	
eZdd Zdd	 Zd
d Zdd Zdd Zdd Zd,ddZdd Zdd Zdd Zdd Zdd Zd-dd Zd.d"d#Zd$d% Zd&d' Zd(d) Zd*d+ ZdS )/a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dt j}t |d| S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregex r   V/var/www/auris/lib/python3.10/site-packages/transformers/data/metrics/squad_metrics.pyremove_articles'   s   z)normalize_answer.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinsplitr
   r   r   r   white_space_fix+   s   z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s    | ]	}| vr|V  qd S Nr   ).0chexcluder   r   	<genexpr>0       z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r   r   r   r   remove_punc.   s   
z%normalize_answer.<locals>.remove_puncc                 S   s   |   S r   )lowerr   r   r   r   r   2   s   znormalize_answer.<locals>.lowerr   )sr   r   r   r   r   r   r   normalize_answer$   s
   r!   c                 C   s   | sg S t |  S r   )r!   r   )r    r   r   r   
get_tokens8   s   r"   c                 C   s   t t| t|kS r   )intr!   )a_golda_predr   r   r   compute_exact>   s   r&   c           	      C   s   t | }t |}t|t|@ }t| }t|dks$t|dkr*t||kS |dkr0dS d| t| }d| t| }d| | ||  }|S )Nr         ?   )r"   collectionsCountersumvalueslenr#   )	r$   r%   Z	gold_toksZ	pred_tokscommonZnum_same	precisionZrecallf1r   r   r   
compute_f1B   s   r1   c                    s   i }i }| D ]<}|j }dd |jD }|sdg}||vr$td|  q||  t fdd|D ||< t fdd|D ||< q||fS )zV
    Computes the exact and f1 scores from the examples and the model predictions
    c                 S   s    g | ]}t |d  r|d  qS r   )r!   )r   Zanswerr   r   r   
<listcomp>[   s     z"get_raw_scores.<locals>.<listcomp>r   zMissing prediction for c                 3       | ]}t | V  qd S r   )r&   r   aZ
predictionr   r   r   f       z!get_raw_scores.<locals>.<genexpr>c                 3   r3   r   )r1   r4   r6   r   r   r   g   r7   )qas_idanswersprintmax)examplespredsexact_scores	f1_scoresexampler8   Zgold_answersr   r6   r   get_raw_scoresR   s   rA   c                 C   sF   i }|   D ]\}}|| |k}|rt||  ||< q|||< q|S r   )itemsfloat)scoresna_probsqid_to_has_ansZna_prob_threshZ
new_scoresqidr    Zpred_nar   r   r   apply_no_ans_thresholdl   s   
rH   c              	      s   |s$t  }tddt   | fddt  | fd|fgS t |}tddt fdd|D  | fddtfdd|D  | fd|fgS )Nexact      Y@r0   totalc                 3       | ]} | V  qd S r   r   r   k)r>   r   r   r          z!make_eval_dict.<locals>.<genexpr>c                 3   rL   r   r   rM   )r?   r   r   r      rO   )r-   r)   OrderedDictr+   r,   )r>   r?   qid_listrK   r   )r>   r?   r   make_eval_dictw   s     rR   c                 C   s$   |D ]}|| | | d| < qd S )N_r   )	main_evalZnew_evalprefixrN   r   r   r   
merge_eval   s   rV   c                    s   t fddD }|}|}d}t  fddd}t|D ])\}	}
|
|vr(q|
 r1||
 }n	| |
 r8d}nd}||7 }||krH|} |
 }qd	\}}|D ]}
|
 sVqO|d
7 }|
|vr_qO|||
 7 }qOd| t| |d| | fS )Nc                 3       | ]	} | sd V  qdS    Nr   rM   rF   r   r   r      r   z&find_best_thresh_v2.<locals>.<genexpr>        c                        |  S r   r   rN   rE   r   r   <lambda>       z%find_best_thresh_v2.<locals>.<lambda>keyr   )r   r   rY   rJ   r'   r+   sorted	enumerater-   )r=   rD   rE   rF   
num_no_ans	cur_score
best_scorebest_threshrQ   irG   diffZhas_ans_scoreZhas_ans_cntr   rE   rF   r   find_best_thresh_v2   s6   
rn   c                 C   s\   t ||||\}}}t ||||\}	}
}|| d< || d< |	| d< |
| d< || d< || d< d S )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)rn   )rT   r=   	exact_rawf1_rawrE   rF   ro   exact_threshrs   rq   	f1_threshrt   r   r   r   find_all_best_thresh_v2   s   ry   c                    s   t fddD }|}|}d}t  fddd}t|D ])\}	}
|
|vr(q|
 r1||
 }n	| |
 r8d}nd}||7 }||krH|} |
 }qd	| t| |fS )
Nc                 3   rW   rX   r   rM   rZ   r   r   r      r   z#find_best_thresh.<locals>.<genexpr>r[   c                    r\   r   r   r]   r^   r   r   r_      r`   z"find_best_thresh.<locals>.<lambda>ra   rc   r   rJ   rd   )r=   rD   rE   rF   rg   rh   ri   rj   rQ   rS   rG   rl   r   rm   r   find_best_thresh   s&   
rz   c           
      C   sH   t ||||\}}t ||||\}}	|| d< || d< || d< |	| d< d S )Nro   rp   rq   rr   )rz   )
rT   r=   ru   rv   rE   rF   ro   rw   rq   rx   r   r   r   find_all_best_thresh   s   r{   r'   c                 C   s   dd | D }dd |  D }dd |  D }|d u r#t|d}t| |\}}t||||}	t||||}
t|	|
}|rLt|	|
|d}t||d |r[t|	|
|d}t||d	 |rft|||||| |S )
Nc                 S   s   i | ]	}|j t|jqS r   )r8   boolr9   )r   r@   r   r   r   
<dictcomp>   s    z"squad_evaluate.<locals>.<dictcomp>c                 S   s   g | ]\}}|r|qS r   r   r   r8   Z
has_answerr   r   r   r2          z"squad_evaluate.<locals>.<listcomp>c                 S   s   g | ]\}}|s|qS r   r   r~   r   r   r   r2      r   r[   )rQ   ZHasAnsZNoAns)rB   dictfromkeysrA   rH   rR   rV   r{   )r<   r=   Zno_answer_probsZno_answer_probability_thresholdZqas_id_to_has_answerZhas_answer_qidsZno_answer_qidsrI   r0   Zexact_thresholdZf1_thresholdZ
evaluationZhas_ans_evalZno_ans_evalr   r   r   squad_evaluate   s(   
r   Fc                 C   sZ  dd }t |d}d||}|| }|dkr*|r(td|  d| d |S |t|  d	 }||\}	}
||\}}t|	t|krV|rTtd
|	 d| d |S i }| D ]\}}|||< q\d}||v rw|| }||
v rw|
| }|du r|rtd |S d}||v r|| }||
v r|
| }|du r|rtd |S |||d	  }|S )z;Project the tokenized prediction back to the original text.c                 S   sP   g }t  }t| D ]\}}|dkrq
||t|< || q
d|}||fS )Nr   r   )r)   rP   rf   r-   appendr   )r
   Zns_charsZns_to_s_maprk   cZns_textr   r   r   _strip_spaces  s   
z%get_final_text.<locals>._strip_spaces)do_lower_caser   rc   zUnable to find text: 'z' in ''rY   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor-   rB   )Z	pred_text	orig_textr   verbose_loggingr   	tokenizertok_textZstart_positionZend_positionZorig_ns_textZorig_ns_to_s_mapZtok_ns_textZtok_ns_to_s_mapZtok_s_to_ns_maprk   Z	tok_indexZorig_start_positionZns_start_positionZorig_end_positionZns_end_positionZoutput_textr   r   r   get_final_text   sL   




r   c                 C   sP   t t| dd dd}g }tt|D ]}||kr |S ||| d  q|S )z"Get the n-best logits from a list.c                 S   s   | d S )NrY   r   xr   r   r   r_   ^  r`   z#_get_best_indexes.<locals>.<lambda>Trb   reverser   )re   rf   ranger-   r   )Zlogitsn_best_sizeZindex_and_scoreZbest_indexesrk   r   r   r   _get_best_indexes\  s   r   c                 C   s|   | sg S d}| D ]}|du s||kr|}qg }d}| D ]}t || }|| ||7 }qg }|D ]	}|||  q2|S )z,Compute softmax probability over raw logits.Nr[   )mathexpr   )rD   Z	max_scoreZscoreZ
exp_scoresZ	total_sumr   probsr   r   r   _compute_softmaxh  s"   

r   c           8      C   s~  |r
t d|  |rt d|  |r |
r t d|  tt}|D ]
}||j | q'i }|D ]}|||j< q6tdg d}t	 }t	 }t	 }t
| D ]\}}|| }g }d}d}d}d}t
|D ]\}}||j }t|j|}t|j|}|
r|jd |jd  } | |k r| }|}|jd }|jd }|D ]M}!|D ]H}"|!t|jkrq|"t|jkrq|!|jvrq|"|jvrq|j|!dsq|"|!k rq|"|! d	 }#|#|krq||||!|"|j|! |j|" d qqqm|
r|||dd||d t|d
d dd}tdg d}$i }%g }&|D ]q}'t|&|kr! ng||'j }|'jdkrt|j|'j|'jd	  }(|j|'j })|j|'j }*|j|)|*d	  }+||(},|, },d|, },d|+}-t|,|-||	}.|.|%v roqd|%|.< nd}.d|%|.< |&|$|.|'j|'jd q|
rd|%vr|&|$d||d t|&d	kr|&d|$dddd |&s|&|$dddd t|&d	k rt dg }/d}0|&D ]}1|/|1j|1j  |0s|1j!r|1}0qt"|/}2g }3t
|&D ]#\}4}1t	 }5|1j!|5d< |2|4 |5d< |1j|5d< |1j|5d< |3|5 qt|3d	k rt d|
s&|3d d ||j#< n||0j |0j }6|6||j#< |6|kr>d||j#< n|0j!||j#< |3||j#< qV|rot$|d}7|7%t&j'|ddd  W d   n	1 sjw   Y  |rt$|d}7|7%t&j'|ddd  W d   n	1 sw   Y  |r|
rt$|d}7|7%t&j'|ddd  W d   |S 1 sw   Y  |S )zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPrediction)feature_indexstart_index	end_indexstart_logit	end_logit@B r   FrY   c                 S      | j | j S r   )r   r   r   r   r   r   r_         z,compute_predictions_logits.<locals>.<lambda>Tr   NbestPrediction)r
   r   r   r   r   emptyr[   No valid predictionsNr
   probabilityr   r   w   indent
)(r   r   r)   defaultdictlistexample_indexr   	unique_id
namedtuplerP   rf   r   start_logits
end_logitsr-   tokenstoken_to_orig_maptoken_is_max_contextgetre   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr
   r   r8   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativeZnull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   r@   featuresprelim_predictions
score_nullZmin_null_feature_indexZnull_start_logitZnull_end_logitr   Zstart_indexesZend_indexesZfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonrk   output
score_diffwriterr   r   r   compute_predictions_logits  s.  





	

	









r   c           8      C   sR  t dg d}t dg d}td|  t t}|D ]
}||j | qi }|D ]}|||j< q.t 	 }t 	 }t 	 }t
| D ]u\}}|| }g }d}t
|D ]m\}}||j }|j}t||}t|D ]W}t|	D ]P}|j| }|j| } ||	 | }!|j|! }"|j|! }#| |jd krqr|#|jd krqr|j| dsqr|#| k rqr|#|  d }$|$|krqr|||| |#||"d qrqlqWt|d	d
 dd}i }%g }&|D ]n}'t|&|kr ne||'j }|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|,  },d|+}-t!|dr"|j"}.n|j#}.t$|,|-|.|}/|/|%v r2qd|%|/< |&||/|'j%|'j&d q|&sP|&|dddd g }0d}1|&D ]}2|0|2j%|2j&  |1sf|2}1qVt'|0}3g }4t
|&D ]#\}}2t 	 }5|2j(|5d< |3| |5d< |2j%|5d< |2j&|5d< |4|5 qrt|4dk rt)d|1du rt)d|}6|6||j*< |1j(||j*< |4||j*< qFt+|d}7|7,t-j.|ddd  W d   n	1 sw   Y  t+|d}7|7,t-j.|ddd  W d   n	1 sw   Y  |
r't+|d}7|7,t-j.|ddd  W d   |S 1 s"w   Y  |S )z
    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
    null if needed.

    Requires utils_squad_evaluate.py
    r   )r   r   r   start_log_probend_log_probr   )r
   r   r   r   r   rY   Fc                 S   r   r   )r   r   r   r   r   r   r_     r   z/compute_predictions_log_probs.<locals>.<lambda>Tr   r   r   r   g    .Nr
   r   r   r   r   r   r   r   r   )/r)   r   r   r   r   r   r   r   r   rP   rf   Z
cls_logitsminr   r   Zstart_top_indexr   Zend_top_indexZparagraph_lenr   r   re   r-   r   r   r   r   r   r   r   r   r   r   hasattrr   Zdo_lowercase_and_remove_accentr   r   r   r   r
   r   r8   r   r   r   r   )8r   r   r   r   r   r   r   r   Zstart_n_topZ	end_n_topr   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r   Zcur_null_scorerk   jr   r   Zj_indexr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   compute_predictions_log_probsN  s   






$










r   r   )Nr'   )F)__doc__r)   r   r   r   r   Zmodels.bertr   utilsr   Z
get_logger__name__r   r!   r"   r&   r1   rA   rH   rR   rV   rn   ry   rz   r{   r   r   r   r   r   r   r   r   r   r   <module>   s8   	

"


^ P