o
    ZhZ                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZ e r?ddlZeeZdZ				d2de
ee df dede	e fddZe rwedfdejjdede	e dejjfddZ				d2dee dede	e fddZG dd deZ G dd deZ!G dd deZ"G dd de"Z#G dd  d eZ$G d!d" d"eZ%G d#d$ d$eZ&G d%d& d&eZ'G d'd( d(eZ(G d)d* d*eZ)G d+d, d,eZ*d-dd-d-d	d-d-d-d-d.	Z+e$e"e#e!e%e&e'e(e)e*d/
Z,d0d0d0d0d0d1d0d0d0d0d/
Z-dS )3zGLUE processors and helpers    N)asdict)Enum)ListOptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc                 C   sZ   t tdt t r#t| tjj	r#|du rt
dt| |||dS t| |||||dS )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionNzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr	   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r    r#   P/var/www/auris/lib/python3.10/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_features)   s   r%   returnc                    s   t |  fdd| D } t| |||d |dkrtjntj} fdd}|j}tjj|t	
|tj|fdd |D tg fS )	zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        c                    s   g | ]
}   |qS r#   )Ztfds_mapget_example_from_tensor_dict.0example)	processorr#   r$   
<listcomp>Z   s    z9_tf_glue_convert_examples_to_features.<locals>.<listcomp>r   sts-bc                  3   s:     D ]} dd t |  D }|d}||fV  qd S )Nc                 S   s   i | ]\}}|d ur||qS Nr#   )r)   kvr#   r#   r$   
<dictcomp>`   s    zF_tf_glue_convert_examples_to_features.<locals>.gen.<locals>.<dictcomp>label)r   itemspop)exdr2   )featuresr#   r$   gen^   s   
z2_tf_glue_convert_examples_to_features.<locals>.genc                 S   s   i | ]	}|t d gqS r.   )r   TensorShaper)   r/   r#   r#   r$   r1   i       z9_tf_glue_convert_examples_to_features.<locals>.<dictcomp>)glue_processorsr%   r   Zfloat32Zint64Zmodel_input_namesr   r   Zfrom_generatordictfromkeysZint32r9   )r   r   r   r   Z
label_typer8   Zinput_namesr#   )r7   r+   r$   r!   N   s   
r!   c                    sh  |d u r|j }|d ur6t|  }|d u r#| }td| d|  d u r6t| td d|  dd t|D dtdtt	t
d f ffdd	fd
d| D }|dd | D |ddd g }tt| D ] fdd D }	tdi |	d| i}
||
 qnt| d d D ]\}td td|j  td|   q|S )NzUsing label list z
 for task zUsing output mode c                 S   s   i | ]\}}||qS r#   r#   )r)   ir2   r#   r#   r$   r1      s    z6_glue_convert_examples_to_features.<locals>.<dictcomp>r*   r&   c                    s:   | j d u rd S dkr | j  S dkrt| j S t)Nclassification
regression)r2   floatKeyError)r*   )	label_mapr   r#   r$   label_from_example   s   


z>_glue_convert_examples_to_features.<locals>.label_from_examplec                    s   g | ]} |qS r#   r#   r(   )rE   r#   r$   r,      s    z6_glue_convert_examples_to_features.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r#   )text_atext_br(   r#   r#   r$   r,      s    r   T)r   paddingZ
truncationc                    s   i | ]	}| |  qS r#   r#   r:   )batch_encodingr?   r#   r$   r1      r;   r2      z*** Example ***zguid: z
features: r#   )Zmodel_max_lengthr<   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrB   rangelenr   appendguid)r   r   r   r   r   r   r+   labelsr7   Zinputsfeaturer*   r#   )rI   r?   rE   rD   r   r$   r"   m   s:   
"	
r"   c                   @   s   e Zd ZdZdZdS )
OutputModer@   rA   N)__name__
__module____qualname__r@   rA   r#   r#   r#   r$   rW      s    rW   c                       P   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                    (   t  j|i | ttdt d S Nr+   super__init__r   r   r   r   r   selfargskwargs	__class__r#   r$   ra         zMrpcProcessor.__init__c                 C   >   t |d  |d  d|d  dt|d  S See base class.idxZ	sentence1utf-8Z	sentence2r2   r   numpydecodestrrc   Ztensor_dictr#   r#   r$   r'         
z*MrpcProcessor.get_example_from_tensor_dictc                 C   s6   t dtj|d  | | tj|ddS )rk   zLOOKING AT 	train.tsvtrain)rL   rM   ospathjoin_create_examples	_read_tsvrc   data_dirr#   r#   r$   get_train_examples   s   z MrpcProcessor.get_train_examplesc                 C      |  | tj|ddS rk   zdev.tsvdevry   rz   rv   rw   rx   r{   r#   r#   r$   get_dev_examples      zMrpcProcessor.get_dev_examplesc                 C   r~   rk   ztest.tsvtestr   r{   r#   r#   r$   get_test_examples   r   zMrpcProcessor.get_test_examplesc                 C      ddgS rk   01r#   rc   r#   r#   r$   rK         zMrpcProcessor.get_labelsc           
   	   C   sl   g }t |D ]-\}}|dkrq| d| }|d }|d }|dkr$dn|d }	|t||||	d q|S )5Creates examples for the training, dev and test sets.r   -r      r   NrT   rF   rG   r2   rO   rS   r   
rc   linesset_typer   r?   linerT   rF   rG   r2   r#   r#   r$   ry      s   zMrpcProcessor._create_examplesrX   rY   rZ   __doc__ra   r'   r}   r   r   rK   ry   __classcell__r#   r#   rf   r$   r\      s    	r\   c                       r[   )MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra      rh   zMnliProcessor.__init__c                 C   ri   )rk   rl   Zpremiserm   Z
hypothesisr2   rn   rr   r#   r#   r$   r'      rs   z*MnliProcessor.get_example_from_tensor_dictc                 C   r~   rk   rt   ru   r   r{   r#   r#   r$   r}      r   z MnliProcessor.get_train_examplesc                 C   r~   )rk   zdev_matched.tsvZdev_matchedr   r{   r#   r#   r$   r      r   zMnliProcessor.get_dev_examplesc                 C   r~   )rk   ztest_matched.tsvZtest_matchedr   r{   r#   r#   r$   r      r   zMnliProcessor.get_test_examplesc                 C   s   g dS )rk   )Zcontradiction
entailmentZneutralr#   r   r#   r#   r$   rK      r   zMnliProcessor.get_labelsc           
   	   C   sr   g }t |D ]0\}}|dkrq| d|d  }|d }|d }|dr'dn|d }	|t||||	d q|S )	r   r   r      	   r   Nr   )rO   
startswithrS   r   r   r#   r#   r$   ry      s   zMnliProcessor._create_examplesr   r#   r#   rf   r$   r          	r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra   	  rh   z MnliMismatchedProcessor.__init__c                 C   r~   )rk   zdev_mismatched.tsvZdev_mismatchedr   r{   r#   r#   r$   r     r   z(MnliMismatchedProcessor.get_dev_examplesc                 C   r~   )rk   ztest_mismatched.tsvZtest_mismatchedr   r{   r#   r#   r$   r     r   z)MnliMismatchedProcessor.get_test_examples)rX   rY   rZ   r   ra   r   r   r   r#   r#   rf   r$   r     s
    r   c                       r[   )ColaProcessorz/Processor for the CoLA data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra     rh   zColaProcessor.__init__c                 C   0   t |d  |d  ddt|d  S rk   rl   sentencerm   Nr2   rn   rr   r#   r#   r$   r'        
z*ColaProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}   &  r   z ColaProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   *  r   zColaProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   .  r   zColaProcessor.get_test_examplesc                 C   r   r   r#   r   r#   r#   r$   rK   2  r   zColaProcessor.get_labelsc              	   C   sz   |dk}|r|dd }|rdnd}g }t |D ]"\}}| d| }|| }	|r+dn|d }
|t||	d|
d q|S )r   r   r   Nr   r   r   r   )rc   r   r   	test_mode
text_indexr   r?   r   rT   rF   r2   r#   r#   r$   ry   6  s   zColaProcessor._create_examplesr   r#   r#   rf   r$   r     r   r   c                       r[   )Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra   H  rh   zSst2Processor.__init__c                 C   r   r   rn   rr   r#   r#   r$   r'   L  r   z*Sst2Processor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}   U  r   z Sst2Processor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   Y  r   zSst2Processor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   ]  r   zSst2Processor.get_test_examplesc                 C   r   r   r#   r   r#   r#   r$   rK   a  r   zSst2Processor.get_labelsc           
   	   C   st   g }|dkrdnd}t |D ])\}}|dkrq| d| }|| }|dkr(dn|d }	|t||d|	d q|S )r   r   r   r   r   Nr   r   )
rc   r   r   r   r   r?   r   rT   rF   r2   r#   r#   r$   ry   e  s   zSst2Processor._create_examplesr   r#   r#   rf   r$   r   E  r   r   c                       r[   )StsbProcessorz0Processor for the STS-B data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra   v  rh   zStsbProcessor.__init__c                 C   ri   rj   rn   rr   r#   r#   r$   r'   z  rs   z*StsbProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}     r   z StsbProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zStsbProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zStsbProcessor.get_test_examplesc                 C   s   dgS )rk   Nr#   r   r#   r#   r$   rK     s   zStsbProcessor.get_labelsc           
   	   C   p   g }t |D ]/\}}|dkrq| d|d  }|d }|d }|dkr&dn|d }	|t||||	d q|S )	r   r   r      r   r   Nr   r   r   r   r#   r#   r$   ry        zStsbProcessor._create_examplesr   r#   r#   rf   r$   r   s  r   r   c                       r[   )QqpProcessorz.Processor for the QQP data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra     rh   zQqpProcessor.__init__c                 C   ri   )rk   rl   Z	question1rm   Z	question2r2   rn   rr   r#   r#   r$   r'     rs   z)QqpProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}     r   zQqpProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zQqpProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zQqpProcessor.get_test_examplesc                 C   r   r   r#   r   r#   r#   r$   rK     r   zQqpProcessor.get_labelsc              	   C   s   |dk}|rdnd}|rdnd}g }t |D ]9\}}|dkrq| d|d  }	z|| }
|| }|r5dn|d	 }W n	 tyC   Y qw |t|	|
||d
 q|S )r   r   r   r      r   r   r   NrJ   r   )rO   
IndexErrorrS   r   )rc   r   r   r   Zq1_indexZq2_indexr   r?   r   rT   rF   rG   r2   r#   r#   r$   ry     s"   zQqpProcessor._create_examplesr   r#   r#   rf   r$   r     r   r   c                       r[   )QnliProcessorz/Processor for the QNLI data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra     rh   zQnliProcessor.__init__c                 C   ri   )rk   rl   questionrm   r   r2   rn   rr   r#   r#   r$   r'     rs   z*QnliProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}     r   z QnliProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zQnliProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zQnliProcessor.get_test_examplesc                 C   r   rk   r   Znot_entailmentr#   r   r#   r#   r$   rK     r   zQnliProcessor.get_labelsc           
   	   C   r   	r   r   r   r   r   r   Nr   r   r   r   r#   r#   r$   ry     r   zQnliProcessor._create_examplesr   r#   r#   rf   r$   r     r   r   c                       r[   )RteProcessorz.Processor for the RTE data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra     rh   zRteProcessor.__init__c                 C   ri   rj   rn   rr   r#   r#   r$   r'   
  rs   z)RteProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}     r   zRteProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zRteProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r     r   zRteProcessor.get_test_examplesc                 C   r   r   r#   r   r#   r#   r$   rK     r   zRteProcessor.get_labelsc           
   	   C   r   r   r   r   r#   r#   r$   ry   #  r   zRteProcessor._create_examplesr   r#   r#   rf   r$   r     r   r   c                       r[   )WnliProcessorz/Processor for the WNLI data set (GLUE version).c                    r]   r^   r_   rb   rf   r#   r$   ra   4  rh   zWnliProcessor.__init__c                 C   ri   rj   rn   rr   r#   r#   r$   r'   8  rs   z*WnliProcessor.get_example_from_tensor_dictc                 C   r~   r   r   r{   r#   r#   r$   r}   A  r   z WnliProcessor.get_train_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   E  r   zWnliProcessor.get_dev_examplesc                 C   r~   r   r   r{   r#   r#   r$   r   I  r   zWnliProcessor.get_test_examplesc                 C   r   r   r#   r   r#   r#   r$   rK   M  r   zWnliProcessor.get_labelsc           
   	   C   r   r   r   r   r#   r#   r$   ry   Q  r   zWnliProcessor._create_examplesr   r#   r#   rf   r$   r   1  r   r   r   )	colamnlimrpcsst-2r-   qqpqnlirtewnli)
r   r   zmnli-mmr   r   r-   r   r   r   r   r@   rA   )NNNN).r   rv   r   dataclassesr   enumr   typingr   r   r   Ztokenization_utilsr   utilsr	   r
   r   r   r   Z
tensorflowr   Z
get_loggerrX   rL   r   rP   r%   rq   r   r   r!   r"   rW   r\   r   r   r   r   r   r   r   r   r   Zglue_tasks_num_labelsr<   rN   r#   r#   r#   r$   <module>   s   


#
"
7/./..4../
