
    eThZ                        S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	J
r
  SSKJr  SSKJrJr  S	S
KJrJrJr  \" 5       (       a  SSKr\R*                  " \5      rSr    S/S\
\\   S4   S\S\	\   4S jjr\" 5       (       a<  \S4S\R8                  R:                  S\S\	\   S\R8                  R:                  4S jjr    S/S\\   S\S\	\   4S jjr " S S\5      r  " S S\5      r! " S S\5      r" " S S\"5      r# " S S\5      r$ " S S\5      r% " S  S!\5      r& " S" S#\5      r' " S$ S%\5      r( " S& S'\5      r) " S( S)\5      r*S*SS*S*S	S*S*S*S*S+.	r+\$\"\#\!\%\&\'\(\)\*S,.
r,S-S-S-S-S-S.S-S-S-S-S,.
r-g)0zGLUE processors and helpers    N)asdict)Enum)ListOptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc           	         [         R                  " [        R                  S5      [        5        [        5       (       aA  [        U [        R                  R                  5      (       a  Uc  [        S5      e[        XX#S9$ [        XX#XES9$ )a	  
Loads a data file into a list of `InputFeatures`

Args:
    examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
    tokenizer: Instance of a tokenizer that will tokenize the examples
    max_length: Maximum example length. Defaults to the tokenizer's max_len
    task: GLUE task
    label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
    output_mode: String indicating the output mode. Either `regression` or `classification`

Returns:
    If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
    features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
    can be fed to the model.

functionzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr
   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r   s         Y/var/www/auris/envauris/lib/python3.13/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr&   )   sn    2 MM%,,Z8-HZ"''//BB<vww4XU_kk-
*     returnc                 ,  ^
 [         U   " 5       nU  Vs/ s H"  oTR                  UR                  U5      5      PM$     n n[        XX2S9m
US:X  a  [        R
                  O[        R                  nU
4S jnUR                  n[        R                  R                  R                  U[        R                  U[        R                  5      U4U V	s0 s H  o[        R                  " S/5      _M     sn	[        R                  " / 5      45      $ s  snf s  sn	f )zJ
Returns:
    A `tf.data.Dataset` containing the task-specific features.

r   sts-bc               3      >#    T HK  n [        U 5      R                  5        VVs0 s H  u  pUc  M
  X_M     nnnUR                  S5      nX44v   MM     g s  snnf 7f)Nlabel)r   itemspop)exkvdr,   featuress        r%   gen2_tf_glue_convert_examples_to_features.<locals>.gen^   sS     &,Rj&6&6&8J&8daATQT&8Jgj  Js   "A	AA$AN)glue_processorstfds_mapget_example_from_tensor_dictr&   r   float32int64model_input_namesr    r!   from_generatordictfromkeysint32TensorShape)r   r   r   r   	processorexample
label_typer4   input_namesr0   r3   s             @r%   r#   r#   N   s     $D)+	gopgo\c&&y'M'Mg'VWgop4XU_k#'7?RZZ
	!  11ww--]];1:>1<=A''=r~~b?QR
 	
 q >s   )D!Dc                   ^^ Uc  UR                   nUbc  [        U   " 5       nUc+  UR                  5       n[        R	                  SU SU 35        Tc$  [
        U   m[        R	                  ST SU 35        [        U5       VVs0 s H  u  pxX_M	     snnmS[        S[        [        [        S 4   4UU4S jjn	U  V
s/ s H
  o" U
5      PM     nn
U" U  V
s/ s H  oR                  U
R                  4PM     sn
USSS	9n/ n[        [        U 5      5       H<  nU Vs0 s H
  oX   U   _M     nn[        S0 UDS
X   0D6nUR!                  U5        M>     [        U S S 5       HV  u  pz[        R	                  S5        [        R	                  SU
R"                   35        [        R	                  SX    35        MX     U$ s  snnf s  sn
f s  sn
f s  snf )NzUsing label list z
 for task zUsing output mode rB   r(   c                    > U R                   c  g TS:X  a  TU R                      $ TS:X  a  [        U R                   5      $ [        T5      e)Nclassification
regression)r,   floatKeyError)rB   	label_mapr   s    r%   label_from_example>_glue_convert_examples_to_features.<locals>.label_from_example   sJ    == **W]]++L(''{##r'   r   T)r   padding
truncationr,      z*** Example ***zguid: z
features:  )model_max_lengthr6   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrI   text_atext_brangelenr   appendguid)r   r   r   r   r   r   rA   ir,   rL   rB   labelsbatch_encodingr3   r0   inputsfeaturerK   s        `           @r%   r$   r$   m   s    //
#D)+	"--/JKK+J<z$HI+D1KKK,[MD6JK*3J*?@*?ha*?@I$L $U3t;K5L $ $ :BBg )FB9ABg..'..	)B	N H3x=!3AB>a^&q))>B:&:	: 	 "  !-
%&fW\\N+,j./ .
 OA A C 	C Cs   G?G!GGc                       \ rS rSrSrSrSrg)
OutputMode   rG   rH   rQ   N)__name__
__module____qualname____firstlineno__rG   rH   __static_attributes__rQ   r'   r%   re   re      s    %NJr'   re   c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )MrpcProcessor   z/Processor for the MRPC data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g NrA   super__init__r   r   r   r   r   selfargskwargs	__class__s      r%   rs   MrpcProcessor.__init__   /    $)&))00=}Mr'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ See base class.idx	sentence1utf-8	sentence2r,   r   numpydecodestrru   tensor_dicts     r%   r8   *MrpcProcessor.get_example_from_tensor_dict   n    $$&$**,33G<$**,33G<G$**,-	
 	
r'   c                     [         R                  S[        R                  R	                  US5       35        U R                  U R                  [        R                  R	                  US5      5      S5      $ )r}   zLOOKING AT 	train.tsvtrain)rT   rU   ospathjoin_create_examples	_read_tsvru   data_dirs     r%   get_train_examples MrpcProcessor.get_train_examples   sQ    k"'',,x"E!FGH$$T^^BGGLL;4W%XZabbr'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r}   zdev.tsvdevr   r   r   r   r   r   s     r%   get_dev_examplesMrpcProcessor.get_dev_examples   .    $$T^^BGGLL94U%VX]^^r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r}   ztest.tsvtestr   r   s     r%   get_test_examplesMrpcProcessor.get_test_examples   .    $$T^^BGGLL:4V%WY_``r'   c                 
    SS/$ r}   01rQ   ru   s    r%   rS   MrpcProcessor.get_labels       Szr'   c           
          / n[        U5       HD  u  pEUS:X  a  M  U SU 3nUS   nUS   nUS:X  a  SOUS   n	UR                  [        XgXS95        MF     U$ )5Creates examples for the training, dev and test sets.r   -r      r   Nr^   rY   rZ   r,   rW   r]   r   
ru   linesset_typer   r_   liner^   rY   rZ   r,   s
             r%   r   MrpcProcessor._create_examples   sp     'GAAvZq$D!WF!WF$.DDGEOOLd&^_ ( r'   rQ   rg   rh   ri   rj   __doc__rs   r8   r   r   r   rS   r   rk   __classcell__rx   s   @r%   rm   rm      s2    9N
c
_a r'   rm   c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )MnliProcessor   z3Processor for the MultiNLI data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   MnliProcessor.__init__   rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ )r}   r~   premiser   
hypothesisr,   r   r   s     r%   r8   *MnliProcessor.get_example_from_tensor_dict   sn    $$&	"((*11':%++-44W=G$**,-	
 	
r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r}   r   r   r   r   s     r%   r    MnliProcessor.get_train_examples   .    $$T^^BGGLL;4W%XZabbr'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ )r}   zdev_matched.tsvdev_matchedr   r   s     r%   r   MnliProcessor.get_dev_examples   s/    $$T^^BGGLLK\4]%^`mnnr'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ )r}   ztest_matched.tsvtest_matchedr   r   s     r%   r   MnliProcessor.get_test_examples   s/    $$T^^BGGLLK]4^%_aoppr'   c                 
    / SQ$ )r}   )contradiction
entailmentneutralrQ   r   s    r%   rS   MnliProcessor.get_labels   s    99r'   c           
          / n[        U5       HW  u  pEUS:X  a  M  U SUS    3nUS   nUS   nUR                  S5      (       a  SOUS   n	UR                  [        XgXS95        MY     U$ )	r   r   r      	   r   Nr   )rW   
startswithr]   r   r   s
             r%   r   MnliProcessor._create_examples   s}     'GAAvZqa	*D!WF!WF$//77DT"XEOOLd&^_ ( r'   rQ   r   r   s   @r%   r   r      s2    =N
coq: r'   r   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )MnliMismatchedProcessori  z>Processor for the MultiNLI Mismatched data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs    MnliMismatchedProcessor.__init__	  rz   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ )r}   zdev_mismatched.tsvdev_mismatchedr   r   s     r%   r   (MnliMismatchedProcessor.get_dev_examples  s/    $$T^^BGGLLK_4`%acsttr'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ )r}   ztest_mismatched.tsvtest_mismatchedr   r   s     r%   r   )MnliMismatchedProcessor.get_test_examples  s/    $$T^^BGGLLK`4a%bduvvr'   rQ   )
rg   rh   ri   rj   r   rs   r   r   rk   r   r   s   @r%   r   r     s    HNuw wr'   r   c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )ColaProcessori  z/Processor for the CoLA data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   ColaProcessor.__init__  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      S[        US   R                  5       5      5      $ r}   r~   sentencer   Nr,   r   r   s     r%   r8   *ColaProcessor.get_example_from_tensor_dict  U    $$&
#))+227;G$**,-	
 	
r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r    ColaProcessor.get_train_examples&  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   ColaProcessor.get_dev_examples*  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   ColaProcessor.get_test_examples.  r   r'   c                 
    SS/$ r   rQ   r   s    r%   rS   ColaProcessor.get_labels2  r   r'   c           
          US:H  nU(       a  USS nU(       a  SOSn/ n[        U5       H8  u  pgU SU 3nXt   n	U(       a  SOUS   n
UR                  [        XSU
S95        M:     U$ )r   r   r   Nr   r   r   r   )ru   r   r   	test_mode
text_indexr   r_   r   r^   rY   r,   s              r%   r   ColaProcessor._create_examples6  s{    &	!"IE#Q
 'GAZq$D%F%D47EOOLd$V[\]	 (
 r'   rQ   r   r   s   @r%   r   r     s2    9N
c_a r'   r   c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )Sst2ProcessoriE  z0Processor for the SST-2 data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   Sst2Processor.__init__H  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      S[        US   R                  5       5      5      $ r   r   r   s     r%   r8   *Sst2Processor.get_example_from_tensor_dictL  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r    Sst2Processor.get_train_examplesU  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   Sst2Processor.get_dev_examplesY  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   Sst2Processor.get_test_examples]  r   r'   c                 
    SS/$ r   rQ   r   s    r%   rS   Sst2Processor.get_labelsa  r   r'   c           
          / nUS:X  a  SOSn[        U5       H?  u  pVUS:X  a  M  U SU 3nXd   nUS:X  a  SOUS   n	UR                  [        XxSU	S95        MA     U$ )r   r   r   r   r   Nr   r   )
ru   r   r   r   r   r_   r   r^   rY   r,   s
             r%   r   Sst2Processor._create_examplese  sw    "f,Q!
 'GAAvZq$D%F$.DDGEOOLd$V[\] ( r'   rQ   r   r   s   @r%   r   r   E  s2    :N
c_a r'   r   c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )StsbProcessoris  z0Processor for the STS-B data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   StsbProcessor.__init__v  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ r|   r   r   s     r%   r8   *StsbProcessor.get_example_from_tensor_dictz  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r    StsbProcessor.get_train_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   StsbProcessor.get_dev_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   StsbProcessor.get_test_examples  r   r'   c                     S/$ )r}   NrQ   r   s    r%   rS   StsbProcessor.get_labels  s	    vr'   c           
          / n[        U5       HG  u  pEUS:X  a  M  U SUS    3nUS   nUS   nUS:X  a  SOUS   n	UR                  [        XgXS95        MI     U$ )	r   r   r      r   r   Nr   r   r   r   s
             r%   r   StsbProcessor._create_examples  t     'GAAvZqa	*D!WF!WF$.DDHEOOLd&^_ ( r'   rQ   r   r   s   @r%   r  r  s  s2    :N
c_a r'   r  c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )QqpProcessori  z.Processor for the QQP data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   QqpProcessor.__init__  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ )r}   r~   	question1r   	question2r,   r   r   s     r%   r8   )QqpProcessor.get_example_from_tensor_dict  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   QqpProcessor.get_train_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   QqpProcessor.get_dev_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   QqpProcessor.get_test_examples  r   r'   c                 
    SS/$ r   rQ   r   s    r%   rS   QqpProcessor.get_labels  r   r'   c           
         US:H  nU(       a  SOSnU(       a  SOSn/ n[        U5       HG  u  pxUS:X  a  M  U SUS    3n	 X   n
X   nU(       a  SOUS	   nUR                  [        XXS
95        MI     U$ ! [         a     MZ  f = f)r   r   r   r      r   r   r   NrP   r   )rW   
IndexErrorr]   r   )ru   r   r   r   q1_indexq2_indexr   r_   r   r^   rY   rZ   r,   s                r%   r   QqpProcessor._create_examples  s    &	!1q!1q 'GAAvZqa	*D )tAw OOLd&^_ (   s   A66
BBrQ   r   r   s   @r%   r  r    s2    8N
c_a r'   r  c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )QnliProcessori  z/Processor for the QNLI data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   QnliProcessor.__init__  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ )r}   r~   questionr   r   r,   r   r   s     r%   r8   *QnliProcessor.get_example_from_tensor_dict  sn    $$&
#))+227;
#))+227;G$**,-	
 	
r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r    QnliProcessor.get_train_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   QnliProcessor.get_dev_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   QnliProcessor.get_test_examples  r   r'   c                 
    SS/$ r}   r   not_entailmentrQ   r   s    r%   rS   QnliProcessor.get_labels      .//r'   c           
          / n[        U5       HG  u  pEUS:X  a  M  U SUS    3nUS   nUS   nUS:X  a  SOUS   n	UR                  [        XgXS95        MI     U$ 	r   r   r   r   r)  r   Nr   r   r   r   s
             r%   r   QnliProcessor._create_examples  r  r'   rQ   r   r   s   @r%   r/  r/    s2    9N
c_a0 r'   r/  c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )RteProcessori  z.Processor for the RTE data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   RteProcessor.__init__  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ r|   r   r   s     r%   r8   )RteProcessor.get_example_from_tensor_dict
  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   RteProcessor.get_train_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   RteProcessor.get_dev_examples  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   RteProcessor.get_test_examples  r   r'   c                 
    SS/$ r<  rQ   r   s    r%   rS   RteProcessor.get_labels  r?  r'   c           
          / n[        U5       HG  u  pEUS:X  a  M  U SUS    3nUS   nUS   nUS:X  a  SOUS   n	UR                  [        XgXS95        MI     U$ rA  r   r   s
             r%   r   RteProcessor._create_examples#  r  r'   rQ   r   r   s   @r%   rD  rD    s2    8N
c_a0 r'   rD  c                   P   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )WnliProcessori1  z/Processor for the WNLI data set (GLUE version).c                    > [         TU ]  " U0 UD6  [        R                  " [        R                  S5      [        5        g rp   rq   rt   s      r%   rs   WnliProcessor.__init__4  rz   r'   c           	          [        US   R                  5       US   R                  5       R                  S5      US   R                  5       R                  S5      [        US   R                  5       5      5      $ r|   r   r   s     r%   r8   *WnliProcessor.get_example_from_tensor_dict8  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r    WnliProcessor.get_train_examplesA  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   WnliProcessor.get_dev_examplesE  r   r'   c                     U R                  U R                  [        R                  R	                  US5      5      S5      $ r   r   r   s     r%   r   WnliProcessor.get_test_examplesI  r   r'   c                 
    SS/$ r   rQ   r   s    r%   rS   WnliProcessor.get_labelsM  r   r'   c           
          / n[        U5       HG  u  pEUS:X  a  M  U SUS    3nUS   nUS   nUS:X  a  SOUS   n	UR                  [        XgXS95        MI     U$ rA  r   r   s
             r%   r   WnliProcessor._create_examplesQ  r  r'   rQ   r   r   s   @r%   rT  rT  1  s2    9N
c_a r'   rT  r)  )	colamnlimrpcsst-2r*   qqpqnlirtewnli)
rc  rd  zmnli-mmre  rf  r*   rg  rh  ri  rj  rG   rH   )NNNN).r   r   r   dataclassesr   enumr   typingr   r   r   tokenization_utilsr	   utilsr
   r   r   r   r   
tensorflowr   
get_loggerrg   rT   r   rX   r&   r   r    r!   r#   r$   re   rm   r   r   r   r   r  r  r/  rD  rT  glue_tasks_num_labelsr6   rV   rQ   r'   r%   <module>rs     s    " 	    ( ( 5 - = = 			H	%m  !%	 D&(99: "   F 
 $(	
''//
&
 SM	

 

D !%	4< 4"4 4n 
,M ,^+M +\wm w ,M ,^+M +\+M +\1= 1h+M +\+= +\+M +^ 
  &  r'   