
import os
import time
import warnings
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional, Union

import torch
from filelock import FileLock
from torch.utils.data import Dataset

from ...tokenization_utils_base import PreTrainedTokenizerBase
from ...utils import check_torch_load_is_safe, logging
from ..processors.glue import glue_convert_examples_to_features, glue_output_modes, glue_processors
from ..processors.utils import InputFeatures


logger = logging.get_logger(__name__)


@dataclass
class GlueDataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify them on the command
    line.
    """

    task_name: str = field(metadata={"help": "The name of the task to train on: " + ", ".join(glue_processors.keys())})
    data_dir: str = field(
        metadata={"help": "The input data dir. Should contain the .tsv files (or other data files) for the task."}
    )
    max_seq_length: int = field(
        default=128,
        metadata={
            "help": (
                "The maximum total input sequence length after tokenization. Sequences longer "
                "than this will be truncated, sequences shorter will be padded."
            )
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )

    def __post_init__(self):
        self.task_name = self.task_name.lower()


class Split(Enum):
    train = "train"
    dev = "dev"
    test = "test"
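

# Usage sketch (illustrative, left as a comment so nothing runs at import time): as the docstring of
# `GlueDataTrainingArguments` notes, the dataclass is meant to be filled in from the command line via
# `HfArgumentParser`. The task name "mrpc" and the data directory below are assumptions for the
# example, not values defined in this module.
#
#     from transformers import HfArgumentParser
#
#     parser = HfArgumentParser(GlueDataTrainingArguments)
#     (data_args,) = parser.parse_args_into_dataclasses(
#         args=["--task_name", "mrpc", "--data_dir", "./glue_data/MRPC", "--max_seq_length", "128"]
#     )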


class GlueDataset(Dataset):
    """
    This will be superseded by a framework-agnostic approach soon.
    """

    args: GlueDataTrainingArguments
    output_mode: str
    features: List[InputFeatures]

    def __init__(
        self,
        args: GlueDataTrainingArguments,
        tokenizer: PreTrainedTokenizerBase,
        limit_length: Optional[int] = None,
        mode: Union[str, Split] = Split.train,
        cache_dir: Optional[str] = None,
    ):
        warnings.warn(
            "This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
            "library. You can have a look at this example script for pointers: "
            "https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.py",
            FutureWarning,
        )
        self.args = args
        self.processor = glue_processors[args.task_name]()
        self.output_mode = glue_output_modes[args.task_name]
        if isinstance(mode, str):
            try:
                mode = Split[mode]
            except KeyError:
                raise KeyError("mode is not a valid split name")
        # Load data features from cache or dataset file
        cached_features_file = os.path.join(
            cache_dir if cache_dir is not None else args.data_dir,
            f"cached_{mode.value}_{tokenizer.__class__.__name__}_{args.max_seq_length}_{args.task_name}",
        )
        label_list = self.processor.get_labels()
        if args.task_name in ["mnli", "mnli-mm"] and tokenizer.__class__.__name__ in (
            "RobertaTokenizer",
            "RobertaTokenizerFast",
            "XLMRobertaTokenizer",
            "BartTokenizer",
            "BartTokenizerFast",
        ):
            # HACK(label indices are swapped in RoBERTa pretrained model)
            label_list[1], label_list[2] = label_list[2], label_list[1]
        self.label_list = label_list

        # Make sure only the first process in distributed training processes the dataset,
        # and the others will use the cache.
        lock_path = cached_features_file + ".lock"
        with FileLock(lock_path):
            if os.path.exists(cached_features_file) and not args.overwrite_cache:
                start = time.time()
                check_torch_load_is_safe()
                self.features = torch.load(cached_features_file, weights_only=True)
                logger.info(
                    f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                )
            else:
                logger.info(f"Creating features from dataset file at {args.data_dir}")

                if mode == Split.dev:
                    examples = self.processor.get_dev_examples(args.data_dir)
                elif mode == Split.test:
                    examples = self.processor.get_test_examples(args.data_dir)
                else:
                    examples = self.processor.get_train_examples(args.data_dir)
                if limit_length is not None:
                    examples = examples[:limit_length]
                self.features = glue_convert_examples_to_features(
                    examples,
                    tokenizer,
                    max_length=args.max_seq_length,
                    label_list=label_list,
                    output_mode=self.output_mode,
                )
                start = time.time()
                torch.save(self.features, cached_features_file)
                logger.info(
                    f"Saving features into cached file {cached_features_file} [took {time.time() - start:.3f} s]"
                )

    def __len__(self):
        return len(self.features)

    def __getitem__(self, i) -> InputFeatures:
        return self.features[i]

    def get_labels(self):
        return self.label_list
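

# Usage sketch (illustrative, left as a comment so nothing runs at import time). The tokenizer
# checkpoint is an assumption for the example, and `data_args` stands for a `GlueDataTrainingArguments`
# instance built e.g. with `HfArgumentParser`:
#
#     from transformers import AutoTokenizer
#
#     tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
#     train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode=Split.train)
#     eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode=Split.dev, limit_length=500)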