
import warnings
from dataclasses import dataclass, field
from typing import Optional

from .training_args import TrainingArguments
from .utils import cached_property, is_tf_available, logging, requires_backends


logger = logging.get_logger(__name__)

if is_tf_available():
    import tensorflow as tf

    from .modeling_tf_utils import keras


@dataclass
class TFTrainingArguments(TrainingArguments):
    """
TrainingArguments is the subset of the arguments we use in our example scripts **which relate to the training loop
itself**.

Using [`HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.
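
For instance, a minimal sketch of parsing these arguments from the command line (the script name and flags in the
comment below are illustrative only):

```python
from transformers import HfArgumentParser, TFTrainingArguments

# e.g. invoked as: python run_tf_example.py --output_dir ./out --per_device_train_batch_size 16
parser = HfArgumentParser(TFTrainingArguments)
(training_args,) = parser.parse_args_into_dataclasses()
print(training_args.output_dir)
```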

Parameters:
    output_dir (`str`):
        The output directory where the model predictions and checkpoints will be written.
    overwrite_output_dir (`bool`, *optional*, defaults to `False`):
        If `True`, overwrite the content of the output directory. Use this to continue training if `output_dir`
        points to a checkpoint directory.
    do_train (`bool`, *optional*, defaults to `False`):
        Whether to run training or not. This argument is not directly used by [`Trainer`]; it's intended to be used
        by your training/evaluation scripts instead. See the [example
        scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
    do_eval (`bool`, *optional*):
        Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
        different from `"no"`. This argument is not directly used by [`Trainer`]; it's intended to be used by your
        training/evaluation scripts instead. See the [example
        scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
    do_predict (`bool`, *optional*, defaults to `False`):
        Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`]; it's
        intended to be used by your training/evaluation scripts instead. See the [example
        scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
    eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
        The evaluation strategy to adopt during training. Possible values are:

            - `"no"`: No evaluation is done during training.
            - `"steps"`: Evaluation is done (and logged) every `eval_steps`.
            - `"epoch"`: Evaluation is done at the end of each epoch.

    per_device_train_batch_size (`int`, *optional*, defaults to 8):
        The batch size per GPU/TPU core/CPU for training.
    per_device_eval_batch_size (`int`, *optional*, defaults to 8):
        The batch size per GPU/TPU core/CPU for evaluation.
    gradient_accumulation_steps (`int`, *optional*, defaults to 1):
        Number of update steps to accumulate the gradients for before performing a backward/update pass.

        <Tip warning={true}>

        When using gradient accumulation, one step is counted as one step with a backward pass. Therefore, logging,
        evaluation and checkpoint saving are performed once every `gradient_accumulation_steps * xxx_step`
        forward/backward passes (see the example at the end of this docstring for how the effective batch size is derived).

        </Tip>

    learning_rate (`float`, *optional*, defaults to 5e-5):
        The initial learning rate for Adam.
    weight_decay (`float`, *optional*, defaults to 0):
        The weight decay to apply (if not zero).
    adam_beta1 (`float`, *optional*, defaults to 0.9):
        The beta1 hyperparameter for the Adam optimizer.
    adam_beta2 (`float`, *optional*, defaults to 0.999):
        The beta2 hyperparameter for the Adam optimizer.
    adam_epsilon (`float`, *optional*, defaults to 1e-8):
        The epsilon hyperparameter for the Adam optimizer.
    max_grad_norm (`float`, *optional*, defaults to 1.0):
        Maximum gradient norm (for gradient clipping).
    num_train_epochs (`float`, *optional*, defaults to 3.0):
        Total number of training epochs to perform.
    max_steps (`int`, *optional*, defaults to -1):
        If set to a positive number, the total number of training steps to perform. Overrides `num_train_epochs`.
        For a finite dataset, training loops over the dataset repeatedly (once all data is exhausted) until
        `max_steps` is reached.
    warmup_ratio (`float`, *optional*, defaults to 0.0):
        Ratio of total training steps used for a linear warmup from 0 to `learning_rate`.
    warmup_steps (`int`, *optional*, defaults to 0):
        Number of steps used for a linear warmup from 0 to `learning_rate`. Overrides any effect of `warmup_ratio`.
    logging_dir (`str`, *optional*):
        [TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to
        *runs/**CURRENT_DATETIME_HOSTNAME***.
    logging_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`):
        The logging strategy to adopt during training. Possible values are:

            - `"no"`: No logging is done during training.
            - `"epoch"`: Logging is done at the end of each epoch.
            - `"steps"`: Logging is done every `logging_steps`.

    logging_first_step (`bool`, *optional*, defaults to `False`):
        Whether to log and evaluate the first `global_step` or not.
    logging_steps (`int`, *optional*, defaults to 500):
        Number of update steps between two logs if `logging_strategy="steps"`.
    save_strategy (`str` or [`~trainer_utils.SaveStrategy`], *optional*, defaults to `"steps"`):
        The checkpoint save strategy to adopt during training. Possible values are:

            - `"no"`: No save is done during training.
            - `"epoch"`: Save is done at the end of each epoch.
            - `"steps"`: Save is done every `save_steps`.

    save_steps (`int`, *optional*, defaults to 500):
        Number of update steps between two checkpoint saves if `save_strategy="steps"`.
    save_total_limit (`int`, *optional*):
        If a value is passed, limits the total number of checkpoints. Deletes the older checkpoints in
        `output_dir`.
    no_cuda (`bool`, *optional*, defaults to `False`):
        Whether to avoid using CUDA, even when it is available.
    seed (`int`, *optional*, defaults to 42):
        Random seed that will be set at the beginning of training.
    fp16 (`bool`, *optional*, defaults to `False`):
        Whether to use 16-bit (mixed) precision training (through NVIDIA Apex) instead of 32-bit training.
    fp16_opt_level (`str`, *optional*, defaults to 'O1'):
        For `fp16` training, Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. See details on
        the [Apex documentation](https://nvidia.github.io/apex/amp).
    local_rank (`int`, *optional*, defaults to -1):
        During distributed training, the rank of the process.
    tpu_num_cores (`int`, *optional*):
        When training on TPU, the number of TPU cores (automatically passed by launcher script).
    debug (`bool`, *optional*, defaults to `False`):
        Whether to activate the trace to record computation graphs and profiling information or not.
    dataloader_drop_last (`bool`, *optional*, defaults to `False`):
        Whether to drop the last incomplete batch (if the length of the dataset is not divisible by the batch size)
        or not.
    eval_steps (`int`, *optional*, defaults to 1000):
        Number of update steps between two evaluations.
    past_index (`int`, *optional*, defaults to -1):
        Some models like [TransformerXL](../model_doc/transformerxl) or [XLNet](../model_doc/xlnet) can make
        use of the past hidden states for their predictions. If this argument is set to a positive int, the
        `Trainer` will use the corresponding output (usually index 2) as the past state and feed it to the model at
        the next training step under the keyword argument `mems`.
    tpu_name (`str`, *optional*):
        The name of the TPU the process is running on.
    tpu_zone (`str`, *optional*):
        The zone of the TPU the process is running on. If not specified, we will attempt to automatically detect
        from metadata.
    gcp_project (`str`, *optional*):
        Google Cloud Project name for the Cloud TPU-enabled project. If not specified, we will attempt to
        automatically detect from metadata.
    run_name (`str`, *optional*):
        A descriptor for the run. Notably used for wandb, mlflow, comet and swanlab logging.
    xla (`bool`, *optional*):
        Whether to activate the XLA compilation or not.
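
Example:

A minimal sketch of combining these arguments with the distribution strategy exposed by this class (the model built
inside the scope is only a placeholder; any Keras model works there):

```python
from transformers import TFTrainingArguments

args = TFTrainingArguments(
    output_dir="./out",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=5e-5,
    num_train_epochs=3.0,
)

# `train_batch_size` already multiplies the per-device size by the number of replicas; with gradient
# accumulation, the effective batch size per optimizer update is
# args.train_batch_size * args.gradient_accumulation_steps.
print(args.n_replicas, args.train_batch_size)

with args.strategy.scope():
    # Build and compile the Keras model here so its variables are created under the strategy.
    ...
```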
tfNhelpzName of TPU)defaultmetadatatpu_namezZone of TPUtpu_zonez!Name of Cloud TPU-enabled projectgcp_projectg      ?z,Power for the Polynomial decay LR scheduler.
poly_powerFz.Whether to activate the XLA compilation or notxlareturntf.distribute.Strategyc                 0   [        U S/5        [        R                  S5        [        R                  R                  S5      nU R                  (       a  [        R                  R                  S5        U R                  (       a  [        R                  R                  SS9nU$  U R                  (       aH  [        R                  R                  R                  U R                  U R                   U R"                  S9nO)[        R                  R                  R                  5       n U(       a  U R                  (       a  [        R                  R                  S
5        [        R                  R)                  U5        [        R*                  R,                  R/                  U5        [        R                  R1                  U5      nU$ [3        U5      S:X  a  [        R                  R                  SS9nU$ [3        U5      S:X  a  [        R                  R                  SS9nU$ [3        U5      S:  a   [        R                  R5                  5       nU$ [%        S5      e! [$         a0    U R                  (       a  ['        SU R                   S	35      eS n GNof = f)Nr   zTensorflow: setting up strategyGPUmixed_float16z/cpu:0)device)zoneprojectzCouldn't connect to TPU !mixed_bfloat16r   r   z/gpu:0zJCannot find the proper strategy, please check your environment properties.)r   loggerinfor   configlist_physical_devicesfp16r   mixed_precisionset_global_policyno_cuda
distributeOneDeviceStrategyr   cluster_resolverTPUClusterResolverr   r   
ValueErrorRuntimeErrorexperimental_connect_to_clustertpuexperimentalinitialize_tpu_systemTPUStrategylenMirroredStrategy)selfgpusstrategyr2   s       U/var/www/auris/envauris/lib/python3.13/site-packages/transformers/training_args_tf.py_setup_strategy#TFTrainingArguments._setup_strategy   s   $'56yy..u5 99!!33OD<<}}66h6GHF C==--88KKDMM4CSCS L C --88KKMC 99));;<LM		99#>##99#>==44S9  Ta==::(:K  Ta==::(:K  TQ==99;  !!mnn1  ==&)A$--PQ'RSSC	s   #AI <(I 6JJc                 4    [        U S/5        U R                  $ )z-
The strategy used for distributed training.
r   )r   r<   r8   s    r;   r:   TFTrainingArguments.strategy   s    
 	$'###    c                 H    [        U S/5        U R                  R                  $ )I
The number of replicas (CPUs, GPUs or TPU cores) used in this training.
r   )r   r<   num_replicas_in_syncr?   s    r;   
n_replicasTFTrainingArguments.n_replicas   s"    
 	$'##888rA   c                     g)z8
Whether or not the current process should produce log.
F r?   s    r;   
should_logTFTrainingArguments.should_log   s    
 rA   c                     U R                   (       a  [        R                  S5        U R                   =(       d    U R                  nXR                  -  $ )zj
The actual batch size for training (may differ from `per_gpu_train_batch_size` in distributed training).
zUsing deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.)per_gpu_train_batch_sizer#   warningper_device_train_batch_sizerE   r8   per_device_batch_sizes     r;   train_batch_size$TFTrainingArguments.train_batch_size  sE    
 ((NNO !% = = aAaAa$66rA   c                     U R                   (       a  [        R                  S5        U R                   =(       d    U R                  nXR                  -  $ )zk
The actual batch size for evaluation (may differ from `per_gpu_eval_batch_size` in distributed training).
zUsing deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.)per_gpu_eval_batch_sizer#   rM   per_device_eval_batch_sizerE   rO   s     r;   eval_batch_size#TFTrainingArguments.eval_batch_size  sE    
 ''NNN !% < < _@_@_$66rA   c                 ~    [        U S/5        [        R                  " S[        5        U R                  R
                  $ )rC   r   zaThe n_gpu argument is deprecated and will be removed in a future version, use n_replicas instead.)r   warningswarnFutureWarningr<   rD   r?   s    r;   n_gpuTFTrainingArguments.n_gpu!  s6    
 	$'o	
 ##888rA   rH   )r   r   )__name__
__module____qualname____firstlineno____doc__	frameworkr   r   r   str__annotations__r   r   r   floatr   boolr   tupleintr<   propertyr:   rE   rI   rQ   rV   r\   __static_attributes__rH   rA   r;   r   r      sf   EN I#-(Hhsm 
 $-(Hhsm 
 "'=>"K# 
 HIJ 
 ev7g.hiCi.'?'D!E . .` $ $ 9C 9 9   
7# 
7 
7 
7 
7 
7 	9s 	9 	9rA   r   )rY   dataclassesr   r   typingr   training_argsr   utilsr   r	   r
   r   
get_loggerr^   r#   
tensorflowr   modeling_tf_utilsr   r   rH   rA   r;   <module>rs      sY     (  , O O 
		H	%( K9+ K9 K9rA   