
    eTh5                     F   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
JrJrJrJr  SSKrSSKJrJrJrJrJrJrJrJrJrJrJrJrJr  \" 5       (       a  SSKrS\ S\ S\ 4S	 jr!SDS
\ S\"4S jjr#SDS
\ S\"4S jjr$S r% " S S5      r& " S S\5      r' " S S\5      r( " S S\5      r)Sr*\RV                  " S\*-   S-   5      r,S r- " S S\5      r. " S S\5      r/ " S  S!\5      r0 " S" S#\5      r1 " S$ S%\5      r2S&\3\4\54   S'\54S( jr6S'\3\4\54   4S) jr7S'\3\4\54   4S* jr8S+ r9S'\3\4\54   4S, jr: " S- S.\5      r;S/ r<S0 r=SES1 jr> " S2 S3\5      r? " S4 S55      r@S6 rAS7 rBS8 rC SFS9\\D   S:\ S;\"4S< jjrE " S= S>\5      rF " S? S@5      rGSDSA\4SB\"4SC jjrHg)Gz6
PyTorch-independent utilities for the Trainer class.
    N)Any
NamedTupleOptionalUnion   )ExplicitEnumis_psutil_availableis_tf_availableis_torch_availableis_torch_cuda_availableis_torch_hpu_availableis_torch_mlu_availableis_torch_mps_availableis_torch_musa_availableis_torch_npu_availableis_torch_xla_availableis_torch_xpu_availablerequires_backends	worker_idnum_workersrankc                 X    [         R                  " 5       S-  nX-  U-   n[        U5        g)zF
Helper function to set worker seed during Dataloader initialization.
l        N)torchinitial_seedset_seed)r   r   r   	init_seedworker_seeds        R/var/www/auris/envauris/lib/python3.13/site-packages/transformers/trainer_utils.pyseed_workerr   4   s,     ""$u,I$y0K[    seed	warn_onlyc                    [        U 5        [        5       (       a  S[        R                  S'   S[        R                  S'   S[        R                  S'   S[        R                  S'   S[        R                  S'   [        R
                  " SUS	9  S[        R                  R                  l        S
[        R                  R                  l	        [        5       (       a)  SSKnUR                  R                  R                  5         gg)z
Helper function for reproducible behavior during distributed training. See
- https://pytorch.org/docs/stable/notes/randomness.html for pytorch
- https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_op_determinism for tensorflow
1CUDA_LAUNCH_BLOCKINGz:16:8CUBLAS_WORKSPACE_CONFIGASCEND_LAUNCH_BLOCKINGHCCL_DETERMINISTICFLASH_ATTENTION_DETERMINISTICT)r"   Fr   N)r   r   osenvironr   use_deterministic_algorithmsbackendscudnndeterministic	benchmarkr
   
tensorflowconfigexperimentalenable_op_determinism)r!   r"   tfs      r   enable_full_determinismr6   =   s     TN .1

)*07

,-/2

+,+.

'(69

23**49E .2*).&
		446 r    r/   c                    [         R                  " U 5        [        R                   R                  U 5        [        5       (       aR  [        R
                  " U 5        [        R                  R                  U 5        U(       a  [        R                  " S5        [        5       (       a  [        R                  R                  U 5        [        5       (       a  [        R                  R                  U 5        [        5       (       a  [        R                  R                  U 5        [        5       (       a  [        R                   R                  U 5        [#        5       (       a  [        R$                  R                  U 5        ['        5       (       aL  SSKnUR                   R+                  U 5        U(       a%  UR,                  R.                  R1                  5         ggg)a=  
Helper function for reproducible behavior to set the seed in `random`, `numpy`, `torch` and/or `tf` (if installed).

Args:
    seed (`int`):
        The seed to set.
    deterministic (`bool`, *optional*, defaults to `False`):
        Whether to use deterministic algorithms where available. Can slow down training.
Tr   N)randomr!   npr   r   manual_seedcudamanual_seed_allr,   r   mlur   musar   npur   hpur   xpur
   r1   r   r2   r3   r4   )r!   r/   r5   s      r   r   r   ]   s"    KKIINN4$

""4(..t4		!!$'  

""4(		!!$'		!!$'		!!$'
		4 II""88: 	 r    c                 0   U R                   (       a  [        R                  " UR                  S5      UR                  S5      -  5      nU R                  [        R
                  " U5      -  nU[        R                  " U5      R                  U* U5      -   nU$ )a  
Implements the NEFTune forward pass for the model using forward hooks. Note this works only for torch.nn.Embedding
layers. This method is slightly adapted from the original source code that can be found here:
https://github.com/neelsjain/NEFTune Simply add it to your model as follows:
```python
model = ...
model.embed_tokens.neftune_noise_alpha = 0.1
model.embed_tokens.register_forward_hook(neftune_post_forward_hook)
```
Args:
    module (`torch.nn.Module`):
        The embedding module where the hook is attached. Note that you need to set `module.neftune_noise_alpha` to
        the desired noise alpha value.
    input (`torch.Tensor`):
        The input tensor to the model.
    output (`torch.Tensor`):
        The output tensor of the model (i.e. the embeddings).
r      )trainingr   tensorsizeneftune_noise_alphasqrt
zeros_likeuniform_)moduleinputoutputdimsmag_norms        r   neftune_post_forward_hookrP      sp    & ||FKKNV[[^;<--

40@@%**62;;XIxPPMr    c                   2   \ rS rSrSr  SS\\R                  \\R                     4   S\\R                  \\R                     4   S\	\\R                  \\R                     4      S\	\\R                  \\R                     4      4S jjr
S	 rS
 rSrg)EvalPrediction   a^  
Evaluation output (always contains labels), to be used to compute metrics.

Parameters:
    predictions (`np.ndarray`): Predictions of the model.
    label_ids (`np.ndarray`): Targets to be matched.
    inputs (`np.ndarray`, *optional*): Input data passed to the model.
    losses (`np.ndarray`, *optional*): Loss values computed during evaluation.
Npredictions	label_idsinputslossesc                 $   Xl         X l        X0l        X@l        U R                   U R                  4U l        U R                  b   U =R                  U R                  4-  sl        U R                  b!  U =R                  U R                  4-  sl        g g N)rT   rU   rV   rW   elements)selfrT   rU   rV   rW   s        r   __init__EvalPrediction.__init__   sp     '"))4>>:;;"MMdkk^+M;;"MMdkk^+M #r    c                 ,    [        U R                  5      $ rY   )iterrZ   r[   s    r   __iter__EvalPrediction.__iter__   s    DMM""r    c                 t    US:  d  U[        U R                  5      :  a  [        S5      eU R                  U   $ )Nr   ztuple index out of range)lenrZ   
IndexError)r[   idxs     r   __getitem__EvalPrediction.__getitem__   s4    7cS//788}}S!!r    )rZ   rV   rU   rW   rT   )NN)__name__
__module____qualname____firstlineno____doc__r   r9   ndarraytupler   r\   ra   rg   __static_attributes__ r    r   rR   rR      s     BFAE,2::uRZZ'889, U2::%667, rzz5+<<=>	,
 rzz5+<<=>,"#"r    rR   c                       \ rS rSr% \\R                  \\R                     4   \S'   \	\\R                  \\R                     4      \S'   \	\
\\4      \S'   \	\   \S'   Srg)EvalLoopOutput   rT   rU   metricsnum_samplesrq   N)ri   rj   rk   rl   r   r9   rn   ro   __annotations__r   dictstrfloatintrp   rq   r    r   rs   rs      s_    rzz5#4455bjj%

*;;<==d3:&''#r    rs   c                       \ rS rSr% \\R                  \\R                     4   \S'   \	\\R                  \\R                     4      \S'   \	\
\\4      \S'   Srg)PredictionOutput   rT   rU   ru   rq   N)ri   rj   rk   rl   r   r9   rn   ro   rw   r   rx   ry   rz   rp   rq   r    r   r}   r}      sU    rzz5#4455bjj%

*;;<==d3:&''r    r}   c                   >    \ rS rSr% \\S'   \\S'   \\\4   \S'   Sr	g)TrainOutput   global_steptraining_lossru   rq   N)
ri   rj   rk   rl   r{   rw   rz   rx   ry   rp   rq   r    r   r   r      s    #u*r    r   
checkpoint^z\-(\d+)$c           	         [         R                  " U 5      nU Vs/ s H`  n[        R                  U5      c  M  [         R                  R                  [         R                  R                  X5      5      (       d  M^  UPMb     nn[        U5      S:X  a  g [         R                  R                  U [        US S95      $ s  snf )Nr   c                 `    [        [        R                  U 5      R                  5       S   5      $ )Nr   )r{   _re_checkpointsearchgroups)xs    r   <lambda>%get_last_checkpoint.<locals>.<lambda>   s$    s>CXCXYZC[CbCbCdefCg?hr    )key)	r*   listdirr   r   pathisdirjoinrd   max)foldercontentr   checkpointss       r   get_last_checkpointr      s    jj G D  & 	79ww}}RWW\\RXE_7` 	  
 ;177<<K5h ijjs   B=?B=<B=c                        \ rS rSrSrSrSrSrg)IntervalStrategy   nostepsepochrq   Nri   rj   rk   rl   NOSTEPSEPOCHrp   rq   r    r   r   r          	BEEr    r   c                   $    \ rS rSrSrSrSrSrSrg)SaveStrategy   r   r   r   bestrq   N)	ri   rj   rk   rl   r   r   r   BESTrp   rq   r    r   r   r      s    	BEEDr    r   c                        \ rS rSrSrSrSrSrg)EvaluationStrategy   r   r   r   rq   Nr   rq   r    r   r   r      r   r    r   c                   $    \ rS rSrSrSrSrSrSrg)HubStrategy   end
every_saver   all_checkpointsrq   N)	ri   rj   rk   rl   END
EVERY_SAVE
CHECKPOINTALL_CHECKPOINTSrp   rq   r    r   r   r      s    
CJJ'Or    r   c                   f    \ rS rSr% Sr\\S'   \\\	\   4   \S'   \
\\4   \S'   Sr\\   \S'   Srg)	BestRun   a3  
The best run found by a hyperparameter search (see [`~Trainer.hyperparameter_search`]).

Parameters:
    run_id (`str`):
        The id of the best run (if models were saved, the corresponding checkpoint will be in the folder ending
        with run-{run_id}).
    objective (`float`):
        The objective that was obtained for this run.
    hyperparameters (`Dict[str, Any]`):
        The hyperparameters picked to get this run.
    run_summary (`Optional[Any]`):
        A summary of tuning experiments. `ray.tune.ExperimentAnalysis` object for Ray backend.
run_id	objectivehyperparametersNrun_summaryrq   )ri   rj   rk   rl   rm   ry   rw   r   rz   listrx   r   r   r   rp   rq   r    r   r   r      s>     KUDK'((#s(^#!%K#%r    r   ru   returnc                    [         R                  " U 5      n U R                  SS5      nU R                  SS5      nU R                  5        Vs/ s HI  nUR	                  S5      (       d.  UR	                  S5      (       d  UR	                  S5      (       d  MG  UPMK     nnU H  nU R                  US5      nM     [        U 5      S:X  a  U$ [        U R                  5       5      $ s  snf )aN  
The default objective to maximize/minimize when doing an hyperparameter search. It is the evaluation loss if no
metrics are provided to the [`Trainer`], the sum of all metrics otherwise.

Args:
    metrics (`Dict[str, float]`): The metrics returned by the evaluate method.

Return:
    `float`: The objective to minimize or maximize
	eval_lossNr   _runtime_per_second_compilation_timer   )copydeepcopypopkeysendswithrd   sumvalues)ru   loss_mspeed_metricssms         r   default_compute_objectiver     s     mmG$G;;{D)DGT"A A::j!!QZZ%>%>!**M`Ba 	
  
 KKD! w<1$4?#gnn.>*??s   AC$C$c                     SSK Jn  U" 5       (       d   S5       eU R                  SSSSS9U R                  S	SS
5      U R                  SSS5      U R	                  S/ SQ5      S.$ )Nr   )is_optuna_availablez:This function needs Optuna installed: `pip install optuna`learning_rateư>-C6?T)lognum_train_epochs   r!   (   per_device_train_batch_size             @   r   r   r!   r   )integrationsr   suggest_floatsuggest_intsuggest_categorical)trialr   s     r   default_hp_space_optunar   ,  sr    1  ^"^^ ,,_dDd,S!--.@!QG!!&!R0','@'@A^`r's	 r    c           	          SSK Jn  U" 5       (       d   S5       eSSKJn  UR	                  SS5      UR                  [        [        SS5      5      5      UR                  SS	5      UR                  / S
Q5      S.$ )Nr   )is_ray_tune_availablez:This function needs ray installed: `pip install ray[tune]`r   )tuner   r      r   r   r   )	r   r   rayr   
loguniformchoicer   rangeuniform)r   r   r   s      r   default_hp_space_rayr   8  si    3 ""`$``" t4 KKU1a[(9:Q#'+{{3E'F	 r    c                 F    SSS.SSSS.SS	S.S
SS.SSS.SSS./ SQSSS./$ )Nr   r   )minr   r   doubler   )boundsnametypetransformationr   r   r   r{   )r   r   r   r   r!   )48163264r   categorical)categorical_valuesr   r   rq   )r   s    r   default_hp_space_sigoptr   F  sL    -PXlqrQ'1CUSR(&%H">1!	
		 	r    c                 x    SSK Jn  U" 5       (       d  [        S5      eSSSS.SS	S
S.SSSS.SSSS.S/ SQ0S.S.$ )Nr   )is_wandb_availablez8This function needs wandb installed: `pip install wandb`r8   r   minimize)r   goalr   r   r   )distributionr   r   int_uniformr   r   r   r   r   )methodmetric
parameters)r   r  ImportError)r   r  s     r   default_hp_space_wandbr
  S  s^    0TUU &
;.7TR1>qQR S%21RH,46H+I	
	 	r    c                   $    \ rS rSrSrSrSrSrSrg)HPSearchBackendie  optunar   sigoptwandbrq   N)	ri   rj   rk   rl   OPTUNARAYSIGOPTWANDBrp   rq   r    r   r  r  e  s    F
CFEr    r  c                 \    [        5       (       a  SSKJn  UR                  5       S:H  $ U S;   $ )z
Whether or not the current process is the local process, based on `xr.global_ordinal()` (for TPUs) first, then on
`local_rank`.
r   N)r   )r   torch_xla.runtimeruntimeglobal_ordinal)
local_rankxrs     r   is_main_processr  l  s/    
 &  "a''  r    c                     [        5       (       a  SSKJn  UR                  5       $ U S:w  a-  [	        5       (       a  SSKnUR                  R                  5       $ g)z_
Return the number of processes launched in parallel. Works with `torch.distributed` and TPUs.
r   Nr  r   )r   r  r  
world_sizer   r   distributedget_world_size)r  r  r   s      r   total_processes_numberr   x  sH     &}}	r	022  //11r    c                     [         R                   " 5       U-
  nU  S3[        US5      0nUS:X  a  U$ Ub  X%-  n[        US5      X` S3'   Ub  X5-  n[        US5      X` S3'   Ub  XE-  n	[        U	S5      X` S3'   U$ )a  
Measure and return speed performance metrics.

This function requires a time snapshot `start_time` before the operation to be measured starts and this function
should be run immediately after the operation to be measured has completed.

Args:
- split: name to prefix metric (like train, eval, test...)
- start_time: operation start time
- num_samples: number of samples processed
- num_steps: number of steps processed
- num_tokens: number of tokens processed
r   r   r      _samples_per_second_steps_per_second_tokens_per_second)timeround)
split
start_timerv   	num_steps
num_tokensr  resultsamples_per_secondsteps_per_secondtokens_per_seconds
             r   r   r     s     iikJ&Gx %"34F!|(2056H!0L+,-$..34Da.H)*+&0/45F/J*+,Mr    c                   @    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrSrg)SchedulerTypei  a  
Scheduler names for the parameter `lr_scheduler_type` in [`TrainingArguments`].
By default, it uses "linear". Internally, this retrieves `get_linear_schedule_with_warmup` scheduler from [`Trainer`].
Scheduler types:
   - "linear" = get_linear_schedule_with_warmup
   - "cosine" = get_cosine_schedule_with_warmup
   - "cosine_with_restarts" = get_cosine_with_hard_restarts_schedule_with_warmup
   - "polynomial" = get_polynomial_decay_schedule_with_warmup
   - "constant" =  get_constant_schedule
   - "constant_with_warmup" = get_constant_schedule_with_warmup
   - "inverse_sqrt" = get_inverse_sqrt_schedule
   - "reduce_lr_on_plateau" = get_reduce_on_plateau_schedule
   - "cosine_with_min_lr" = get_cosine_with_min_lr_schedule_with_warmup
   - "warmup_stable_decay" = get_wsd_schedule
linearcosinecosine_with_restarts
polynomialconstantconstant_with_warmupinverse_sqrtreduce_lr_on_plateaucosine_with_min_lrwarmup_stable_decayrq   N)ri   rj   rk   rl   rm   LINEARCOSINECOSINE_WITH_RESTARTS
POLYNOMIALCONSTANTCONSTANT_WITH_WARMUPINVERSE_SQRTREDUCE_ON_PLATEAUCOSINE_WITH_MIN_LRWARMUP_STABLE_DECAYrp   rq   r    r   r1  r1    s@      FF1JH1!L.-/r    r1  c                   `    \ rS rSrSrSSSSSS.rSS jrS	 rS
 rS r	S r
S rS rSS jrSrg)TrainerMemoryTrackeri  a  
A helper class that tracks cpu and gpu memory.

This class will silently skip unless `psutil` is available. Install with `pip install psutil`.

When a stage completes, it can pass metrics dict to update with the memory metrics gathered during this stage.

Example :

```python
self._memory_tracker = TrainerMemoryTracker(self.args.skip_memory_metrics)
self._memory_tracker.start()
# code ...
metrics = {"train_runtime": 10.5}
self._memory_tracker.stop_and_update_metrics(metrics)
```

At the moment GPU tracking is only for `pytorch`, but can be extended to support `tensorflow`.

To understand this class' intricacies please read the documentation of [`~Trainer.log_metrics`].
inittrainevaltest)r\   rI  _inner_training_loopevaluatepredictc                 P   Xl         [        5       (       d  SU l         U R                   (       a  g SS Kn[        5       (       d  [	        5       (       d  [        5       (       a  SS KnX0l        0 U l        O[        5       (       a  SS KnX0l        0 U l        Oj[        5       (       a  SS KnX0l        0 U l        OI[        5       (       a  SS KnX0l        0 U l        O([        5       (       a  SS KnX0l        0 U l        OS U l        UR                  5       U l        S U l        0 U l        SU l        g )NTr   F)skip_memory_metricsr	   psutilr   r   r   r   gpur   r   r   r   Processprocess	cur_stagecpuinit_reported)r[   rP  rQ  r   s       r   r\   TrainerMemoryTracker.__init__  s    #6 "$$'+D$##"$$(>(@(@D[D]D]JDH#%%JDH#%%JDH#%%JDH#%%JDHDJ~~'"r    c                 
   [         R                  " 5       R                  R                  R                  R                  nXR
                  ;   a  U R
                  U   $ [        SU SU R
                  R                  5        35      e)z+derives the stage/caller name automaticallyzwas called from z+, but only expect to be called from one of )inspectcurrentframef_backf_codeco_namestages
ValueErrorr   )r[   callers     r   derive_stage!TrainerMemoryTracker.derive_stage  sp    %%'..55<<DD[[ ;;v&&"6(*UVZVaVaVfVfVhUij r    c                 J    U R                   R                  5       R                  $ )z4get resident set size memory for the current process)rT  memory_inforssr`   s    r   cpu_mem_used!TrainerMemoryTracker.cpu_mem_used  s    ||'')---r    c                     SU l          [        U R                  5       U R                   5      U l         U R                  (       d  g M=  )Nr  )cpu_mem_used_peakr   rg  peak_monitoringr`   s    r   peak_monitor_func&TrainerMemoryTracker.peak_monitor_func  s>    !#%():):)<d>T>T%UD"
 '' r    c                 H	   U R                   (       a  gU R                  5       nU R                  b  U R                  U:w  a  gXl        [        R                  " 5         U R
                  Gb6  [
        R                  R                  5       (       aJ  U R
                  R                  R                  5         U R
                  R                  R                  5         GO[        5       (       aJ  U R
                  R                  R                  5         U R
                  R                  R                  5         GOp[        5       (       aJ  U R
                  R                  R                  5         U R
                  R                  R                  5         GO[        5       (       aI  U R
                  R                  R                  5         U R
                  R                  R                  5         O[!        5       (       aI  U R
                  R"                  R                  5         U R
                  R"                  R                  5         Og[%        5       (       a%  U R
                  R&                  R                  5         O3[)        5       (       a$  U R
                  R*                  R                  5         U R
                  Gb  [
        R                  R                  5       (       a+  U R
                  R                  R-                  5       U l        GOV[        5       (       a+  U R
                  R                  R-                  5       U l        GO[        5       (       a*  U R
                  R                  R-                  5       U l        O[        5       (       a*  U R
                  R                  R-                  5       U l        O[!        5       (       a*  U R
                  R"                  R-                  5       U l        Oq[%        5       (       a*  U R
                  R&                  R-                  5       U l        O8[)        5       (       a)  U R
                  R*                  R1                  5       U l        U R3                  5       U l        SU l        [8        R:                  " U R<                  S9nSUl        URA                  5         g)z%start tracking for the caller's stageNT)target)!rP  rb  rU  gccollectr   r;   is_availablereset_peak_memory_statsempty_cacher   r=   r   r>   r   rA   r   r?   r   r@   r   mpsmemory_allocatedgpu_mem_used_at_startcurrent_allocated_memoryrg  cpu_mem_used_at_startrk  	threadingThreadrl  daemonstart)r[   stagepeak_monitor_threads      r   r}  TrainerMemoryTracker.start*  s   ##!!#>>%$..E*A


::!zz&&((

779

++-'))

668

**,(**

779

++-'))

668

**,'))

668

**,'))

668 ())

**, ::!zz&&((-1ZZ__-M-M-O*'))-1ZZ^^-L-L-N*(**-1ZZ__-M-M-O*'))-1ZZ^^-L-L-N*'))-1ZZ^^-L-L-N*'))-1ZZ^^-L-L-N*'))-1ZZ^^-T-T-V* &*%6%6%8"#'..d6L6LM%)"!!#r    c                 0   U R                   b  U R                   U:w  a  gSU l        [        R                  " 5         U R                  Gb\  [        R
                  R                  5       (       a&  U R                  R
                  R                  5         GO[        5       (       a%  U R                  R                  R                  5         O[        5       (       a%  U R                  R                  R                  5         O[        5       (       a%  U R                  R                  R                  5         Ow[        5       (       a%  U R                  R                  R                  5         OC[!        5       (       a  O3[#        5       (       a$  U R                  R$                  R                  5         U R                  Gb^  [        R
                  R                  5       (       aT  U R                  R
                  R'                  5       U l        U R                  R
                  R+                  5       U l        GO8[        5       (       aT  U R                  R                  R'                  5       U l        U R                  R                  R+                  5       U l        GO[        5       (       aT  U R                  R                  R'                  5       U l        U R                  R                  R+                  5       U l        GOr[        5       (       aT  U R                  R                  R'                  5       U l        U R                  R                  R+                  5       U l        GO[        5       (       aS  U R                  R                  R'                  5       U l        U R                  R                  R+                  5       U l        O[!        5       (       aS  U R                  R.                  R'                  5       U l        U R                  R.                  R+                  5       U l        OK[#        5       (       a1  U R                  R$                  R1                  5       U l        SU l        O[3        S5      eU R4                  U R(                  U R(                  U R4                  -
  S.U R6                  U R                   '   U R,                  b>  [9        SU R,                  U R(                  -
  5      U R6                  U R                      S'   OSU R6                  U R                      S'   U R;                  5       U l        U R>                  U R<                  U R<                  U R>                  -
  [9        SU R@                  U R<                  -
  5      S.U RB                  U R                   '   SU l         g)	z"stop tracking for the passed stageNFzNo available GPU device found!)beginr   allocr   peakedzNot available)r  r   r  r  )"rU  rk  rp  rq  r   r;   rr  rt  r   r=   r   r>   r   rA   r   r?   r   r   ru  rv  gpu_mem_used_nowmax_memory_allocatedgpu_mem_used_peakr@   rx  r`  rw  rR  r   rg  cpu_mem_used_nowry  rj  rV  )r[   r~  s     r   stopTrainerMemoryTracker.stoph  s    >>%$..E*A  % 	

::!zz&&((

++-'))

**,(**

++-'))

**,'))

**,')) '))

**, ::!zz&&(((,

(H(H(J%)-)M)M)O&'))(,

(G(G(I%)-)L)L)N&(**(,

(H(H(J%)-)M)M)O&'))(,

(G(G(I%)-)L)L)N&'))(,

(G(G(I%)-)L)L)N&'))(,

(G(G(I%)-)L)L)N&'))(,

(O(O(Q%)-& !!ABB 33,,//$2L2LL(DHHT^^$
 %%158D<R<RUYUjUj<j5k(25D(2 !% 1 1 3//((++d.H.HH!T33d6K6KKL	$
  r    c                 x   U R                   (       a  gU R                  b  U R                  U:w  a  gU/nU R                  (       d  UR                  SS5        SU l        U H  nS H  nXR                  ;   a,  X@R                  U   ;   a  U R                  U   U   X! SU S3'   U R
                  c  MM  XR                  ;   d  M^  X@R                  U   ;   d  Mr  U R                  U   U   X! SU S3'   M     M     US   S:X  a9  U R                  S   S	   US
'   U R
                  b  U R                  S   S	   US'   ggg)zupdates the metricsNr   rH  T)r  r  	_mem_cpu__delta	_mem_gpu_r  before_init_mem_cpubefore_init_mem_gpu)rP  rU  rW  insertrV  r   rR  )r[   r~  ru   r_  ts        r   update_metrics#TrainerMemoryTracker.update_metrics  s4   ## >>%$..E*A !!MM!V$!%DE(HH$hhuo)=<@HHUOA<NGgYqc89::)exx.?ARWDX<@HHUOA<NGgYqc89	 )  !9-1XXf-=g-FG)*zz%15&1A'1J-. & r    Nc                     U R                   (       a  gU R                  5       nU R                  U5        Ub  U R                  X!5        gg)z<combine stop and metrics update in one call for simpler codeN)rP  rb  r  r  )r[   ru   r~  s      r   stop_and_update_metrics,TrainerMemoryTracker.stop_and_update_metrics  sC    ##!!#		% / r    )rV  ry  r  rj  rU  rR  rw  r  r  rW  rk  rT  rP  r   FrY   )ri   rj   rk   rl   rm   r_  r\   rb  rg  rl  r}  r  r  r  rp   rq   r    r   rG  rG    sM    0  'F,#\.
<$|Tl!KR
0r    rG  c                 T     [        U 5      SL$ ! [         a     g[         a     gf = f)zJ
Checks if the dataset implements __len__() and it doesn't raise an error
NF)rd   	TypeErrorAttributeError)datasets    r   
has_lengthr    s3    7|4''  s    
'	''c           
          [        U [        [        45      (       a  [        U 5      " S U  5       5      $ [        U [        5      (       a>  [        U 5      " U R                  5        VVs0 s H  u  pU[        U5      _M     snn5      $ [        U [        R                  5      (       a  U R                  5       $ [        5       (       aC  [        U [        R                  5      (       a$  U R                  5       S:X  a  U R                  5       $ U $ s  snnf )zE
Recursively calls `.item()` on the element of the dictionary passed
c              3   8   #    U  H  n[        U5      v   M     g 7frY   )denumpify_detensorize).0r   s     r   	<genexpr>(denumpify_detensorize.<locals>.<genexpr>  s     Gw!2155ws   r   )
isinstancer   ro   r   rx   itemsr  r9   genericitemr   r   Tensornumel)ru   kvs      r   r  r    s     'D%=))G}GwGGG	GT	"	"G}gmmoVodaa!6q!99oVWW	GRZZ	(	(||~			*Well"C"C[\H\||~N Ws   'D

c                 T   [        U [        R                  5      (       aa  [        [        R
                  " U R                  5      R                  5      nU[        U R                  5      -
  [        U R                  5      -
  $ [        [        R
                  " U 5      R                  5      $ )zY
Return the number of arguments of the passed function, even if it's a partial function.
)
r  	functoolspartialrd   rZ  	signaturefuncr  argskeywords)r  
total_argss     r   number_of_argumentsr    ss     $	))****4995@@A
C		N*S-???w  &1122r    functionstarting_batch_sizeauto_find_batch_sizec                     U c  [         R                  " [        UUS9$ U(       a  [        [        S5        SSKJn  U" XS9$ [         R                  " XS9$ )a  
Args:
A basic decorator that will try to execute `function`. If it fails from exceptions related to out-of-memory or
CUDNN, the batch size is cut in half and passed to `function`. `function` must take in a `batch_size` parameter as
its first argument.
    function (`callable`, *optional*)
        A function to wrap
    starting_batch_size (`int`, *optional*)
        The batch size to try and fit into memory
    auto_find_batch_size (`bool`, *optional*)
        If False, will just execute `function`
)r  r  
accelerater   )find_executable_batch_size)r  r  )
batch_size)r  r  r  r   accelerate.utils)r  r  r  %accelerate_find_executable_batch_sizes       r   r  r    sU       & 3!5
 	
 4lCh4hppXFFr    c                   0    \ rS rSrSrSrSrSrSrSr	Sr
S	rg
)
FSDPOptioni:  
full_shardshard_grad_opno_shardhybrid_shardhybrid_shard_zero2offload	auto_wraprq   N)ri   rj   rk   rl   
FULL_SHARDSHARD_GRAD_OPNO_SHARDHYBRID_SHARDHYBRID_SHARD_ZERO2OFFLOAD	AUTO_WRAPrp   rq   r    r   r  r  :  s&    J#MH!L-GIr    r  c                   f    \ rS rSrSr   SS\\   S\\   4S jjrS\S\4S	 jr	S
\
\   4S jrSrg)RemoveColumnsCollatoriD  zWWrap the data collator to remove unused columns before they are passed to the collator.N
model_namedescriptionc                 N    Xl         X l        X0l        XPl        X@l        SU l        g )NF)data_collatorsignature_columnsloggerr  r  message_logged)r[   r  r  r  r  r  s         r   r\   RemoveColumnsCollator.__init__G  s(     +!2&$#r    featurer   c                    [        U[        5      (       d  U$ U R                  (       d  U R                  (       a  U R                  (       a  [        [        UR                  5       5      [        U R                  5      -
  5      n[        U5      S:  a  U R                  c  SOSU R                   S3nU R                  R                  SU SU R                   SSR                  U5       S	SR                  U5       S
U R                   S35        SU l        UR                  5        VVs0 s H  u  pEX@R                  ;   d  M  XE_M     snn$ s  snnf )Nr    zin the z setzThe following columns z) don't have a corresponding argument in `z!.forward` and have been ignored: z, z. If z are not expected by `z/.forward`,  you can safely ignore this message.T)r  rx   r  r  r  r   setr   r  rd   r  infor   r  )r[   r  ignored_columnsdset_descriptionr  r  s         r   _remove_columns%RemoveColumnsCollator._remove_columnsV  s!   '4((N""t{{t"3w||~#6T=S=S9T#TUO?#a')-)9)9)A2QUQaQaPbbfGg   ,-=,> ?((I$))TcJdIe f99_566LT__L] ^;; '+#!(PA9O9O4OPPPs   (EEfeaturesc                 n    U Vs/ s H  o R                  U5      PM     nnU R                  U5      $ s  snf rY   )r  r  )r[   r  r  s      r   __call__RemoveColumnsCollator.__call__f  s6    AIJg((1J!!(++ Ks   2)r  r  r  r  r  r  NNN)ri   rj   rk   rl   rm   r   ry   r\   rx   r  r   r  rp   rq   r    r   r  r  D  sW    a $(%)$
 SM$ c]$Qt Q Q ,d ,r    r  r   return_is_regexc                 ,  ^ SnSn[        U [        5      (       a+  [        [        R                  " U T5      5      nU T:X  d  SOSnODTU ;   a  SnO;[        U4S jU  5       5      (       a  SnO[        U4S jU  5       5      (       a  SnSnU(       a  X44$ U$ )a  A helper method to check if the passed module's key name matches any of the target modules in the optim_target_modules.

Args:
    optim_target_modules (`Union[str, List[str]]`):
        A list of strings to try to match. Can be also a full string.
    key (`str`):
        A key to search any matches in optim_target_modules
    return_is_regex (`bool`):
        If set to `True`, the method will return whether the passed `optim_target_modules`
        is a regex or not.

Returns:
    `bool` : True of match object if key matches any target modules from config, False or
    None if no match found
    `bool` : If the matched target module is a regex to silence out the warnings in Trainer
    for extra modules being found (only if `target_module_found=True` for an array of regex).
FTc              3   ,   >#    U  H	  oT;   v   M     g 7frY   rq   )r  
target_keyr   s     r   r  -check_target_module_exists.<locals>.<genexpr>  s     F1E:31Es   c              3   d   >#    U  H%  n[        [        R                  " UT5      5      v   M'     g 7frY   )boolre	fullmatch)r  optim_target_moduler   s     r   r  r    s(     jUi>QT",,2C899Uis   -0)r  ry   r  r  r  any)optim_target_modulesr   r  target_module_foundis_regexs    `   r   check_target_module_existsr  k  s    $  H&,,"2<<0Dc#JK3s:4	$	$"	F1EF	F	F"	jUij	j	j"",,r    r  r  )N   F)Irm   r   r  rp  rZ  r*   r8   r  rz  r&  typingr   r   r   r   numpyr9   utilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r{   r   r  r6   r   rP   rR   rs   r}   r   PREFIX_CHECKPOINT_DIRcompiler   r   r   r   r   r   r   rx   ry   rz   r   r   r   r   r
  r  r  r   r   r1  rG  r  r  r  callabler  r  r  r  rq   r    r   <module>r     s)     	  	  	   3 3    " 3 S  7# 7$ 7@!;3 !;t !;H4"" ""JZ (z (*  % D#88;FG	k| <  (, (&j &,@tCJ'7 @E @2	d3:&6 	4U
#3 
T#u*%5 $l 	!<0L 0:o0 o0d	3 glGx G>AG_cG> $, $,N$# $PT $r    