o
    ZhT                     @   s  d Z ddlZddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZ e r7ddlZddlmZ eeZdd	 Ze rMe rMdd
lmZ nddlmZ G dd deZG dd deZdadd Zdd Zdd Zdd Z dd Z!dd Z"d"ddZ#d#d d!Z$dS )$z
Integration with Deepspeed
    N)partialmethod   )dep_version_check)is_accelerate_availableis_torch_availablelogging)nnc                  C   s@   t jdd u} | rztd}W dS  tjy   Y dS w d S )N	deepspeedTF)	importlibutil	find_specimportlib_metadatametadataZPackageNotFoundError)Zpackage_exists_ r   R/var/www/auris/lib/python3.10/site-packages/transformers/integrations/deepspeed.pyis_deepspeed_available$   s   
r   )HfDeepSpeedConfig)objectc                       s    e Zd ZdZ fddZ  ZS )r   aJ  
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    c                    s(   t |  td td t | d S )NZ
accelerater	   )set_hf_deepspeed_configr   super__init__selfZconfig_file_or_dict	__class__r   r   r   J   s   zHfDeepSpeedConfig.__init__)__name__
__module____qualname____doc__r   __classcell__r   r   r   r   r   9   s    r   c                       sX   e Zd ZdZ fddZdd Zdd Zdd
dZeeddZ	dddZ
dd Z  ZS )HfTrainerDeepSpeedConfigz
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    c                    s   t  | d | _g | _d S N)r   r   _dtype
mismatchesr   r   r   r   r   X   s   
z!HfTrainerDeepSpeedConfig.__init__c                 C   s   | j d u r	td| j S )Nz8trainer_config_process() wasn't called yet to tell dtype)r#   
ValueErrorr   r   r   r   dtype]   s   
zHfTrainerDeepSpeedConfig.dtypec                 C   s   |  |}|d u rdS |dkS )NFauto)	get_value)r   ds_key_longvalr   r   r   is_autob   s   
z HfTrainerDeepSpeedConfig.is_autoNTc              
   C   s   |  |\}}|du rdS ||dkr|||< dS |sdS ||}|dur?||krA| jd| d| d| d|  dS dS dS )a  
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatched` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        Nr(   z- ds =z vs hf )Zfind_config_nodegetr$   append)r   r*   Zhf_valZhf_key
must_matchconfigZds_keyZds_valr   r   r   
fill_matchi   s   
(z#HfTrainerDeepSpeedConfig.fill_matchF)r0   c                 C   s  |j |j |j }| d|jd|  | d|jd | d|d|  | d|jd | d|jd	 | d
|j|jgd | d|jd | d|j	d | 
dd | d|jd	 |jsc|jrm|jdkrjdnd}nd}|jr| jdi | jd< |j| jd d< | d|js|jo|dkd | d|dkd | d|jd | d|jp|jd | drtj| _dS | drtj| _dS tj| _dS ) z
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        Ztrain_micro_batch_size_per_gpuper_device_train_batch_sizegradient_accumulation_stepstrain_batch_sizeztrain_batch_size (calculated)Zgradient_clippingmax_grad_normzoptimizer.params.lrlearning_ratezoptimizer.params.betaszadam_beta1+adam_beta2zoptimizer.params.epsadam_epsilonzoptimizer.params.weight_decayweight_decayzscheduler.params.warmup_min_lrr   zscheduler.params.warmup_max_lrZapexampN
checkpointZuse_node_local_storagezfp16.enabledz%fp16|fp16_full_eval+fp16_backend(amp)zamp.enabledzfp16+fp16_backend(apex)zamp.opt_levelfp16_opt_levelzbf16.enabledzbf16|bf16_full_eval)Z
world_sizer3   r4   r2   r6   r7   Z
adam_beta1Z
adam_beta2r8   r9   	fill_onlyZfp16Zfp16_full_evalfp16_backendZsave_on_each_noder1   r.   r<   Zbf16Zbf16_full_evalZis_truetorchZbfloat16r#   Zis_falseZfloat32Zfloat16)r   argsZauto_find_batch_sizer5   r>   r   r   r   trainer_config_process   s`   


z/HfTrainerDeepSpeedConfig.trainer_config_processc                    sF  g d} fdd|D }t |dkr{t|jdr|jj}n=t|jdr+t|jj}n0t|jdr>t|jjdr>|jjj}nt|jdrSt|jjdrSt|jjj}ntd| d	 d
||   	 r{ dt
d| |   dd|   d|d  d||d t  jdkrd j}td| ddS )z
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        )$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdc                    s   g | ]	}  |r|qS r   )r,   ).0xr&   r   r   
<listcomp>       zDHfTrainerDeepSpeedConfig.trainer_config_finalize.<locals>.<listcomp>r   hidden_sizehidden_sizestext_configzThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.rB   rC   g?rD   
   z scheduler.params.total_num_stepsznum_training_steps (calculated)z!scheduler.params.warmup_num_stepsZwarmup_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.N)lenhasattrr1   rI   maxrJ   rK   r%   r=   is_zero3intr2   Zget_warmup_stepsr$   join)r   r@   modelnum_training_stepsZhidden_size_based_keysZhidden_size_auto_keysrI   r$   r   r&   r   trainer_config_finalize   sX   	
z0HfTrainerDeepSpeedConfig.trainer_config_finalize)NTF)r   r   r   r   r   r'   r,   r2   r   r=   rA   rV   r    r   r   r   r   r!   R   s    

Jr!   c                 C   s   t | ad S r"   )weakrefref_hf_deepspeed_config_weak_ref)Zhf_deepspeed_config_objr   r   r   r     s   r   c                   C   s   d a d S r"   )rZ   r   r   r   r   unset_hf_deepspeed_config  s   r[   c                   C   s    t d urt  d urt   S dS )NF)rZ   rQ   r   r   r   r   is_deepspeed_zero3_enabled$  s   
r\   c                   C   s   t d urt  d urt  jS d S r"   )rZ   r1   r   r   r   r   deepspeed_config+  s   r]   c                    sR   t |dd| }dur|_g  d	dtjf fdd| |dd  S )
z
    Loads state dict into a model specifically for Zero3, since DeepSpeed does not support the `transformers`
    tensor parallelism API.

    Nearly identical code to PyTorch's `_load_from_state_dict`
    	_metadataN Fmodulec           
         s  d u ri n	 d d i }||d< ||dg g f}t rstfdd|D dkrsdd l}t| jd d dd  fd	d| D }t|dkrs|jj|dd
 t	j
 dkrd| j|  W d    n1 snw   Y  | j D ]\}}	|	d ur|	|| d | qxd S )Nassign_to_params_buffersTc                    s   g | ]	}|  r|qS r   )
startswith)rE   key)prefixr   r   rG   J  rH   zC_load_state_dict_into_zero3_model.<locals>.load.<locals>.<listcomp>r   F)re   recursec                    s   g | ]
}| v r | qS r   r   )rE   k)named_parametersr   r   rG   P  s    )Zmodifier_rank.)r.   r\   rN   r	   dictrh   keyszeroZGatheredParametersr?   distributedZget_rankZ_load_from_state_dictZ_modulesitems)
r`   
state_dictre   rb   Zlocal_metadatar@   r	   Zparams_to_gathernamechild
error_msgsloadr   )rh   re   r   rt   C  s$     
z/_load_state_dict_into_zero3_model.<locals>.load)rb   )r_   F)getattrcopyr^   r   Module)Zmodel_to_loadro   r   rr   r   !_load_state_dict_into_zero3_model2  s   rx   c                    s   ddl m}m} |j}d}d|v r|jrtd||d}n| r'td 	 }d|d	< d}	d
|v r=||}	||	fS t
||rS fdd}
|||
d}	||	fS j |d}	||	fS )zY
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    r   )
DummyOptimDummySchedulerN	optimizerz|--adafactor was passed, but also found `optimizer` configured in the DeepSpeed config. Only one optimizer can be configured.)paramszDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)TZzero_allow_untested_optimizerZ	schedulerc                    s"   t  }d |_|j | d}|S )NrU   r{   )rv   lr_schedulercreate_scheduler)r{   Ztrainer_copyr~   rU   trainerr   r   _lr_scheduler_callable  s   
z5deepspeed_optim_sched.<locals>._lr_scheduler_callable)Zlr_scheduler_callabler}   )Zaccelerate.utilsry   rz   r1   Z	adafactorr%   Z
is_offloadloggerinfoZcreate_optimizer
isinstancer   )r   hf_deepspeed_configr@   rU   model_parametersry   rz   r1   r{   r~   r   r   r   r   deepspeed_optim_schedb  s2   

r   Fc                 C   s   ddl m} | j}| j}| jjjj}|||| |	|
  |r>| s*td|d |d d\}}d}	||fS d| _|jdi d	d}
|
d
kr^ddl}|j||
| d}ttdd | }	t| ||||	\}}||fS )a  
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        resume_from_checkpoint: path to a checkpoint if to resume from after normal DeepSpeedEngine load
        inference: launch in inference mode (no optimizer and no lr scheduler)
        auto_find_batch_size: whether to ignore the `train_micro_batch_size_per_gpu` argument as it's being
            set automatically by the auto batch size finder

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/deepspeedai/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/deepspeedai/DeepSpeed/issues/1612

    r   )r   zMZeRO inference only makes sense with ZeRO Stage 3 - please adjust your configr{   r~   )NNNZtensor_parallelZautotp_size   )rT   tp_sizer'   c                 S   s   | j S r"   )Zrequires_grad)pr   r   r   <lambda>  s    z deepspeed_init.<locals>.<lambda>)Zdeepspeed.utilsr   rT   r@   ZacceleratorstateZdeepspeed_pluginZhf_ds_configrV   setLevelZget_process_log_levelrQ   r%   Zdel_config_sub_treer{   r1   r.   r	   Ztp_model_initr'   listfilter
parametersr   )r   rU   Z	inferenceZ	ds_loggerrT   r@   r   r{   r~   r   r   r	   r   r   r   deepspeed_init  s0   


r   Tc                 C   sv   dd l }t| | d}t|dkr4td|  | j||ddd\}}|d u r2td| d S td| )Nr   z/global_step*zAttempting to resume from T)load_module_strictZload_optimizer_statesZload_lr_scheduler_statesz-[deepspeed] failed to resume from checkpoint z!Can't find a valid checkpoint at )globsortedrN   r   r   Zload_checkpointr%   )Zdeepspeed_engineZcheckpoint_pathr   r   Zdeepspeed_checkpoint_dirsZ	load_pathr   r   r   r   deepspeed_load_checkpoint  s   
r   rW   )T)%r   rv   importlib.metadatar   r   importlib.utilr
   rX   	functoolsr   Zdependency_versions_checkr   utilsr   r   r   r?   r   Z
get_loggerr   r   r   Zaccelerate.utils.deepspeedr   ZDeepSpeedConfigbuiltinsr   r!   rZ   r   r[   r\   r]   rx   r   r   r   r   r   r   r   <module>   s8   
 B0
=>