import importlib.util
import json
import os
import warnings
from dataclasses import dataclass, field

import torch

from ..training_args import TrainingArguments
from ..utils import cached_property, is_sagemaker_dp_enabled, logging


logger = logging.get_logger(__name__)


def is_sagemaker_model_parallel_available():
    # SageMaker model parallelism requires the launcher to set the `partitions` field
    # in the SM_HP_MP_PARAMETERS hyperparameters.
    smp_options = os.getenv("SM_HP_MP_PARAMETERS", "{}")
    try:
        smp_options = json.loads(smp_options)
        if "partitions" not in smp_options:
            return False
    except json.JSONDecodeError:
        return False

    # MPI must also be enabled in the SageMaker framework parameters.
    mpi_options = os.getenv("SM_FRAMEWORK_PARAMS", "{}")
    try:
        mpi_options = json.loads(mpi_options)
        if not mpi_options.get("sagemaker_mpi_enabled", False):
            return False
    except json.JSONDecodeError:
        return False

    # Finally, the `smdistributed` package must be importable.
    return importlib.util.find_spec("smdistributed") is not None


if is_sagemaker_model_parallel_available():
    import smdistributed.modelparallel.torch as smp

    smp.init()


@dataclass
class SageMakerTrainingArguments(TrainingArguments):
    mp_parameters: str = field(
        default="",
        metadata={"help": "Used by the SageMaker launcher to send mp-specific args. Ignored in SageMakerTrainer"},
    )

    def __post_init__(self):
        super().__post_init__()
        warnings.warn(
            "`SageMakerTrainingArguments` is deprecated and will be removed in v5 of Transformers. You can use "
            "`TrainingArguments` instead.",
            FutureWarning,
        )

    @cached_property
    def _setup_devices(self) -> "torch.device":
        logger.info("PyTorch: setting up devices")
        if torch.distributed.is_available() and torch.distributed.is_initialized() and self.local_rank == -1:
            logger.warning(
                "torch.distributed process group is initialized, but local_rank == -1. "
                "In order to use Torch DDP, launch your script with `python -m torch.distributed.launch`"
            )
        if self.no_cuda:
            device = torch.device("cpu")
            self._n_gpu = 0
        elif is_sagemaker_model_parallel_available():
            local_rank = smp.local_rank()
            device = torch.device("cuda", local_rank)
            self._n_gpu = 1
        elif is_sagemaker_dp_enabled():
            import smdistributed.dataparallel.torch.torch_smddp  # noqa: F401

            torch.distributed.init_process_group(backend="smddp", timeout=self.ddp_timeout_delta)
            self.local_rank = int(os.getenv("SMDATAPARALLEL_LOCAL_RANK"))
            device = torch.device("cuda", self.local_rank)
            self._n_gpu = 1
        elif self.local_rank == -1:
            # Single-process setup: use the first visible CUDA device if any, otherwise CPU.
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            self._n_gpu = torch.cuda.device_count()
        else:
            # Distributed training with torch.distributed: one process per GPU, NCCL backend.
            if not torch.distributed.is_initialized():
                torch.distributed.init_process_group(backend="nccl", timeout=self.ddp_timeout_delta)
            device = torch.device("cuda", self.local_rank)
            self._n_gpu = 1

        if device.type == "cuda":
            torch.cuda.set_device(device)

        return device

    @property
    def world_size(self):
        if is_sagemaker_model_parallel_available():
            return smp.dp_size()

        return super().world_size

    @property
    def place_model_on_device(self):
        return not is_sagemaker_model_parallel_available()

    @property
    def _no_sync_in_gradient_accumulation(self):
        return False
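# Minimal usage sketch (not part of the original module): it assumes the code runs inside a
# SageMaker training job, where the launcher populates SM_HP_MP_PARAMETERS / SM_FRAMEWORK_PARAMS
# and a writable output path such as /opt/ml/model exists. Device selection, world size, and
# model placement then follow from the environment detected above.
#
#   from transformers.sagemaker import SageMakerTrainingArguments
#
#   args = SageMakerTrainingArguments(output_dir="/opt/ml/model", per_device_train_batch_size=8)
#   print(args.device, args.world_size, args.place_model_on_device)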