o
    Zh                     @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ dd	 Z	dd
ejdedededejdeej defddZd
ejdededefddZd
edee dejfddZdS )    N)Optional)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorPropertiesShardMetadata)
DeviceMeshDTensor	Replicater   c                 C   s`   |  dkrd|  d| S |  dkr#d|  d| dt|  S d|  d| d| |  S )Ncpuzrank:/Zhpu:)lowerr   Zcurrent_device)rankdevice_typenum_devices_per_node r   R/var/www/auris/lib/python3.10/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_str   s
   r   tensorr   
world_sizer   pgdevicereturnc              
      sx  | j |dd}t||kr4||  }dd |  D t|  d | | d< t||g}ng }dd |D }	dgtt	
dd |	D dd  }
dgt|	d d	  fd
d|
D }|du rntjn|j  fddtt|	D }t|	t|  krt|ksJ  J dd t||	|D }t||  t| j| jdtj|  dd}tj||dS )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local shard to create a ShardedTensor.
    r   )dimc                 S   s   g | ]}d qS r   r   .0_r   r   r   
<listcomp>-   s    z0_create_chunk_sharded_tensor.<locals>.<listcomp>c                 S   s   g | ]}t | qS r   )listsize)r    chunkr   r   r   r"   4   s    c                 S   s   g | ]}|d  qS r   r   )r    
chunk_sizer   r   r   r"   6   s    N   c                    s   g | ]}|g  qS r   r   )r    Zd0)offsetsr   r   r"   9   s    c                    s    g | ]}t t| qS r   )r   distZget_global_rank)r    r)r   r   r   r   r   r"   ?   s    
c                 S   s   g | ]\}}}t |||qS r   r	   )r    offsetr$   Z	placementr   r   r   r"   H   s    
F)dtypelayoutZrequires_gradZmemory_formatZ
pin_memory)Zshards_metadatar$   Ztensor_properties)sharded_tensor_metadataZprocess_group)r%   lencloner$   mathceilr   Zfrom_tensor_and_offsetsr#   	itertools
accumulater   Z_get_pg_default_devicetyperangezipr   r   r-   r.   torchZcontiguous_format	is_pinnedr   Z+_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunksZlocal_shardZlocal_shardsZchunk_sizesZdim0_offsetsZchunk_offsets
placementsZshard_metadatar/   r   )r   r   r)   r   r   _create_chunk_sharded_tensor   sP   
(
r=   device_meshc                 C   sZ   |    } dd t|jD }dd t|jD }td|d< tj| ||ddj|dS )	z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local tensor to create a DTensor.
    c                 S      g | ]}t  qS r   r   r   r   r   r   r"   j       z)_create_chunk_dtensor.<locals>.<listcomp>c                 S   r?   r   r@   r   r   r   r   r"   k   rA   r   r'   F)Z	run_check)r<   )detachr1   r7   ndimDShardr   Z
from_localredistribute)r   r   r>   Zreplicate_placementsZshard_placementsr   r   r   _create_chunk_dtensor\   s   
rF   	root_meshc                 C   sD   || j ks	J dtt| j}t |d< | j| j |d} |  S )zT
    All gather a DTensor in its sharded dimension and return the local tensor.
    z2The device mesh of a tensor should be a root mesh.r'   )r>   r<   )r>   r#   copydeepcopyr<   r   rE   Zto_local)r   rG   r<   r   r   r   _all_gather_dtensoru   s   
rJ   )N) rH   r4   r2   typingr   r9   Ztorch.distributeddistributedr*   Ztorch._utilsr   r   Z'torch.distributed._shard.sharded_tensorr   r   r   r   Z&torch.distributed._shard.sharding_specr
   Ztorch.distributed.tensorr   r   r   rD   r   ZTensorintZProcessGroupr   r=   rF   rJ   r   r   r   r   <module>   sX   
>
