# torch/distributed/tensor/_redistribute.py
import logging
from functools import cache
from typing import cast, NamedTuple

import torch
import torch.distributed._functional_collectives as funcol
import torch.distributed.tensor._api as dtensor
from torch.distributed.tensor._dtensor_spec import DTensorSpec, TensorMeta
from torch.distributed.tensor.device_mesh import DeviceMesh
from torch.distributed.tensor.placement_types import (
    Partial,
    Placement,
    Replicate,
    Shard,
)

logger = logging.getLogger(__name__)


class _TransformInfo(NamedTuple):
    mesh_dim: int
    src_dst_placements: tuple[Placement, Placement]
    # logical shape of the tensor on this mesh dimension
    logical_shape: list[int]


def _gen_transform_infos_non_cached(
    src_spec: DTensorSpec,
    dst_spec: DTensorSpec,
) -> list[_TransformInfo]:
    """
    Generate the transform infos from the source placements to the target placements.

    To transform from source to target placement it might have multiple steps, i.e. it
    might decompose Si -> Sj into Si -> R -> Sj.
    This would detect if there're mis-aligned/nested shardings between src/dst placements.
    E.g. Suppose the redistribution to perform is (Shard(0), Shard(0)) -> (Replicate(), Shard(0)),
    in this case Shard(0) -> Shard(0) for mesh dimension 1 actually needs resharding, because in
    the former case it is a nested-sharding of a tensor already sharded on dimension 0, whereas
    the latter is the first sharding on tensor dimension 0.
    """
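    # Worked example of the docstring above, on a 2-D mesh:
    #   (Shard(0), Shard(0)) -> (Replicate(), Shard(0))
    # is decomposed innermost mesh dim first, roughly into
    #   _TransformInfo(mesh_dim=1, src_dst_placements=(Shard(0), Replicate()), ...),
    #   _TransformInfo(mesh_dim=0, src_dst_placements=(Shard(0), Replicate()), ...),
    #   _TransformInfo(mesh_dim=1, src_dst_placements=(Replicate(), Shard(0)), ...),
    # i.e. the nested shard on mesh dim 1 is replicated first, then mesh dim 0,
    # and finally mesh dim 1 is re-sharded on tensor dim 0.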
    transform_infos: list[_TransformInfo] = []

    device_mesh = src_spec.device_mesh
    my_coordinate = device_mesh.get_coordinate()
    assert my_coordinate is not None

    # track the logical tensor shape on each mesh dim so that uneven shards
    # produce the correct output shape
    initial_logical_shape = list(src_spec.shape)
    mesh_dims_to_logical_shape = [initial_logical_shape]

    if device_mesh.ndim == 1:
        # a 1-D mesh needs a single direct transformation
        transform_infos.append(
            _TransformInfo(
                mesh_dim=0,
                src_dst_placements=(src_spec.placements[0], dst_spec.placements[0]),
                logical_shape=initial_logical_shape,
            )
        )
        return transform_infos

    # build the logical shape for every mesh dim of a multi-dim mesh
    for i, src in enumerate(src_spec.placements):
        current_logical_shape = mesh_dims_to_logical_shape[i]
        if isinstance(src, Shard):
            if i < device_mesh.ndim - 1:
                mesh_dim_size = device_mesh.size(mesh_dim=i)
                local_shard_size, _ = src._local_shard_size_on_dim(
                    current_logical_shape[src.dim],
                    mesh_dim_size,
                    my_coordinate[i],
                )
                new_logical_shape = list(current_logical_shape)
                new_logical_shape[src.dim] = local_shard_size
                mesh_dims_to_logical_shape.append(new_logical_shape)
        else:
            mesh_dims_to_logical_shape.append(current_logical_shape)

    current_placements = list(src_spec.placements)
    target_placements = list(dst_spec.placements)

    if src_spec.num_shards > 1:
        # walk from the innermost to the outermost mesh dim to detect nested or
        # mis-aligned shardings and replicate them before resharding
        for mesh_dim in reversed(range(len(current_placements))):
            current = current_placements[mesh_dim]
            target = target_placements[mesh_dim]
            if isinstance(current, Shard):
                shard_dim = current.dim
                current_mesh_sharding, target_mesh_sharding = [], []
                for i, (s, p) in enumerate(zip(current_placements, target_placements)):
                    if i >= mesh_dim:
                        break
                    if s.is_shard(shard_dim):
                        current_mesh_sharding.append(i)
                    if p.is_shard(shard_dim):
                        target_mesh_sharding.append(i)
                if current_mesh_sharding != target_mesh_sharding:
                    # sharding on an earlier mesh dim differs, so this mesh dim
                    # has to go through Replicate first
                    target = Replicate()

            if current != target:
                transform_infos.append(
                    _TransformInfo(
                        mesh_dim=mesh_dim,
                        src_dst_placements=(current, target),
                        logical_shape=mesh_dims_to_logical_shape[mesh_dim],
                    )
                )
                current_placements[mesh_dim] = target

    # walk from the outermost to the innermost mesh dim to collect the remaining
    # transforms (e.g. re-shard again after the replication above)
    for mesh_dim, (current, target) in enumerate(
        zip(current_placements, target_placements)
    ):
        if current != target:
            transform_infos.append(
                _TransformInfo(
                    mesh_dim=mesh_dim,
                    src_dst_placements=(current, target),
                    logical_shape=mesh_dims_to_logical_shape[mesh_dim],
                )
            )
            current_placements[mesh_dim] = target

    return transform_infos


@cache
def _gen_transform_infos(
    src_spec: DTensorSpec,
    dst_spec: DTensorSpec,
) -> list[_TransformInfo]:
    return _gen_transform_infos_non_cached(src_spec, dst_spec)


def redistribute_local_tensor(
    local_tensor: torch.Tensor,
    current_spec: DTensorSpec,
    target_spec: DTensorSpec,
    *,
    async_op: bool = False,
    is_backward: bool = False,
) -> torch.Tensor:
    """
    This redistributes the local tensor (torch.Tensor) from the current DTensorSpec to
    the target DTensorSpec, which involves the necessary collective calls to transform
    the local shard of the DTensor from its current spec to the target spec.
    """
    if current_spec.mesh != target_spec.mesh:
        raise NotImplementedError("Cross device mesh comm not supported yet!")

    new_local_tensor = None
    device_mesh = current_spec.mesh

    my_coordinate = device_mesh.get_coordinate()
    if my_coordinate is None:
        # this rank is not part of the mesh, nothing to redistribute
        return local_tensor

    # shapes that contain SymInts bypass the cached path
    has_symints = any(isinstance(s, torch.SymInt) for s in current_spec.shape) or any(
        isinstance(s, torch.SymInt) for s in target_spec.shape
    )
    if has_symints:
        transform_infos = _gen_transform_infos_non_cached(current_spec, target_spec)
    else:
        transform_infos = _gen_transform_infos(current_spec, target_spec)

    for transform_info in transform_infos:
        i = transform_info.mesh_dim
        current, target = transform_info.src_dst_placements

        if current == target:
            # short-cut, no transformation needed on this mesh dim
            new_local_tensor = local_tensor
            continue

        logger.debug("redistribute from %s to %s on mesh dim %s", current, target, i)

        if target.is_replicate():
            # Case 1: target is Replicate (partial -> reduce, shard -> all-gather)
            if current.is_partial():
                new_local_tensor = cast(Partial, current)._reduce_value(
                    local_tensor, device_mesh, i
                )
            elif current.is_shard():
                new_local_tensor = cast(Shard, current)._to_replicate_tensor(
                    local_tensor, device_mesh, i, transform_info.logical_shape
                )
            else:
                raise RuntimeError(
                    f"redistribute from {current} to {target} not supported yet"
                )
        elif target.is_shard():
            # Case 2: target is Shard
            target_placement = cast(Shard, target)
            if current.is_partial():
                new_local_tensor = cast(Partial, current)._reduce_shard_value(
                    local_tensor, device_mesh, i, target_placement
                )
            elif current.is_replicate():
                # chunk the local replica, no communication needed
                new_local_tensor = target_placement._replicate_to_shard(
                    local_tensor, device_mesh, i, my_coordinate[i]
                )
            else:
                assert current.is_shard(), (
                    f"Current placement should be shard but found {current}"
                )
                shard_spec = cast(Shard, current)
                if shard_spec.dim != target_placement.dim:
                    new_local_tensor = shard_spec._to_new_shard_dim(
                        local_tensor,
                        device_mesh,
                        i,
                        transform_info.logical_shape,
                        target_placement.dim,
                    )
        elif target.is_partial():
            # Case 3: target is Partial
            if current.is_replicate():
                # in the backward pass keep the gradient replicated instead of
                # re-partitioning it, since the later reduce would be wasted work
                new_local_tensor = (
                    local_tensor
                    if is_backward
                    else cast(Partial, target)._partition_value(
                        local_tensor, device_mesh, i
                    )
                )
            elif current.is_shard():
                if not is_backward:
                    raise RuntimeError(
                        f"redistribute from {current} to {target} not supported yet"
                    )
                # backward shard -> partial is handled as shard -> replicate
                new_local_tensor = cast(Shard, current)._to_replicate_tensor(
                    local_tensor, device_mesh, i, transform_info.logical_shape
                )
            else:
                # partial -> partial is a no-op
                new_local_tensor = local_tensor

        assert new_local_tensor is not None
        local_tensor = new_local_tensor

    assert new_local_tensor is not None, "redistribute failed!"

    if not async_op and isinstance(new_local_tensor, funcol.AsyncCollectiveTensor):
        new_local_tensor = new_local_tensor.wait()

    return new_local_tensor


class Redistribute(torch.autograd.Function):
    @staticmethod
    def forward(
        ctx,
        input: "dtensor.DTensor",
        device_mesh: DeviceMesh,
        placements: tuple[Placement, ...],
        async_op: bool = False,
    ):
        current_spec = input._spec
        ctx.current_spec = current_spec
        ctx.async_op = async_op

        if input._spec.placements != placements:
            target_spec = DTensorSpec(
                device_mesh, placements, tensor_meta=input._spec.tensor_meta
            )
            output = redistribute_local_tensor(
                input._local_tensor, current_spec, target_spec, async_op=async_op
            )
        else:
            # placements unchanged: reuse the local tensor directly
            output = input._local_tensor
            target_spec = current_spec

        return dtensor.DTensor(output, target_spec, requires_grad=input.requires_grad)

    @staticmethod
    def backward(ctx, grad_output: "dtensor.DTensor"):
        previous_spec = ctx.current_spec
        current_spec = grad_output._spec

        output = redistribute_local_tensor(
            grad_output._local_tensor,
            current_spec,
            previous_spec,
            async_op=ctx.async_op,
            is_backward=True,
        )
        # normalize Partial placements of the original spec to Replicate for the grad
        normalized_placements: list[Placement] = []
        for previous_placement in previous_spec.placements:
            if previous_placement.is_partial():
                normalized_placements.append(Replicate())
            else:
                normalized_placements.append(previous_placement)

        spec = DTensorSpec(
            previous_spec.device_mesh,
            tuple(normalized_placements),
            tensor_meta=TensorMeta(
                shape=grad_output.shape,
                stride=grad_output.stride(),
                dtype=grad_output.dtype,
            ),
        )
        output_dtensor = dtensor.DTensor(
            output, spec, requires_grad=grad_output.requires_grad
        )

        return output_dtensor, None, None, None
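

# Minimal usage sketch (illustrative only, not part of the module's public surface):
# redistribute a DTensor from Shard(0) to Replicate() via the public API, which routes
# through Redistribute.apply / redistribute_local_tensor above. Assumes a single-rank
# gloo process group; the MASTER_ADDR/MASTER_PORT values below are placeholders.
if __name__ == "__main__":
    import os

    import torch.distributed as dist
    from torch.distributed.device_mesh import init_device_mesh
    from torch.distributed.tensor import distribute_tensor

    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=0, world_size=1)

    mesh = init_device_mesh("cpu", (1,))
    dt = distribute_tensor(torch.arange(8, dtype=torch.float32), mesh, [Shard(0)])
    replicated = dt.redistribute(mesh, [Replicate()])
    print(replicated.to_local())

    dist.destroy_process_group()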