from abc import ABC, abstractmethod
from typing import Any, Optional

import torch
import torch.distributed as dist
from torch.distributed._shard.sharded_tensor.api import ShardedTensor
from torch.distributed._shard.sharded_tensor.shard import Shard
from torch.distributed.fsdp._shard_utils import (
    _all_gather_dtensor,
    _create_chunk_dtensor,
    _create_chunk_sharded_tensor,
)
from torch.distributed.tensor import DeviceMesh, DTensor


class FSDPExtensions(ABC):
    """
    This enables some customizable hooks to enable composability with tensor
    parallelism. To activate these hooks, use :func:`_set_fsdp_extensions` to
    set a custom :class:`FSDPExtensions` that implements the hooks.
    """

    @abstractmethod
    def pre_flatten_transform(
        self,
        tensor: torch.Tensor,
    ) -> tuple[torch.Tensor, Optional[Any]]:
        """E.g. converting ``DistributedTensor`` to local tensor."""
        ...

    @abstractmethod
    def post_unflatten_transform(
        self,
        tensor: torch.Tensor,
        param_extension: Any,
    ) -> torch.Tensor:
        """E.g. converting local tensor to ``DistributedTensor``."""
        ...

    @abstractmethod
    def chunk_tensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        world_size: int,
        num_devices_per_node: int,
        pg: dist.ProcessGroup,
        device: Optional[torch.device] = None,
    ) -> torch.Tensor:
        """Shards a tensor to chunks and returns the local chunk."""
        ...

    @abstractmethod
    def chunk_dtensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        device_mesh: DeviceMesh,
    ) -> torch.Tensor:
        """Shards a tensor/DTensor to DTensor and returns the local DTensor."""
        ...

    @abstractmethod
    def pre_load_state_dict_transform(
        self,
        tensor: torch.Tensor,
    ) -> tuple[torch.Tensor, list[Shard]]:
        """
        This is to be called before loading a *sharded* model state dict and
        should return the tensor and list of shards from which to load data.
        """
        ...

    @abstractmethod
    def all_gather_dtensor(
        self,
        tensor: DTensor,
        parent_mesh: Optional[DeviceMesh],
    ) -> torch.Tensor:
        """
        This is to be called before loading a *sharded* DTensor state dict.
        This gathers tensor in FSDP dimension and returns local tensor of
        TP DTensor.
        """
        ...


_extensions: Optional[FSDPExtensions] = None


def _set_fsdp_extensions(flattener: FSDPExtensions) -> None:
    global _extensions
    _extensions = flattener


def _ext_pre_flatten_transform(
    tensor: torch.Tensor,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> tuple[torch.Tensor, Optional[Any]]:
    if fsdp_extension is not None:
        new_tensor, param_extension = fsdp_extension.pre_flatten_transform(tensor)
        if param_extension is not None:
            return new_tensor, param_extension
    return tensor, None


def _ext_post_unflatten_transform(
    tensor: torch.Tensor,
    param_extension: Any,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    if fsdp_extension is not None and param_extension is not None:
        return fsdp_extension.post_unflatten_transform(tensor, param_extension)
    return tensor


def _ext_chunk_tensor(
    tensor: torch.Tensor,
    rank: int,
    world_size: int,
    num_devices_per_node: int,
    pg: dist.ProcessGroup,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    chunk_tensor_fn = (
        fsdp_extension.chunk_tensor
        if fsdp_extension is not None
        else _create_chunk_sharded_tensor
    )
    return chunk_tensor_fn(
        tensor,
        rank,
        world_size,
        num_devices_per_node,
        pg,
    )


def _ext_chunk_dtensor(
    tensor: torch.Tensor,
    rank: int,
    device_mesh: DeviceMesh,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    chunk_dtensor_fn = (
        fsdp_extension.chunk_dtensor
        if fsdp_extension is not None
        else _create_chunk_dtensor
    )
    return chunk_dtensor_fn(
        tensor,
        rank,
        device_mesh,
    )


def _ext_pre_load_state_dict_transform(
    tensor: torch.Tensor,
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> tuple[torch.Tensor, list[Shard]]:
    if fsdp_extension is not None:
        return fsdp_extension.pre_load_state_dict_transform(tensor)

    assert type(tensor) is ShardedTensor
    shards = tensor.local_shards()
    return tensor, shards


def _ext_all_gather_dtensor(
    tensor: DTensor,
    parent_mesh: Optional[DeviceMesh],
    fsdp_extension: Optional[FSDPExtensions] = None,
) -> torch.Tensor:
    all_gather_dtensor_fn = (
        fsdp_extension.all_gather_dtensor
        if fsdp_extension is not None
        else _all_gather_dtensor
    )
    return all_gather_dtensor_fn(
        tensor,
        parent_mesh,
    )
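
# ---------------------------------------------------------------------------
# A minimal sketch of a custom extension, assuming the standard DTensor
# ``to_local()`` / ``from_local()`` round-trip. ``_ExampleTPExtensions`` is a
# hypothetical illustration, not part of this module's public API and not
# registered by default: the flatten hooks swap a DTensor for its local shard
# (recording the sharding spec so it can be rebuilt), while the remaining
# hooks simply delegate to this module's default helpers.


class _ExampleTPExtensions(FSDPExtensions):
    """Hypothetical example extension; for illustration only."""

    def pre_flatten_transform(
        self,
        tensor: torch.Tensor,
    ) -> tuple[torch.Tensor, Optional[Any]]:
        if isinstance(tensor, DTensor):
            # Keep the mesh and placements so the DTensor can be rebuilt.
            return tensor.to_local(), (tensor.device_mesh, tensor.placements)
        return tensor, None

    def post_unflatten_transform(
        self,
        tensor: torch.Tensor,
        param_extension: Any,
    ) -> torch.Tensor:
        device_mesh, placements = param_extension
        return DTensor.from_local(tensor, device_mesh, placements)

    def chunk_tensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        world_size: int,
        num_devices_per_node: int,
        pg: dist.ProcessGroup,
        device: Optional[torch.device] = None,
    ) -> torch.Tensor:
        # Delegate to the default sharded-tensor chunking helper.
        return _create_chunk_sharded_tensor(
            tensor, rank, world_size, num_devices_per_node, pg
        )

    def chunk_dtensor(
        self,
        tensor: torch.Tensor,
        rank: int,
        device_mesh: DeviceMesh,
    ) -> torch.Tensor:
        # Delegate to the default DTensor chunking helper.
        return _create_chunk_dtensor(tensor, rank, device_mesh)

    def pre_load_state_dict_transform(
        self,
        tensor: torch.Tensor,
    ) -> tuple[torch.Tensor, list[Shard]]:
        # Mirror the default behavior: load from the local shards.
        assert type(tensor) is ShardedTensor
        return tensor, tensor.local_shards()

    def all_gather_dtensor(
        self,
        tensor: DTensor,
        parent_mesh: Optional[DeviceMesh],
    ) -> torch.Tensor:
        # Delegate to the default all-gather helper.
        return _all_gather_dtensor(tensor, parent_mesh)


# Usage (hypothetical): activate the hooks before wrapping modules with FSDP.
# _set_fsdp_extensions(_ExampleTPExtensions())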