o
    Zh0                     @   s$  d dl mZ d dlmZ d dlmZmZ G dd dZG dd deZG dd	 d	eZ	G d
d deZ
G dd deZG dd deZdedee defddZdedeee	f de
fddZdee deee	f deee
f fddZdededefdd Zdee d!eee
f defd"d#Zd$S )%    )Enum)
NamedTuple)map_argNodec                   @   sB   e Zd ZdZdeddfddZdd Zd	d
 Zdd Zdd Z	dS )	PartitionzPartition class contains all the information about an individual partition.
    It also provides necessary methods for manipulation the partition.
    partition_idreturnNc                 C   s4   t  | _|| _t  | _t  | _d| _d| _g | _d S )Nr   )setnodesr   parentschildrenZ	bfs_levelused_mem_byteslogical_device_ids)selfr    r   V/var/www/auris/lib/python3.10/site-packages/torch/fx/experimental/partitioner_utils.py__init__   s   
zPartition.__init__c                 C   s
   t | jS N)strr   r   r   r   r   __str__   s   
zPartition.__str__c                 C   s,   d| _ | jD ]}|  j t|| j7  _ qd S )Nr   )r   r   get_extra_size_of)r   noder   r   r   recalculate_mem_size   s   
zPartition.recalculate_mem_sizec                 C   sX   i }t |j|j t |j|j |D ]}|jdv r| j| q| j| |   d S )N>   get_attrplaceholder)r   args
setdefaultkwargsopr   addr   )r   r   input_nodesnr   r   r   add_node   s   
zPartition.add_nodec                    s~   | j v r= j | i }t|j|j t|j|j |D ]}t fdd|jD r6|jdv r6 j | q 	  d S d S )Nc                 3   s    | ]}| j vV  qd S r   )r   .0r#   r   r   r   	<genexpr>5   s    

z(Partition.remove_node.<locals>.<genexpr>>   r   r   )
r   remover   r   r   r   allusersr    r   )r   r   r"   Z
input_noder   r   r   remove_node)   s   

zPartition.remove_node)
__name__
__module____qualname____doc__intr   r   r   r$   r+   r   r   r   r   r      s    	r   c                   @   s&   e Zd ZU eed< eed< eed< dS )DevicenameZavailable_mem_bytesZ
logical_idN)r,   r-   r.   r   __annotations__r0   r   r   r   r   r1   <   s   
 r1   c                   @   s   e Zd ZU eed< eed< dS )NodeLatencymem_latency_seccomputer_latency_secNr,   r-   r.   floatr3   r   r   r   r   r4   B   s   
 r4   c                   @   s&   e Zd ZU eed< eed< eed< dS )PartitionLatencyr5   r6   overall_latency_secNr7   r   r   r   r   r9   I   s   
 r9   c                   @   s    e Zd ZdZdZdZdZdZdS )PartitionModer               N)r,   r-   r.   
size_basedZ	sparse_nnZ
cost_awareZkl_basedZ	aot_basedr   r   r   r   r;   R   s    r;   c                   @   s   e Zd ZU ee ed< ejZeed< dZ	e
ed< i Zeeef ed< i Zeeef ed< i Zeeee f ed< dZeed	< d
S )PartitionerConfigZdevicesmode        transfer_rate_bytes_per_secnode_to_latency_mappingnode_to_partition_mapping#partition_to_logical_device_mappingFsaturate_hostN)r,   r-   r.   listr1   r3   r;   r@   rB   rD   r8   rE   dictr   r4   rF   r0   rG   rH   boolr   r   r   r   rA   Z   s   
 rA   r   r   r   c                 C   s   i }t | j|j t | j|j d}|D ]}||vr,t|dd}|r(||j7 }qtdqt| dd}|r<||j7 }|S td)zGiven a node and a set of nodes,
    this function return the extra size that needed
    if this node is included in this set.
    r   
size_bytesNznode has no size_bytes attr)r   r   r   r   getattroutput_sizeRuntimeError
total_size)r   r   r"   Ztotal_size_of_input_nodesr#   rL   r   r   r   r   e   s    
r   	partitionrE   c                    sx   dt dtt fdd}dtdtf fdd |}tdddd	}|D ]} |tdddd	}|j|jkr9|}q%|S )
zVGiven a partition and its nodes' latency, return a PartitionLatency for this partitionrQ   r   c                    s`   g } j D ](}|jdv rqi }t|j|j t|j|j t fdd|D s-|| q|S )z>Given a partition, return a list of nodes on the top bfs level>   r   r   c                 3   s$    | ]}| j v o|jd vV  qdS )>   r   r   N)r   r    r%   rQ   r   r   r'      s
    
zFget_latency_of_one_partition.<locals>.get_top_nodes.<locals>.<genexpr>)r   r    r   r   r   r   anyappend)rQ   	top_nodesr   r"   r   rR   r   get_top_nodes   s   


z3get_latency_of_one_partition.<locals>.get_top_nodesr   c           
         s   |  }|j t|j|j }|j|j }|j|j }t| jj}|rDtdddd}|D ]} |t|||}	|	j |j krA|	}q.|S t|||S )zyGiven a top node of a partition, this function returns
        the latency of the critical path in the partition
        rC   r5   r6   r:   )	r:   maxr6   r5   r
   r*   intersectionr   r9   )
r   partition_latencyZnode_latencyr:   r5   r6   r*   Zmax_latencyr#   Znew_partition_latency
dfs_helperrE   rQ   r   r   r\      s:   

z0get_latency_of_one_partition.<locals>.dfs_helperrC   rW   )r   rI   r   r9   r:   )rQ   rE   rV   rU   Zcritical_path_latencyr   rZ   r   r[   r   get_latency_of_one_partition   s&   ,r]   
partitionsc                 C   s$   i }| D ]}t ||}|||< q|S )zGiven all the partitions and node_to_latency_mapping dictionary,
    return a mapping dictionary of each partition to its overall latency
    )r]   )r^   rE   partition_to_latency_mappingrQ   rZ   r   r   r    get_partition_to_latency_mapping   s   
r`   parent_partitionchild_partitionrD   c           	      C   s   | j g kr|j g kr| j |j krdS d}t }|jD ]4}i }t|j|j t|j|j |D ]}|| jv rM||vrMt|dd}|durH||j7 }|	| q.q|| S )zfGiven two partitions (parent and child),
    calculate the communication latency between the two.
    rC   r   rL   N)
r   r
   r   r   r   r   r   rM   rN   r!   )	ra   rb   rD   Z	comm_sizeZvisited_nodesr   r"   r#   rL   r   r   r   get_comm_latency_between   s&   





rc   r_   c                    sh   dt dtdtf fdd dtt  dtt  fdd}|| }d	}|D ]} |d	}||kr1|}q$|S )
zGiven all partitions in a graph, find the critical path among all partitions
    and return its latency as the latency of the whole graph
    rQ   latency_so_far_secr   c                    sR   ||  j 7 }| jr'd}| jD ]}t| |} ||| }||kr$|}q|S |S )zJThis function helps to recursively get the latency of a path of partitionsrC   )r:   r   rc   )rQ   rd   Zmax_latency_secchildZcomm_latency_secZnew_latency_secr\   r_   rD   r   r   r\     s&   
z4get_latency_of_partitioned_graph.<locals>.dfs_helperr^   c                 S   s   dd | D }|S )zvThis function is to return all the partitions without parents
        as the starting points of all the paths
        c                 S   s   g | ]}t |jd kr|qS )r   )lenr   )r&   rQ   r   r   r   
<listcomp>2  s    zPget_latency_of_partitioned_graph.<locals>.get_top_partitions.<locals>.<listcomp>r   )r^   top_partitionsr   r   r   get_top_partitions-  s   z<get_latency_of_partitioned_graph.<locals>.get_top_partitionsrC   )r   r8   rI   )r^   r_   rD   rj   ri   Zcritical_path_latency_secrQ   Zlatency_secr   rf   r    get_latency_of_partitioned_graph  s   	

rk   N)enumr   typingr   Ztorch.fx.noder   r   r   r1   r4   r9   r;   rA   r
   r0   r   rJ   r]   rI   r`   r8   rc   rk   r   r   r   r   <module>   sL   4	

Y



$
