a
    hzB                     @   s  U d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	 ddl
Z
ddlZ
ddlmZ ddlmZ ddlmZ d	d
lmZmZ d	dlmZmZmZ eeef ZeeZdae	e eee f  e!d< e e"e
j#j$f dddZ%e
j#j&eeeee  e'e e"e eee f e eee f dd	ddZ(ee eee f dddZ)e'e e'e dddZ*ee+e ddddZ,e
j#j&ee'ee  dddZ-e
j#j&e'e ddd d!Z.ee'e ee
j#j&e'ee  f d"d#d$Z/e
j#j&e eee f dd%d&d'Z0e
j#j&e eee f e eee f d(d)d*Z1e
j#j&e eee f dd%d+d,Z2e eee f e eee f dd-d.d/Z3ee'e e'e e4d0d1d2Z5dS )3a"  
This module implements graph deduplication functionality for TorchDynamo's optimization pipeline.
Graph deduplication identifies identical subgraphs in the computational graph and merges them
to reduce redundancy and improve performance. The process involves analyzing regions of the graph,
identifying structurally equivalent regions, and replacing them with a single shared implementation.
This optimization is particularly effective for models with repeated patterns or similar computational
structures across different parts of the network.
    N)defaultdict)	GeneratorIterable)Optional)config)StorageWeakRef)
OrderedSet   )NodeRegion)_detect_cycles_get_flat_args_get_flat_args_uniquelast_node_to_additional_deps)returnc                 C   s   | j | j}| j j}t| j| j j}i }|D ]}t|}|d }t||\}}	t|	sZq.tj	
| j|}
| d|
}|
||< | j " | jd|di }W d   n1 s0    Y  |D ]}t| j|||	|||| qq.|at| j| |S )a  
    This is the main entry point for applying the graph deduplication pass. Deduplication occurs in two phases:
    1. Subgraph creation:
        Subgraph creation works by taking one representative region from each region group and creating a subgraph from it, which will then be used to replace all regions in the group. This is implemented by first copying all nodes of the region to the new subgraph and then finding all inputs which are not within the region and creating placeholders for them. For the outputs, all regions in a region group need to be scanned to ensure the largest set of outputs is found, and then an output node is created which returns a tuple of all outputs.

    2. Graph replacement:
        To replace each region with the extracted subgraph, the node index in the region and argument index within the node's flattened args and kwargs are recorded once during subgraph creation. This allows us to determine which (external to the region) nodes and in which order these nodes are passed as inputs. For the outputs, getitem nodes are created for each output, and all nodes in the region with external outputs are replaced by the proper getitem node. Finally, all original nodes are erased (there should be no uses of these left in the graph).

The deduplication mutates the output_graph argument in place.

Returns a mapping of nodes to their subgraph output replacement node to remap outputs
when they are created in output_graph.
    r   subgraphZget_attr N)Zregion_trackerZget_identical_regionsgraphnode_to_mutated_arg_positions_populate_additional_deps_get_all_output_indices_create_subgraphlisttorchfxGraphModuleZ
nn_modulesZinstall_subgraphZinserting_beforecreate_node_replace_region_with_subgraphr   _stable_topological_sort)Zoutput_graphZduplicated_region_groupsr   node_to_additional_depsZsub_gmsZregion_groupinds_with_external_usersregionr   external_node_usagesZsub_gmsubgraph_nameget_subgraph_noder   r   O/var/www/auris/lib/python3.9/site-packages/torch/_dynamo/graph_deduplication.pyapply_graph_deduplication%   sT    
"r&   )	r   r!   r$   r"   r    r#   r   r   r   c              
   C   s|  g }|D ]r}	t t|	\}
}||
 }t|i }|	D ]8\}}|| }||v r2||| v r2td|   d S q2|||  qt|||rd S ||g|R }| dtj	j
j|i }t|D ]4\}}|| }| dtj||fi }|j|dd qt|D ]X}| | ||d  | D ]4}z|| || W n tyH   Y n0 qqtjrxtt| | t| | |   d S )Nz3NYI: Failed to substitute region %s due to mutationcall_functionT)Zpropagate_meta)nextiterr   logdebugappend_has_aliasingr   r   opsZhigher_orderZinvoke_subgraph	enumerateoperatorgetitemZreplace_all_uses_withreversedZ
erase_nodepopvaluesremoveaddKeyErrorr   Zgraph_deduplication_lintprintr   r   Zlint)r   r!   r$   r"   r    r#   r   r   Zsub_argsZusagesnode_indZ	usage_indnodeflattened_args_kwargsZuser_indZnode_usage_induserZinvoke_argsZinvoke_subgraph_nodeindZexternal_user_indZsubgraph_outputdepsr   r   r%   r   x   sT    





r   )r!   r   c                 C   sr   t ttt f t}t| }t| D ]H\}}t|i }t|D ],\}}t|tr>||vr>|| ||f q>q$|S N)	r   r
   r   
UsageIndexsetr/   r   
isinstancer6   )r!   Zexternal_node_to_usagesZregion_uniquer9   r:   r;   Zarg_indZin_noder   r   r%   _get_external_inputs   s    
rC   )regionsr   c                 C   s"   t  }| D ]}t|| q
t|S r?   )rA   _get_inds_with_external_userssorted)rD   r    r!   r   r   r%   r      s    r   )r!   inds_uniquer   c                 C   s<   t | D ].\}}|jD ]}|| vr||vr|| qqd S r?   )r/   usersr6   )r!   rG   r=   r:   r<   r   r   r%   rE      s
    
rE   )r   r!   r   c                    s   t |}ttt   }i | D ],\}}| d|j }||< || q"ttdfdd |D ] }| 	| fdd}||< qh|S )NZsubgraph_input_)r:   r   c                    s   |  v r |  S | S d S r?   r   )r:   )region_to_subgraph_noder   r%   map_arg   s    z-_copy_nodes_and_remap_inputs.<locals>.map_argc                    s    | S r?   r   )old)rJ   r   r%   <lambda>       z._copy_nodes_and_remap_inputs.<locals>.<lambda>)
rC   r   r   r@   itemsplaceholdernamer,   r
   Z	node_copy)r   r!   Zexternal_input_to_usagesr"   r:   Zusage_indicesrO   Zsubgraph_noder   )rJ   rI   r%   _copy_nodes_and_remap_inputs   s    
rQ   )r   inds_to_outputr   c                    s4   dd | j D  t fdd|D }| | d S )Nc                 S   s   g | ]}|j d vr|qS ))rO   output)op.0nr   r   r%   
<listcomp>   rM   z,_create_subgraph_outputs.<locals>.<listcomp>c                 3   s   | ]} | V  qd S r?   r   )rV   r=   Z	node_listr   r%   	<genexpr>   rM   z+_create_subgraph_outputs.<locals>.<genexpr>)nodestuplerS   )r   rR   Zout_tupr   rY   r%   _create_subgraph_outputs   s    r]   )r!   r    r   c                 C   s&   t j }t|| }t|| ||fS r?   )r   r   GraphrQ   r]   )r!   r    r   r"   r   r   r%   r      s    


r   )r   r   r   c                    s   t t| j}tt   tt }tt  }d }|r| }|jdkr^|| |j	r.J dq. fddt
||D }|r||d  | q. | |r|j|ur|| |}|t||d q. | |st t| jksJ d S )NrS   z!output nodes should have no usersc                    s   g | ]}| vr|qS r   r   )rV   xreadyr   r%   rX   (  s   z,_stable_topological_sort.<locals>.<listcomp>r   )r   r2   r[   r   r
   r   r3   targetr6   rH   r   r,   r(   extendupdatelen)r   r   pendingZwaitingoutputscursorr:   Zwaiting_forr   r`   r%   r   	  s.    







r   )r   r   r   c                 C   s    t t}t|| t| | |S r?   )r   r   _add_mutation_dependencies_add_global_state_dependencies)r   r   r   r   r   r%   r   >  s    

r   c                    s   dd l }t| j}|jj|jjh}g }tt tttt tf d d f ddd}||D ]n\}}t	|i   fdd|D }	|	r|| }
|

|	 |j|v rX|| }
|

 fdd|D  || qXd S )	Nr   )	all_nodesr   c                 s   s6   g }t t| }|r2| }||fV  || qd S r?   )r   r2   r3   r,   )rl   
prev_nodesZ
next_nodescur_noder   r   r%   prev_cur_nodesR  s    
z6_add_global_state_dependencies.<locals>.prev_cur_nodesc                    s   g | ]}| vr|qS r   r   rU   Zargs_uniquer   r%   rX   _  rM   z2_add_global_state_dependencies.<locals>.<listcomp>c                 3   s   | ]}| vr|V  qd S r?   r   rU   rp   r   r%   rZ   g  rM   z1_add_global_state_dependencies.<locals>.<genexpr>)Z	torch.ampr   r[   ampZ_enter_autocastZ_exit_autocastr
   r   r\   r   re   rc   r,   )r   r   r   rl   Zglobal_state_targetsZall_nodes_dep_onro   rm   rn   Znew_depsZadditional_depsr   rp   r%   rk   G  s"    



rk   )r   r   r   c                 C   sx   |   D ]j\}}t|i }|D ]R}|| }|jD ]>}||u r@q0q0||k rX|| | q0||kr0|| | q0qqd S r?   )rN   r   rH   r6   )r   r   r:   indicesZflat_args_kwargsindexZmutated_argr<   r   r   r%   rj   k  s    

rj   )r!   inputsr    r   c           
         s.  t   |D ]P}|jd }t|tjr
t| }| v rRtd|  | |  dS | |< q
t  |D ]j}| | }|rf|jd }t|t	rJ t|tjrft| }|v rtd| | |  dS ||< qf 
 
 @ }t|dkr* fdd|D }	dd	d |	D }	td
| |	 dS dS )Nexample_valuezYNYI: Failed to substitute region %s due to input-output aliasing detected at nodes %s, %sTzZNYI: Failed to substitute region %s due to output-output aliasing detected at nodes %s, %sr   c                    s   g | ]} | | fqS r   r   )rV   sZinput_storagesZoutput_storagesr   r%   rX     s   z!_has_aliasing.<locals>.<listcomp>z, c                 S   s   g | ]\}}| d | qS )z and r   )rV   ior   r   r%   rX     rM   zUNYI: Failed to substitute region %s due to input-output aliasing detected at nodes %sF)dictmetarB   r   ZTensorr   Z_typed_storager*   r+   r   keysrf   join)
r!   rt   r    r:   ru   Zstoragerx   Zout_nodeZintersected_storagesaliasedr   rw   r%   r-     sX    



r-   )6__doc__loggingr0   collectionsr   collections.abcr   r   typingr   r   Ztorch.fxZtorch._dynamor   Z torch.multiprocessing.reductionsr   Ztorch.utils._ordered_setr   Zgraph_region_trackerr
   r   Zgraph_utilsr   r   r   r\   intr@   	getLogger__name__r*   r   rz   __annotations__strr   r   r&   r^   r   r   rC   r   rA   rE   rQ   r]   r   r   r   rk   rj   boolr-   r   r   r   r%   <module>   st   	
T
D
	
	6
%