o
    Zh$                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ eeZd	eeef fd
dZded	ee fddZdejjdededeeeef  dee dejjdedeeef d	dfddZded	eeeeef f fddZdee d	ee fddZdede e d	dfdd Z!d!ejjded	eeeef ef fd"d#Z"d!ejjd$ee d	dfd%d&Z#dedee d	eejjeeeef ef f fd'd(Z$dS ))a"  
This module implements graph deduplication functionality for TorchDynamo's optimization pipeline.
Graph deduplication identifies identical subgraphs in the computational graph and merges them
to reduce redundancy and improve performance. The process involves analyzing regions of the graph,
identifying structurally equivalent regions, and replacing them with a single shared implementation.
This optimization is particularly effective for models with repeated patterns or similar computational
structures across different parts of the network.
    N)IterableAny)%has_potential_input_alias_or_mutation)tree_flatten   )NodeRegionreturnc                 C   s   | j | j}i }|D ]P}t|}|d }t||\}}tj| j|}| 	d|}	| j
  | jd|	di }
W d   n1 sCw   Y  |D ]}t| j||
| |||	| qJq|S )a  
    This is the main entry point for applying the graph deduplication pass. Deduplication occurs in two phases:
    1. Subgraph creation:
        Subgraph creation works by taking one representative region from each region group and creating a subgraph from it, which will then be used to replace all regions in the group. This is implemented by first copying all nodes of the region to the new subgraph and then finding all inputs which are not within the region and creating placeholders for them. For the outputs, all regions in a region group need to be scanned to ensure the largest set of outputs is found, and then an output node is created which returns a tuple of all outputs.

    2. Graph replacement:
        To replace each region with the extracted subgraph, the node index in the region and argument index within the node's flattened args and kwargs are recorded once during subgraph creation. This allows us to determine which (external to the region) nodes and in which order these nodes are passed as inputs. For the outputs, getitem nodes are created for each output, and all nodes in the region with external outputs are replaced by the proper getitem node. Finally, all original nodes are erased (there should be no uses of these left in the graph).

The deduplication mutates the output_graph argument in place.

Returns a mapping of nodes to their subgraph output replacement node to remap outputs
when they are created in output_graph.
    r   subgraphZget_attr N)Zregion_trackerZget_identical_regionsgraph_get_all_output_indices_create_subgraphtorchfxGraphModuleZ
nn_modulesZinstall_subgraphZinserting_beforecreate_node_replace_region_with_subgraphkeys)Zoutput_graphZduplicated_region_groupsoutput_replacementsZregion_groupinds_with_external_usersregionr   Znode_ind_arg_indssub_gmsubgraph_nameget_subgraph_noder   r   P/var/www/auris/lib/python3.10/site-packages/torch/_dynamo/graph_deduplication.pyapply_graph_deduplication   s>   r   argsc                    s(   g dt dd f fdd  |  S )Nr   r
   c                    sP   t | \}}|D ]}t|tr |j}|j}|j} |||f q| qd S N)r   
isinstanceslicestartstopstepappend)r   Z	flattened_argr"   r#   r$   flattenZfully_flattenedr   r   r)   a   s   
z%_flatten_args_kwargs.<locals>.flattenr   )r   r   r(   r   _flatten_args_kwargs^   s   r*   r   r   r   node_ind_arg_indr   r   r   r   c              	   C   sD  g }|D ]\}	}
||	 }t |j|jf}|||
  q||t|f}dd |D }t||r7td| d S |d }| |X | 	dt
jjj|i }| |+ t|D ]\}}|| }| 	dtj||fi }|||< |j|dd qVW d    n1 sw   Y  t|D ]}| | qW d    d S 1 sw   Y  d S )Nc                 S   s   g | ]}|j d  qS )Zexample_value)meta).0noder   r   r   
<listcomp>   s    z1_replace_region_with_subgraph.<locals>.<listcomp>zBNYI: Failed to substitute region %s due to input alias or mutationZcall_functionT)Zpropagate_meta)r*   r   kwargsr%   tupler   logdebugZinserting_afterr   r   opsZhigher_orderZinvoke_subgraph	enumerateoperatorgetitemZreplace_all_uses_withreversedZ
erase_node)r   r   r   r+   r   r   r   r   Zsub_argsnode_indarg_indr.   flattened_args_kwargsZinvoke_argsZfake_inputsZlatest_region_nodeZinvoke_subgraph_nodeindZexternal_user_indZsubgraph_outputr   r   r   r   q   s@   


"r   c                 C   sl   t  }t| }t| D ](\}}t|j|jf}t|D ]\}}t|tr2||vr2||vr2||f||< qq|S r   )dictsetr6   r*   r   r1   r    r   )r   Zexternal_node_to_indicesZregion_uniquer:   r.   r<   r;   Zin_noder   r   r   _get_external_inputs   s   r@   regionsc                 C   s"   t  }| D ]}t|| qt|S r   )r?   _get_inds_with_external_userssorted)rA   r   r   r   r   r   r      s   r   inds_uniquec                 C   s<   t | D ]\}}|jD ]}|| vr||vr|| qqd S r   )r6   Zusersadd)r   rD   r=   r.   userr   r   r   rB      s   

rB   r   c                    s   t |}i }i | D ]}| d|j }||< || }d ||< qdtdtffdd |D ]}| | fdd}||< q1|S )NZsubgraph_input_r.   r
   c                    s   |  v r |  S | S r   r   )r.   )region_to_subgraph_noder   r   map_arg   s   z-_copy_nodes_and_remap_inputs.<locals>.map_argc                    s    | S r   r   )old)rH   r   r   <lambda>   s    z._copy_nodes_and_remap_inputs.<locals>.<lambda>)r@   r   placeholdernamer   Z	node_copy)r   r   Zexternal_inputs_to_indicesZindices_to_placeholder_indr.   rK   Zarg_indicesZsubgraph_noder   )rH   rG   r   _copy_nodes_and_remap_inputs   s   

rM   inds_to_outputc                    s4   dd | j D  t fdd|D }| | d S )Nc                 S   s   g | ]	}|j d vr|qS ))rK   output)op)r-   nr   r   r   r/      s    z,_create_subgraph_outputs.<locals>.<listcomp>c                 3   s    | ]} | V  qd S r   r   )r-   r=   Z	node_listr   r   	<genexpr>   s    z+_create_subgraph_outputs.<locals>.<genexpr>)nodesr2   rO   )r   rN   Zout_tupr   rR   r   _create_subgraph_outputs   s   rU   c                 C   s&   t j }t|| }t|| ||fS r   )r   r   GraphrM   rU   )r   r   r   Znode_ind_input_indsr   r   r   r      s   


r   )%__doc__loggingr7   collections.abcr   typingr   Ztorch.fxr   Ztorch._higher_order_ops.utilsr   Ztorch.utils._pytreer   Zgraph_region_trackerr   r	   	getLogger__name__r3   r>   r   listr*   r   rV   r2   intr   strr   r@   r   r?   rB   rM   rU   r   r   r   r   r   <module>   sv    	
E
	
-



