a
    hE                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlm  mZ d dlmZmZ d dlmZ d dlm Z m!Z! g d	Z"e#e$e#e#f d
ddZ%ee& ej'e(e#ef dddZ)ej'e(e#ef ejj*dddZ+d2ejj*ejj*dddZ,ej*ej*dddZ-ej*e.ej' e.ej' e.ej' dddZ/ej0ej1ej2ej3ej4ej5ej6ej7ej8ej9ej7ej:ej;gZ<ej=ej>gZ?ej0ej@ej1ejAej2dd iZBe.ej' e(e#ej*f ddd ZCe.ej' e(e#ej*f e(ej*ej*f d!d"d#ZDG d$d% d%ZEd3d(d)ZFeEeGd*d+d,ZHG d-d. d.ZIdejJfejj*ee(e#ef  e&ejJ ejj*d/d0d1ZKdS )4    N)defaultdict)Iterable)Enum)AnycastOptional)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inference)targetreturnc                 C   s&   |  dd^ }}|r|d nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r!   P/var/www/auris/lib/python3.9/site-packages/torch/fx/experimental/optimization.py_parent_name%   s    r#   )patternnodemodulesc                 C   s   t |jdkrdS |jd |f}t| |D ]d\}}t|tjsD dS |jdkrT dS t|jtsf dS |j|vrv dS t	||j |ur* dS q*dS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r$   r%   r&   nodesexpected_typeZcurrent_noder!   r!   r"   r   /   s    

r   )r%   r&   
new_modulec                 C   s<   t | jtsJ t| j\}}||| j< t|| || d S N)r+   r   r/   r#   setattr)r%   r&   r3   parent_namer    r!   r!   r"   r   C   s    
r   F)modelr   c                 C   s4  t jt jft jt jft jt jft jt jfg}|s:t	| } |rLt
| tjjsXt| }n| }t| }t	|j}|D ]}|jD ]}t|||rt|jd jdkrq||jd j }	||j }
|
jsq|d t jt jt jfv rt|	|
}n
t|	|
}t|jd || ||jd  || qqxt||S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnZConv1dZBatchNorm1dConv2dBatchNorm2dZConv3dZBatchNorm3dLinearcopydeepcopyr+   torchr,   GraphModulesymbolic_tracedictnamed_modulesgraphr1   r   r(   r)   usersr   Ztrack_running_statsr   r   r   replace_all_uses_with
erase_node)r7   ZinplaceZno_tracepatternsfx_modelr&   	new_graphr$   r%   Zfirst_layerZbnZfused_layerr!   r!   r"   r   L   s8    







r   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s8   e Zd Zeeedf eeef ed fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemover.)r   r)   kwargsr   c                    s>   t | j| tjr*t|dks"J |d S t |||S d S )Nr   r   )r+   Z
submodulesr8   ZDropoutr(   superr'   )selfr   r)   rJ   	__class__r!   r"   r'   {   s    z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r	   tupler   rA   r/   r   r'   __classcell__r!   r!   rM   r"   DropoutRemoverz   s   rT   )r,   r@   r>   ZTransformerZ	transform)r7   rH   rT   r!   r!   r"   r   t   s    

r   )orig_moduler1   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ] }|| fdd}| |< q.| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S r4   r!   )xenvr!   r"   <lambda>       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r!   r!   ).0outputrY   r!   r"   
<listcomp>   r\   z$extract_subgraph.<locals>.<listcomp>)r,   Graphplaceholderr    Z	node_copyr^   lintr?   )rU   r1   rV   rW   rI   inputnew_noder%   r!   rY   r"   r      s    	

r   c                 C   s
   t | S r4   )	th_mkldnnZMkldnnBatchNorm)a_r!   r!   r"   r[      r\   r[   )r1   r&   c                 C   s   i }| D ]r}|j dkrt|jts&J ||j }t|tv rtt| |tj}t|tj	s`J t
|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r'   )r.   r+   r   r/   r0   
mkldnn_mapr>   floatr8   Moduler<   r=   r   )r1   r&   old_modulesr%   
cur_moduler3   r!   r!   r"   r      s    

r   )r1   r&   rk   c                 C   sJ   | D ]@}|j dkrt|jts"J ||j }||v rt||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r'   N)r.   r+   r   r/   r   )r1   r&   rk   r%   rl   r!   r!   r"   r      s    	

r   c                   @   s   e Zd ZejdddZdS )r   fx_graphc                 C   s   || _ g | _g | _g | _d S r4   )rn   r1   start_nodes	end_nodes)rL   rn   r!   r!   r"   __init__   s    zMklSubgraph.__init__N)rO   rP   rQ   r,   r`   rq   r!   r!   r!   r"   r      s   r   
   r   c                    s(   ddt td fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrC   r   c                    s   | j }d u r,| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r!   )r>   Zrandnshaper]   r%   r!   r!   r"   r_      r\   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r)   ru   r!   r!   r"   r_      r\   c                    s<   t D ]
}|   qt }t  D ]
}|   q$t | S r4   )rangetime)frg   begin)iterswarmupr!   r"   	benchmark   s    z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S   s   g | ]}|  qS r!   )to_denser]   ir!   r!   r"   r_     s   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   s   g | ]}|  qS r!   )	to_mkldnnr~   r!   r!   r"   r_     r\   r!   r!   Zsample_inputs	submoduler!   r"   r[     s   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S r4   r!   r!   r   r!   r"   r[     r\   )ro   rn   Zowning_modulerk   r
   	propagater   listr,   r-   rp   r   r1   r   rC   rA   rB   )rC   Zinput_nodesZoutput_argsr|   Zmkl_timeZno_mkl_timeexample_inputsrH   rz   rk   r{   r   r"   use_mkl_heuristic   s"    z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)r   bool)r   rz   r{   r   r!   r   r"   r      s    	r   rs   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r(   r1   )rC   r!   r!   r"   r     s    r   c                   @   sB   e Zd Zdd ZedddZeedddZeed	d
dZdS )r   c                 C   s   d g| | _ dg| | _d S )Nr   r   size)rL   nr!   r!   r"   rq     s    zUnionFind.__init__)vc                 C   s   || j |< d| j|< d S )Nr   r   )rL   r   r!   r!   r"   make_set   s    
zUnionFind.make_set)r   r   c                 C   sB   | j | }||kr|S |d us"J | || j |< tt| j | S r4   )r   findr   int)rL   r   parr!   r!   r"   r   $  s    
zUnionFind.find)rf   bc                 C   sf   |  ||  | }}||kr"|S | j| | j| k r@|| }}|| j|< | j|  | j| 7  < d S r4   )r   r   r   )rL   rf   r   r!   r!   r"   join,  s    

zUnionFind.joinN)rO   rP   rQ   rq   r   r   r   r   r!   r!   r!   r"   r     s   r   )r7   pass_configtracerr   c              	      sR  dddt id}|du ri }|| |d r6t| } |d rFt| } |d du rV| S t|d tsltd	d|d vrtd
|d d }| }|t	|  t
|j  t|  }G dd dt}t jD ]}|j}	|jdkrV||j }
t|
tv r|j}	t|
 d}|dur|jtjks:J d|jtdksJ dn2|jdkr|jtv rv|j}	n|jtv r|j}	|	|jkr|	|jkrtdd |j D sqֈ !|& t
"|j  fdd}W d   n1 s0    Y  t#t$t
j%j& ||_  '|2  (dd|f}|)| |f|_ W d   q1 sP0    Y  qt*t j|}| _+ jD ]}|jdkrx|jdkrx|j d }t|j,}|D ]2}|jdkr|jdkr|)|  -| qt.|j,dkrx -| qxt. j}t/|fddt0 jD ]\}}|jdkrX|jdkrX||_12| n|jdkr|jdkr|j d dusJ |j d |_3ntfdd|j4D }t.|dkrq&tdd |D rJ t5|}|d |_6|dd D ]}7|d | qq&t8 fd d} jD ]r}t9|d!rP|:|j6 j;| t9|d"rt|:|j1 j<;| t9|d#r(|:|j3 j=;| q(|> D ]P}||s|j<|j= D ]$}|j d }|)|  -| qt?|j|| qd} jD ]&}|jdks|jdkr |d7 }q t@AtBCd$|  D  t
|  }|S )%a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr   r      N)rO   rP   rQ   NOYESUNKNOWNr!   r!   r!   r"   
MklSupportb  s   r   r'   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc                 s   s   | ]}|j d kV  qdS )r}   N)r   )r]   argr!   r!   r"   	<genexpr>  r\   z)optimize_for_inference.<locals>.<genexpr>c                    s     d| fS )Nr   )call_methodr   rm   r!   r"   r[     r\   z(optimize_for_inference.<locals>.<lambda>r   r}   r   r   c                    s0   t | dr | jS t | dr, | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr!   r"   	get_color  s
    

z)optimize_for_inference.<locals>.get_colorc                    s,   g | ]$}t |tjr |d ur |qS r4   )r+   r,   r-   r~   )r   r!   r"   r_     s   z*optimize_for_inference.<locals>.<listcomp>c                 s   s   | ]}|d u V  qd S r4   r!   r~   r!   r!   r"   r     r\   r   c                      s   t  S r4   )r   r!   rm   r!   r"   r[     r\   r   r   	end_colorzmkldnn conversions: %s)Er   updater   r   r+   rA   RuntimeErrortracer<   r=   r,   r?   rootrB   r   r   r1   r   r.   r   r0   mkldnn_supportedr   next
parametersZdtyper>   ri   Zdevicemkldnn_supported_unknownr   anyr)   Zinserting_beforeZmap_argr   rR   r%   r   Zinserting_afterZcreate_noderE   r   rk   rD   rF   r(   r   	enumerater   r   r   Zall_input_nodessortedr   r   r   r   r   appendro   rp   valuesr   logging	getLoggerrO   inforb   )r7   r   r   Zdefault_pass_configr   Z
cur_tracerr&   r   r%   Zsupports_mkldnnrl   Zsample_parameterZmkldnn_argsZdense_xrk   Zprv_noderD   userZ	num_nodesZcur_idxZ
cur_colorsZother_colorZmkldnn_graphsrC   ZprvZmkldnn_conversionsresultr!   )rn   r   r   r"   r   6  s    
	


$
*











r   )FF)rr   r   )Lr<   r   operatorrw   collectionsr   collections.abcr   enumr   typingr   r   r   r>   Ztorch.fxr,   Ztorch.nnr8   Ztorch.nn.functionalZ
functionalFZtorch.utils.mkldnnutilsZmkldnnre   Ztorch.fx.noder   r	   Ztorch.fx.passes.shape_propr
   Ztorch.nn.utils.fusionr   r   __all__r/   rR   r#   r0   r-   rA   r   rj   r   r   r   r   r   r9   r;   r:   ZReLUZ	MaxPool2dZ	AvgPool2dZAdaptiveAvgPool2dZreluZ	transposeZsigmoidZ
avg_pool2dZadaptive_avg_pool2dr   addmulr   ZMkldnnConv2dZMkldnnLinearrh   r   r   r   r   r   r   r   ZTracerr   r!   r!   r!   r"   <module>   s   	(
 
.	