o
    ZhF\                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZ dd	lmZmZ dd
lmZ ddlm Z m!Z!m"Z" ddl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)m*Z* er~d dl+m,Z, edZ-G dd dej.j/Z0G dd deZ1G dd deZ2G dd deZ3G dd dZ4G dd dZ5G dd de Z6G dd de"Z7dS )     )annotationsN)autoEnum)AnyCallable
NamedTupleOptionalTYPE_CHECKINGTypeVar)identity)Scope
TracerBase)SymT   )configdependencies)index_prevent_reordering)DefaultHandler
OpsHandlerWrapperHandler)cache_on_selfreduction_num_outputssympy_index_symbol_with_prefix
sympy_subs)opsV)SequenceTc                      sL   e Zd Zeeddd Z fddZd fd
dZ fddZ	  Z
S )InterpreterShimNc                   C  s   t jtS N)torchfxZsymbolic_tracer    r"   r"   H/var/www/auris/lib/python3.10/site-packages/torch/_inductor/loop_body.py	_dummy_gm&   s   zInterpreterShim._dummy_gmc                   s>   t  j|  dd | | _|| _|| _d| _|j| _d | _	d S )NF)Zgarbage_collect_values)
super__init__r$   modulegraph
submodulesZextra_traceback__getitem__Z
fetch_attrcurrent_nodeselfr(   r)   	__class__r"   r#   r&   +   s   
zInterpreterShim.__init__ntorch.fx.Nodereturnr   c                   s   || _ t |S r   )r+   r%   run_node)r-   r0   r.   r"   r#   r3   6   s   zInterpreterShim.run_nodec                   s@   t |  t j|i |W  d    S 1 sw   Y  d S r   )r   Zset_interpreter_handlerr%   run)r-   argskwargsr.   r"   r#   r4   :   s   $zInterpreterShim.run)r0   r1   r2   r   )__name__
__module____qualname__staticmethod	functools	lru_cacher$   r&   r3   r4   __classcell__r"   r"   r.   r#   r   %   s    r   c                      s   e Zd Z fddZ  ZS )LightTracerc                   s8   t    tjj| jd| _tdd | _i | _	i | _
d S )N)Z
tracer_cls )r%   r&   r    r!   ZGraphr/   r(   r   scopeZmodule_stackZnode_name_to_scoper-   r.   r"   r#   r&   A   s
   

zLightTracer.__init__)r7   r8   r9   r&   r=   r"   r"   r.   r#   r>   @   s    r>   c                   @  s&   e Zd ZU ded< ded< ded< dS )MemoryEntrystr
index_nameOptional[str]buffer_namemodeN)r7   r8   r9   __annotations__r"   r"   r"   r#   rB   I   s   
 rB   c                   @  s6   e Zd Ze Ze Ze Ze Ze Ze Z	e Z
dS )MemoryUsageTypeN)r7   r8   r9   r   LOAD	LOAD_SEEDSTORESTORE_REDUCTION
INDEX_EXPRCHECK_BOUNDS	BUCKETIZEr"   r"   r"   r#   rI   O   s    
rI   c                      sL  e Zd ZU dZded< ded< ded< ded	< d
ed< ded< ded< ded< ded<  fddZdd ZdSddZdTddZdUd d!Z	dUd"d#Z
ed$d% Zed&d' Zed(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 ZdVd5d6ZeZ	7	7dWdXd?d@ZdAdB ZdCdD ZdEdF ZdGdH ZdIdJ ZdKdL ZdMdN ZdOdP ZdQdR Z   Z!S )YLoopBodyz
    Captures the body of a Loops subclass into an FX graph.  Persists any
    indexing simplifications and makes it easier to analyze loop bodies.
    zdict[str, sympy.Expr]indexing_exprszdict[sympy.Expr, str]indexing_exprs_namedict[str, Any]r)   zdict[str, LoopBodyBlock]	subblockszlist[sympy.Symbol]indirect_varszdict[sympy.Symbol, sympy.Expr]indirect_var_rangesLoopBodyBlock
root_blockz(dict[MemoryUsageType, list[MemoryEntry]]memory_usagecollections.Counter[str]	op_countsc                   sx   t    t| }|d t| |t|d  f| _|| _|| _|| _t	|t
r1| || n| || d | _d S r   )r%   r&   tuplevalueslensizes	iter_varsreduce_vars
var_ranges
isinstancerQ   _init_with_copy_init_with_tracingindexing)r-   fnr5   rc   ra   rb   Z_flat_sizesr.   r"   r#   r&   j   s   


zLoopBody.__init__c                 C  sZ   i | _ i | _d| ji| _i | _g | _i | _dd tD | _t	
 | _t| ||| _| `dS )z9Do an FX trace of an arbitrary callable to construct self	get_indexc                 S  s   i | ]}|g qS r"   r"   ).0tr"   r"   r#   
<dictcomp>   s    z/LoopBody._init_with_tracing.<locals>.<dictcomp>N)rR   rS   ri   r)   rU   rV   rW   rI   rZ   collectionsCounterr\   rX   rY   )r-   rh   r5   r"   r"   r#   rf   ~   s   
zLoopBody._init_with_tracingotherc                   s   | |} fdd| D  _ fdd|j D  _|j _|j _|j _|j _|j	  _i |j
}|d d ji fdd| D  _
dS )z
        _init_with_tracing() is slow, so this is a fast path in the case
        where we are just reordering/merging/splitting the args of an
        existing LoopBody.
        c                   s$   i | ]\}}|t jj| jqS r"   )r   r(   sizevarssimplify_with_rangesrc   rj   nameexprrA   r"   r#   rl      s    z,LoopBody._init_with_copy.<locals>.<dictcomp>c                      i | ]
\}}||  qS r"   clonerj   kvrA   r"   r#   rl          ri   c                   ru   r"   rv   rx   rA   r"   r#   rl      r{   N)indexing_from_argsitemsrR   rU   rV   rW   rZ   r\   rY   rw   r)   popri   )r-   ro   r5   rR   r)   r"   rA   r#   re      s    




zLoopBody._init_with_copyrs   rC   c                 C  s   | j |ddkS )Nr   )r\   getr-   rs   r"   r"   r#   has_op   s   zLoopBody.has_opr2   c                 C  s   | }| j }|j\}}|\}}g |j }tjj||t|||\}}	}
tjj||t|||\}}}
t	j
||dd\\}}}t||	|||g|||}t	j
||dd\\}}}t|||f|||}|S )zU
        Merge both iteration and reduction loops and return a new LoopBody.
        rk   )prefixp)r`   varsrR   r^   r   r(   rp   Z_simplify_loopsr   r   index_vars_no_squeezerQ   )r-   old_body	old_sizesZold_iter_varsZold_reduce_varsZold_iter_sizesZold_reduce_sizesZindex_exprsZ
iter_sizesZiter_reindex_Zreduce_sizesZreduce_reindexra   rb   rc   new_body
iter_vars2reduce_vars2var_ranges2Z	new_body2r"   r"   r#   merge_loops   sF   





zLoopBody.merge_loopsc                   s   ddl m} | | j}t|d t|ksJ ||}|\|}|f}tj|ddi\\}}}	dd t|D   fdd	tt|D  d fdd}
t|
||f|	||}tj|ddi\\}}}t|||f|||}
|
S )zD
        Reorder iteration loops and return a new LoopBody.
        r   )same_reorderr   r   rk   c                 S  s   i | ]\}}||qS r"   r"   )rj   abr"   r"   r#   rl      s    z/LoopBody.reorder_iter_loops.<locals>.<dictcomp>c                      g | ]} | qS r"   r"   rj   i)inverse_orderr"   r#   
<listcomp>       z/LoopBody.reorder_iter_loops.<locals>.<listcomp>indicesSequence[sympy.Expr]r2   r   c                    sh   g t j| }t|tt ksJ |d t  |td  } fddD   |S )Nc                   r   r"   r"   r   Ziter_idxr"   r#   r      r   zALoopBody.reorder_iter_loops.<locals>.new_body.<locals>.<listcomp>)	itertoolschainfrom_iterabler_   )r   indexZ
reduce_idxr   Z	iter_sizer   Zreduce_sizer   r#   r      s   
z-LoopBody.reorder_iter_loops.<locals>.new_bodyr   N)r   r   r2   r   )	Zirr   r`   r_   r   r   	enumeraterangerQ   )r-   Z	new_orderr   r   Z
reorder_fnZnew_iter_sizeZ	new_sizesra   rb   rc   r   Z	loop_bodyr   r   r   r"   r   r#   reorder_iter_loops   s8   zLoopBody.reorder_iter_loopsc                 C  s(   | j d usJ | jd usJ | j | jfS r   )ra   rb   rA   r"   r"   r#   r     s   zLoopBody.varsc                 C  s0   t | jjfdd | j D }dd |D S )Nc                 s  s    | ]}|j V  qd S r   )r(   )rj   blockr"   r"   r#   	<genexpr>  s    z%LoopBody.get_nodes.<locals>.<genexpr>c                 S  s   g | ]
}|j D ]}|qqS r"   )nodes)rj   r(   noder"   r"   r#   r     r{   z&LoopBody.get_nodes.<locals>.<listcomp>)r   r   rY   r(   rU   r^   )r-   Z
all_graphsr"   r"   r#   	get_nodes  s
   zLoopBody.get_nodesc                 C  s   ddl m} || S )Nr   )	BoundVars)boundsr   )r-   r   r"   r"   r#   r     s   zLoopBody.boundsc                 C  s8   t | jtj D ]}|j|kr| j|j   S qt|r   )reversedrZ   rI   rJ   rF   rR   rD   KeyErrorr-   rF   entryr"   r"   r#   get_read_expr%  s
   
zLoopBody.get_read_exprc                 C  sD   t | jtj | jtj D ]}|j|kr| j|j   S qt	|r   )
r   r   rZ   rI   rL   rM   rF   rR   rD   r   r   r"   r"   r#   get_write_expr,  s   


zLoopBody.get_write_exprc                   s    fdd j tj D S )Nc                      g | ]} j |j qS r"   rR   rD   rj   r   rA   r"   r#   r   6      
z+LoopBody.get_read_exprs.<locals>.<listcomp>)rZ   rI   rJ   rA   r"   rA   r#   get_read_exprs5  s   

zLoopBody.get_read_exprsc                   s*    fddt  jtj  jtj D S )Nc                   r   r"   r   r   rA   r"   r#   r   <  r   z,LoopBody.get_write_exprs.<locals>.<listcomp>)r   r   rZ   rI   rL   rM   rA   r"   rA   r#   get_write_exprs;  s   


zLoopBody.get_write_exprsc                 C  s`   dt | j g}|dd | j D  |dd td| jfg| j D  d	|S )Nzvar_ranges = c                 S  s   g | ]\}}| d | qS )z = r"   )rj   rs   valr"   r"   r#   r   F  s    z&LoopBody.debug_str.<locals>.<listcomp>c                 S  s   g | ]	\}}| |qS r"   )	debug_str)rj   rs   r   r"   r"   r#   r   H  s    body
)
dictrc   extendrR   r}   r   r   rY   rU   join)r-   linesr"   r"   r#   r   D  s   
zLoopBody.debug_strboolc                 C  sB   t | jtj dko t | jtj dko t | jdko | jdS )zx
        True of this contains only a single loads and store.
        Note, this could involve a layout change.
        r   )loadstore)r_   rZ   rI   rJ   rL   r)   rY   contains_only_opsrA   r"   r"   r#   is_memory_copyQ  s   
zLoopBody.is_memory_copyNrt   
sympy.ExprmtyperI   rF   rE   rG   c                 C  sP   | j |}|sdt| j }|| j |< || j|< | j| t||| |S )Nr   )rS   r   r_   rR   rZ   appendrB   )r-   rt   r   rF   rG   rs   r"   r"   r#   add_index_expr_  s   

zLoopBody.add_index_exprc                 C  s<   |d   r|| jvr|}n	| t| j }|| j|< |S )zaNot actually for nn.Modules, but subblocks in generated code are mapped to FX call_module opcodes)	isnumericr)   r_   )r-   r   r   rs   r"   r"   r#   add_submodulen  s
   
zLoopBody.add_submodulec                 C  s:   t tjt| j}|| jvsJ | j| || j|< |S r   )r   r   ZINDIRECTr_   rV   rW   r   )r-   sizevarr"   r"   r#   add_indirectw  s
   
zLoopBody.add_indirectc                   sB   t t  kr
dS | jdusJ  fdd| j D | _dS )z,Swap in a variable used in indirect indexingNc                   s    i | ]\}}|t | iqS r"   r   rx   newoldr"   r#   rl     s     z-LoopBody.replace_indirect.<locals>.<dictcomp>)rC   rg   r}   )r-   r   r   r"   r   r#   replace_indirect~  s    zLoopBody.replace_indirectc                 C  s   | j d usJ | j | S r   )rg   r   r"   r"   r#   ri     s   
zLoopBody.get_indexc                   s   g t j|}t|tjksJ |jftfdd|D s.J djd|ttj |  fddj	
 D S )Nc                 3  s    | ]}| j vV  qd S r   )rc   )rj   rz   rA   r"   r#   r     s    z.LoopBody.indexing_from_args.<locals>.<genexpr>zself.var_ranges=z
, indices=c                   s   i | ]
\}}|t | qS r"   r   rr   )replacementsr"   r#   rl     s    
z/LoopBody.indexing_from_args.<locals>.<dictcomp>)r   r   r   r_   rc   allr   zipkeysrR   r}   )r-   r   r   r"   )r   r-   r#   r|     s    
zLoopBody.indexing_from_argsc                 G  s   |  || _|  }d | _|S r   )r|   rg   rY   )r-   r   resultr"   r"   r#   __call__  s   zLoopBody.__call__c                   s0    fdd}t jtj d|_|S )Nc              	     s    tj|   d S r   )r   r   r   indirect_indexing)Znew_varcheckr-   r   r   wrap_negr"   r#   set_indirect  s   z5LoopBody.bind_set_indirect_shim.<locals>.set_indirect)r   r   r   r   )r;   partialrQ   bind_set_indirect_shimrw   )r-   r   r   r   r   r   r"   r   r#   r     s   zLoopBody.bind_set_indirect_shimc                   s"    fdd}t jtj d|_|S )Nc                   s   t j|  |S r   )r   r   scan)dtypesr^   
combine_fnr"   r#   shim  s   z%LoopBody.bind_scan_shim.<locals>.shimr   )r;   r   rQ   bind_scan_shimrw   )r-   r   r   r"   r   r#   r     s   zLoopBody.bind_scan_shimc                   s$    fdd}t jtj d|_|S )Nc                   s   t j| j  |S r   )r   r   maskedrU   )maskro   rs   r-   r"   r#   r        z'LoopBody.bind_masked_shim.<locals>.shim)rs   )r;   r   rQ   bind_masked_shimrw   )r-   rs   r   r"   r   r#   r     s   zLoopBody.bind_masked_shim)ro   rQ   )rs   rC   )r2   rQ   r2   r   NN)rt   r   r   rI   rF   rE   rG   rE   )"r7   r8   r9   __doc__rH   r&   rf   re   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   __repr__r   r   r   r   ri   r|   r   r   r   r   r=   r"   r"   r.   r#   rQ   Z   sT   
 



;.


		
	rQ   c                   @  s@   e Zd ZdZddd	Zd
d ZdddZdddZdddZdS )rX   a  
    Captures the body of a Loops subclass into an FX graph.
    In normal cases there will be a 1:1 mapping between LoopBody and
    LoopBodyBlock, hower in the case of ops.masked() the masked out
    operations will manifest as an extra LoopBodyBlock.
    r   rQ   rh   Callable[..., Any]r5   	list[Any]c                 C  s   || _ t }|dddi }ddlm} tt||||j}tj	r+||| j j
| j j}t| t||  W d    n1 sBw   Y  |j| _d S )Nplaceholderr   r"   r   )IndexPropagation)r   r>   create_proxyZindex_propagationr   CountOpsCaptureIndexingr\   r   Zconstant_and_index_propagationrc   rW   r   Zset_ops_handlerr   outputr(   )r-   r   rh   r5   tracerZ	proxy_opsr   handlerr"   r"   r#   r&     s    
zLoopBodyBlock.__init__c                 C  s"   | j }| jj}t||t S r   )r(   r   r)   r   r4   r   Zget_ops_handlerr,   r"   r"   r#   r     s   zLoopBodyBlock.__call__r   c              
   C  s8   t j| jj| jj}tdd|	 
dd| dS )Nz;[^\n]*r?   zdef forward(zdef ()r    r!   ZGraphModuler   r)   r(   coderesubstripreplace)r-   rs   r   r"   r"   r#   r     s   zLoopBodyBlock.debug_strr2   r   c                   s    t  fdd| jjddD S )Nc                 3  s    | ]}|j  v V  qd S r   )target)rj   r   allowed_opsr"   r#   r     s
    
z2LoopBodyBlock.contains_only_ops.<locals>.<genexpr>Zcall_method)op)r   r(   Z
find_nodes)r-   r   r"   r   r#   r     s   zLoopBodyBlock.contains_only_opsc                 C  s(   t t }|ji | jd|i |S )z'Shallow copy with a new parent LoopBodyr   )rX   __new____dict__update)r-   r   copyr"   r"   r#   rw     s   
zLoopBodyBlock.cloneN)r   rQ   rh   r   r5   r   )r   r   )r   rQ   )	r7   r8   r9   r   r&   r   r   r   rw   r"   r"   r"   r#   rX     s    


	rX   c                   @  s    e Zd ZdddZdddZdS )r   innerOpsHandler[Any]countsr[   c                 C  s   || _ || _d S r   )_inner_counts)r-   r  r  r"   r"   r#   r&     s   
zCountOps.__init__rs   rC   r5   tuple[Any, ...]r6   rT   r2   r   c                 C  s(   | j |  d7  < t| j||i |S )Nr   )r
  getattrr	  )r-   rs   r5   r6   r"   r"   r#   _default  s   zCountOps._defaultN)r  r  r  r[   )rs   rC   r5   r  r6   rT   r2   r   )r7   r8   r9   r&   r  r"   r"   r"   r#   r     s    
r   c                      s   e Zd Zd ZdG fddZdHddZdIddZdJddZdKddZdLddZ	dd  Z
d!d" Zd#d$ Zd%d& Z		dMdNd4d5ZdOd8d9ZdPd<d=Zd>d? Zd@dA ZdQdCdDZdEdF Z  ZS )Rr   r  r  r   rQ   r   r>   c                   s   t  | || _|| _d S r   )r%   r&   r   r   )r-   r  r   r   r.   r"   r#   r&     s   
zCaptureIndexing.__init__rt   r   r   rI   r6   r   c                 K  s&   | j dd| jj||fi |fi S )Ncall_moduleri   )r   r   r   r   )r-   rt   r   r6   r"   r"   r#   
_add_index  s   zCaptureIndexing._add_indexr2   c                 C  s   t jj|| jjS r   )r   r(   rp   rq   r   rc   )r-   rt   r"   r"   r#   	_simplify  s   zCaptureIndexing._simplifyrs   rC   r   c                 C  s*   |  |}| j|tj|d}| j||S NrF   )r  r  rI   rJ   r	  r   r-   rs   r   r"   r"   r#   r     s   
zCaptureIndexing.loadintc                 C  s6   t |tsJ | jjt|tj|d | j	||S r  )
rd   r  r   r   sympyIntegerrI   rK   r	  	load_seedr  r"   r"   r#   r    s
   zCaptureIndexing.load_seedNc                 C  s0   |  |}| j|tj||d}| j||||S )N)rF   rG   )r  r  rI   rL   r	  r   )r-   rs   r   valuerG   r"   r"   r#   r   "  s
   

zCaptureIndexing.storec                 C  s,   |  |}| j|tj|d}| j|||S r  )r  r  rI   rM   r	  store_reduction)r-   rs   r   r  r"   r"   r#   r  )  s
   
zCaptureIndexing.store_reductionc                   s@   | j |||| t|}|dkrt fddt|D S  S )Nr   c                 3      | ]} | V  qd S r   r"   r   r   r"   r#   r   4      z,CaptureIndexing.reduction.<locals>.<genexpr>)r	  	reductionr   r]   r   )r-   dtypeZ	src_dtypeZreduction_typer  Znum_outputsr"   r  r#   r  0  s
   zCaptureIndexing.reductionc                 C  sH   |  |}t|ttjfr| jt||S | |tj	}| j
||S r   )r  rd   r  r  r  r	  Zconstantr  rI   rN   
index_expr)r-   r   r  r"   r"   r#   r  7  s
   
zCaptureIndexing.index_exprc                 C  s8   |  |}| |tj}| |tj}| j||||S r   )r  r  rI   rO   r	  check_bounds)r-   r   r   lowerupperr"   r"   r#   r   >  s   
zCaptureIndexing.check_boundsr^   r   
boundaries.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]boundary_indicesindexing_dtypetorch.dtyperightr   sorter Optional[tuple[str, sympy.Expr]]sorter_indicesOptional[T]c              	   C  s   |d | j |d tj|d d| j |d tj|d d| j |d tj|d df}|dur>|d | j |d tj|d df}| j|||||||S )z3
        See [Note: Inductor bucketize op]
        r   r   r        N)r  rI   rP   r	  	bucketize)r-   r^   r#  r%  r&  r(  r)  r+  r"   r"   r#   r/  D  s@   zCaptureIndexing.bucketizemasked_bodyr   c                 C  sN   | j dd}| j || j j|< t| j |g | j j|< | jd|||fi S )zR
        Recursively capture the masked out body in another LoopBodyBlock
        NZmasked_subblockr  )r   r   r   r)   rX   rU   r   r   )r-   Z
mask_proxyr0  Zother_proxyrs   r"   r"   r#   r   u  s   zCaptureIndexing.maskedr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]c                   sN   | j |}| j |d}| jd|||fi  t fddtt|D S )Nr   r  c                 3  r  r   r"   r   r  r"   r#   r     r  z'CaptureIndexing.scan.<locals>.<genexpr>)r   r   r   r   r   r]   r   r_   )r-   Zdtype_proxyr   value_proxyr   rs   r"   r  r#   r     s   zCaptureIndexing.scanc                   s0   | j |||| t fddtt|D S )Nc                 3  r  r   r"   r   r  r"   r#   r     r  z'CaptureIndexing.sort.<locals>.<genexpr>)r	  sortr]   r   r_   )r-   r   r^   ZstableZ
descendingr"   r  r#   r3    s   zCaptureIndexing.sortc                 C  s   | j |}|d |d fS )Nr   r   )r	  frexp)r-   r2  r   r"   r"   r#   r4    s   zCaptureIndexing.frexpTc                 C  sF   | j |}| j ||||}| jd| j |d| |fi  |S )zx
        Flow data from tensors into indexing formulas.
        Introduce a call_module to update the indexing.
        r  set_)r   r   r   r   r   r   )r-   Zindex_proxyr   r   r   r   r   r"   r"   r#   r     s   z!CaptureIndexing.indirect_indexingc                 G  s   | j dd|i  d S )Nr   )r   r   )r-   r   r"   r"   r#   r     r   zCaptureIndexing.output)r  r  r   rQ   r   r>   )rt   r   r   rI   r6   r   )rt   r   r2   r   )rs   rC   r   r   )rs   rC   r   r  r   r   )r^   r   r#  r$  r%  r   r&  r'  r(  r   r)  r*  r+  r,  r2   r   )r0  r   )r   r1  )TT)r7   r8   r9   rs   r&   r  r  r   r  r   r  r  r  r   r/  r   r   r3  r4  r   r   r=   r"   r"   r.   r#   r     s*    






1

r   )8
__future__r   rm   r;   r   r   enumr   r   typingr   r   r   r   r	   r
   r  Ztorch.fxr    Ztorch._dynamo.utilsr   Ztorch.fx.proxyr   r   Ztorch.utils._sympy.symbolr   r?   r   r   Zcodegen.commonr   Zops_handlerr   r   r   utilsr   r   r   r   Zvirtualizedr   r   collections.abcr   r   r!   ZInterpreterr   r>   rB   rI   rQ   rX   r   r   r"   r"   r"   r#   <module>   s<    	  a;
