o
    Zhi                     @  s  d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
 d dlZd dlmZ d dlmZ ddlmZmZ dd	lmZmZmZ d
dlmZmZmZmZmZmZ d
dlm Z m!Z!m"Z" er|d dlm#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) d
dlm*Z* ej+dej,dej-dej.dej/dej0dej1dej2dej3di	Z4d,ddZ5G d d! d!eZ6G d"d# d#eZ7e78d$ G d%d& d&e!Z9ej:d-d(d)Z;G d*d+ d+e"Z<dS ).    )annotationsN)AnyOptionalTYPE_CHECKING)
PRECEDENCE)ExprPrinter)ValueRanges   )get_bounds_index_exprget_kernel_metadata)ops
OpsWrapperV   )CSEVariableDeferredLineDTYPE_TO_COMPUTATION_DTYPEIndentedBufferOpOverridesPythonPrinter)IterationRangesEntry
SIMDKernelSIMDScheduling)Union)ReductionType	StoreMode)	SchedulerSchedulerNode)OpVarTboolcharZshortintlongZucharfloathalfZbfloatval)Union[float, int, bool, str, CSEVariable]returnstrc                 C  sZ   t | tr| tjkrdS | tj krdS | | krdS t| S t | tr)| r'dS dS t| S )NZ	HUGE_VALFz
-HUGE_VALFZNANtruefalse)
isinstancer#   torchinfr(   r   )r%    r.   J/var/www/auris/lib/python3.10/site-packages/torch/_inductor/codegen/mps.pyvalue_to_metal2   s   


r0   c                   @  s\   e Zd ZdddZdddZdd	d
ZdddZdddZdddZdddZ	dddZ
dS )MetalExprPrinterexpr
sympy.Exprr'   r(   c                 C  sH   |j \}}| |}| |}|jrd| d| dS d| d| dS )N() / ()metal::floor(argsdoprint
is_integer)selfr2   xdivr.   r.   r/   _print_FloorDivA   s   


z MetalExprPrinter._print_FloorDivc                 C  sp   |j \}}}| |}|dkr*| |}|jr!d| d| d}n	d| d| d}| |}d| d| dS )Nr   r4   r5   r6   r7   z) % (r8   )r<   r2   r=   r>   modr.   r.   r/   _print_ModularIndexingI   s   


z'MetalExprPrinter._print_ModularIndexingc                 C  2   t |jdkrtdddt| j|j dS )Nr	   z$metal::min only supported for 2 argszmetal::min(, r6   lenr9   RuntimeErrorjoinmap_printr<   r2   r.   r.   r/   
_print_MinU      zMetalExprPrinter._print_Minc                 C  rB   )Nr	   z$metal::max only supported for 2 argszmetal::max(rC   r6   rD   rJ   r.   r.   r/   
_print_MaxZ   rL   zMetalExprPrinter._print_Maxc                 C  *   t |jdks	J d| |jd  dS )Nr   metal::abs(r   r6   rE   r9   rI   rJ   r.   r.   r/   
_print_Abs_      zMetalExprPrinter._print_Absc                 C  rN   )Nr   zstatic_cast<long>(metal::rint(r   z))rP   rJ   r.   r.   r/   _print_RoundToIntc   rR   z"MetalExprPrinter._print_RoundToIntc                 C  sh   t |jdks	J |j\}}|jr|dk sJ td| d| |td }d| d| d|  d	S )
Nr	   r   zOFor integer inputs, only non-negative ndigits are currently supported, but got .ZMulz!static_cast<float>(metal::rint(1e * z) * 1er6   )rE   r9   r;   
ValueErrorZparenthesizer   )r<   r2   numberndigitsZ
number_strr.   r.   r/   _print_RoundDecimalg   s   

z$MetalExprPrinter._print_RoundDecimalc                 C  s(   |j \}}d| | d| | dS )Nstatic_cast<float>(z) / static_cast<float>(r6   )r9   rI   )r<   r2   lhsrhsr.   r.   r/   _print_IntTrueDivs   s   
z"MetalExprPrinter._print_IntTrueDivN)r2   r3   r'   r(   )__name__
__module____qualname__r?   rA   rK   rM   rQ   rS   rY   r]   r.   r.   r.   r/   r1   @   s    






r1   c                   @  s  e Zd Ze		ddddZedddZedddZedddZedddZedd"d#Z	edd$d%Z
edd&d'Zedd(d)Zedd*d+Zedd,d-Zedd.d/Zedd0d1Zedd2d3Zedd4d5Zedd6d7Zedd8d9Zedd:d;Zedd<d=Zedd>d?Zedd@dAZeddBdCZeddDdEZeddFdGZeddHdIZeddKdLZeddMdNZeddOdPZeddQdRZ eddSdTZ!eddUdVZ"eddWdXZ#eddYdZZ$edd[d\Z%edd]d^Z&edd_d`Z'eddadbZ(eddcddZ)eddedfZ*eddgdhZ+eddidjZ,eddkdlZ-eddodpZ.eddqdrZ/eddudvZ0eddwdxZ1eddydzZ2edd{d|Z3edd}d~Z4edddZ5edddZ6dS )MetalOverridesNTr=   r   dtypetorch.dtype	src_dtypeOptional[torch.dtype]use_compute_typesr   r'   r(   c                 C     dt |  d|  dS )Nzstatic_cast<>(r6   DTYPE_TO_METAL)r=   rb   rd   rf   r.   r.   r/   to_dtypez   s   zMetalOverrides.to_dtypec                 C  rg   )Nz*reinterpret_cast<thread z*>(&r6   ri   )r=   rb   rd   r.   r.   r/   to_dtype_bitcast   s   zMetalOverrides.to_dtype_bitcastr%   Union[bool, float, int]c                 C  s   t | S Nr0   )r%   rb   r.   r.   r/   constant   s   zMetalOverrides.constantr2   r3   c                 C  s<   t jt j| }t jjjt jj|t| d}t	||S )N)bounds)
r   kernelindex_to_strprepare_indexingcsegeneratecomputer
   r   rk   )r2   rb   Zidx_strvarr.   r.   r/   
index_expr   s
   zMetalOverrides.index_exprmaskbodyotherc                 C  sR   t j| |}| }W d    n1 sw   Y  |jjr"t|}t|||S rn   )r   rr   Z
mask_loadsrq   Zis_boolr   r   where)rz   r{   r|   Znew_maskresultr.   r.   r/   masked   s   zMetalOverrides.maskedar   bcc                 C  s   |  d| dt | S )Nz ? z : ro   )r   r   r   r.   r.   r/   r}      s   zMetalOverrides.wherec                 C  s   t |tr|jd ur|jjs|  d| S t | tr&| jtjkr&d|  dn| }t |tr9|jtjkr9d| dn|}| d| d| d| dS )N % rZ   r6   z - z * metal::floor( / )r+   r   rb   Zis_floating_pointr,   r#   r   r   Zfloat_aZfloat_br.   r.   r/   	remainder   s&   
zMetalOverrides.remainderc                 C  B   d|  d| d|  d}d|  d| d| d}d| d| dS )Nstatic_cast<decltype(+)>(r6   zc10::metal::max(rC   r.   r   r   Z
typecast_aZ
typecast_br.   r.   r/   maximum      zMetalOverrides.maximumc                 C  r   )Nr   r   r   r6   zc10::metal::min(rC   r.   r   r.   r.   r/   minimum   r   zMetalOverrides.minimumc                 C     |  d| S )Nz || r.   r   r   r.   r.   r/   
logical_or      zMetalOverrides.logical_orc                 C  r   )Nz && r.   r   r.   r.   r/   logical_and   r   zMetalOverrides.logical_andc                 C     d|  dS )Nzmetal::isnan(r6   r.   r=   r.   r.   r/   isnan      zMetalOverrides.isnanc                 C  r   )Nzmetal::isinf(r6   r.   r   r.   r.   r/   isinf   r   zMetalOverrides.isinfc                 C  r   )Nzmetal::log(r6   r.   r   r.   r.   r/   log   r   zMetalOverrides.logc                 C  r   )Nzmetal::exp(r6   r.   r   r.   r.   r/   exp   r   zMetalOverrides.expc                 C  r   )NrO   r6   r.   r   r.   r.   r/   abs   r   zMetalOverrides.absc                 C  r   )Nzmetal::signbit(r6   r.   r   r.   r.   r/   signbit   r   zMetalOverrides.signbitc                 C  r   )Nzmetal::precise::sin(r6   r.   r   r.   r.   r/   sin   r   zMetalOverrides.sinc                 C  r   )Nzc10::metal::sinc(r6   r.   r   r.   r.   r/   sinc   r   zMetalOverrides.sincc                 C  r   )Nzmetal::precise::cos(r6   r.   r   r.   r.   r/   cos   r   zMetalOverrides.cosc                 C  r   )Nzc10::metal::i0(r6   r.   r   r.   r.   r/   i0   r   zMetalOverrides.i0c                 C  r   )Nzc10::metal::i1(r6   r.   r   r.   r.   r/   i1   r   zMetalOverrides.i1c                 C  r   )Nzc10::metal::erf(r6   r.   r   r.   r.   r/   erf   r   zMetalOverrides.erfc                 C  r   )Nzc10::metal::erfinv(r6   r.   r   r.   r.   r/   erfinv   r   zMetalOverrides.erfinvc                 C  r   )Nzc10::metal::log_gamma(r6   r.   r   r.   r.   r/   lgamma  r   zMetalOverrides.lgammayc                 C     d|  d| dS )Nzc10::metal::polygamma(rC   r6   r.   )r=   r   r.   r.   r/   	polygamma     zMetalOverrides.polygammac                 C  r   )Nzc10::metal::digamma(r6   r.   r   r.   r.   r/   digamma	  r   zMetalOverrides.digammac                 C  r   )Nzmetal::tan(r6   r.   r   r.   r.   r/   tan  r   zMetalOverrides.tanc                 C  r   )Nzmetal::asin(r6   r.   r   r.   r.   r/   asin  r   zMetalOverrides.asinc                 C  r   )Nzmetal::acos(r6   r.   r   r.   r.   r/   acos  r   zMetalOverrides.acosc                 C  r   )Nzmetal::atan(r6   r.   r   r.   r.   r/   atan  r   zMetalOverrides.atanc                 C  r   )Nzmetal::sqrt(r6   r.   r   r.   r.   r/   sqrt  r   zMetalOverrides.sqrtc                 C  r   )Nzmetal::rsqrt(r6   r.   r   r.   r.   r/   rsqrt!  r   zMetalOverrides.rsqrtc                 C  r   )Nzmetal::tanh(r6   r.   r   r.   r.   r/   tanh%  r   zMetalOverrides.tanhc                 C  r   )Nzmetal::atanh(r6   r.   r   r.   r.   r/   atanh)  r   zMetalOverrides.atanhc                 C  sF   |  d| }|  d| }d|  d| d| d| d| d| d	S )
Nr   r   z((z
 < 0) != (z	 < 0) ? (z != 0 ? z - 1 : z) : r6   r.   )r   r   quotremr.   r.   r/   floordiv-  s   *zMetalOverrides.floordivc                 C  r   )Nr7   r6   r.   r   r.   r.   r/   floor4  r   zMetalOverrides.floorc                 C  r   )Nzmetal::sign(r6   r.   r   r.   r.   r/   sign8  r   zMetalOverrides.signc                 C  r   )Nr   r   r   r6   zmetal::fmod(rC   r.   r   r.   r.   r/   fmod<  r   zMetalOverrides.fmodc                 C  r   )Nmetal::trunc(r6   r.   r   r.   r.   r/   truncB  r   zMetalOverrides.truncc                 C  sJ   | j tjkrd|  dn| }|j tjkrd| dn|}d| d| dS )NrZ   r6   r   /)rb   r,   r#   r   r.   r.   r/   truncdivF  s   zMetalOverrides.truncdivc                 C  r   )Nzmetal::ceil(r6   r.   r   r.   r.   r/   ceilN  r   zMetalOverrides.ceilseedoffsetc                 C  r   )Nzc10::metal::rand(rC   r6   r.   r   r   r.   r.   r/   randR  r   zMetalOverrides.randc                 C  r   )Nzc10::metal::randn(rC   r6   r.   r   r.   r.   r/   randnV  r   zMetalOverrides.randnlowhighc              	   C  s   d|  d| d| d| d	S )Nzc10::metal::randint64(rC   r6   r.   )r   r   r   r   r.   r.   r/   	randint64Z  s   zMetalOverrides.randint64c                 C  r   )Nzmetal::round(r6   r.   r   r.   r.   r/   round`  r   zMetalOverrides.roundc                 C  r   )Nr   r   r   r6   zmetal::pow(rC   r.   )r   r   Zcast_aZcast_br.   r.   r/   powd  r   zMetalOverrides.powc                 C  r   )Nzc10::metal::zeta(rC   r6   r.   r   r.   r.   r/   zetaj  r   zMetalOverrides.zetac                 C  r   )Nz c10::metal::spherical_bessel_j0(r6   r.   r   r.   r.   r/   spherical_bessel_j0n  r   z"MetalOverrides.spherical_bessel_j0c                 C  r   )Nzc10::metal::xlog1py(r6   r.   r   r.   r.   r/   xlog1pyr  r   zMetalOverrides.xlog1pyc                 C  r   )Nzc10::metal::entr(r6   r.   r   r.   r.   r/   entrv  r   zMetalOverrides.entr)NT)
r=   r   rb   rc   rd   re   rf   r   r'   r(   )r=   r   rb   rc   rd   rc   r'   r(   )r%   rm   rb   rc   r'   r(   )r2   r3   rb   rc   r'   r(   )rz   r   r{   r3   r|   r   r'   r(   )r   r   r   r   r   r   r'   r(   )r   r   r   r   r'   r(   )r   r   r   r   r'   r(   )r=   r   r'   r(   )r=   r   r   r   r'   r(   )r   r   r   r   r'   r(   )
r   r   r   r   r   r   r   r   r'   r(   )7r^   r_   r`   staticmethodrk   rl   rp   ry   r   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   r.   r.   r/   ra   y   s    
ra   Zmpsc                      s   e Zd ZeZdZdZdZe j	Z
e j	ZeZd< fd
dZd=ddZd>ddZ	d?d@ddZde fdAd!d"ZdBd'd(ZdCd+d,ZdDd-d.Zd?dEd0d1Zd?dFd3d4ZdGd:d;Z  ZS )HMetalKernel;auto i   tilingdict[str, sympy.Expr]kwargsr   r'   Nonec                   s(   t  j|fi | t | _d| _d S )NF)super__init__	itertoolscountacc_var_idsmultistage_reduction)r<   r   r   	__class__r.   r/   r     s   

zMetalKernel.__init__rb   rc   r(   c                 C  s   t | S rn   ri   )r<   rb   r.   r.   r/   dtype_to_str  s   zMetalKernel.dtype_to_strnameindexr3   r   c                 C  sH   | j |}| |}| d| | d}| jj| j|tj	|dS )z"Codegen a load from an InputBuffer[]rb   )
r9   inputrt   rs   ru   rv   loadsr   graph	get_dtype)r<   r   r   rx   liner.   r.   r/   load  s   
zMetalKernel.loadNvaluemoder   c                 C  s|   | j |}| |}| tj|}| d| | d| d| d}| jr3| j	
t|| d S | j
t|| d S )Nr   z] = static_cast<rh   );)r9   outputrt   r   r   r   r   rs   inside_reductionrw   	writeliner   stores)r<   r   r   r   r   rx   	dtype_strr   r.   r.   r/   store  s   
"zMetalKernel.store
elem_countOptional[int]rq   ValueRanges[Any]c              	   C  sr   dt | j }tj|||}|r'| jd| | d| d| d |S | jd| | d| d |S )NZtmp_acc_zthreadgroup  r   z];r   )nextr   r   rr   Zcreate_cse_varindexing_coder   r   )r<   rb   r   rq   var_namerx   r.   r.   r/   _new_accvar  s   zMetalKernel._new_accvarrd   reduction_typer   +Union[CSEVariable, tuple[CSEVariable, ...]]c                 C  s  t dd | jD }t|j| j}|dkr>| |}| j| d | jd | j	d| d| d | j
d |S |d	v r| ||}| jry|d
krQdnd\}	}
| j| d|j d|	 d | j	| d|j d|
 d| d n| j	| d|j d| d | jj| j
d| d| d| dt| dS |dv r| ||}| d|j d}t| }| js| j	| d| d| d | jj| j
d| d| d| d|dS |drdnd}| j| d| d| d  |d!r\t d"d | j D }| tj|}|d#krd$nd%}| d|j d}| j	| d& | j	d| d'| d'| d| d(| d)| d(|j d* | jj| j
| d+| d| d| d,|dS | j| d-| d| d| d | jj| j
d| d| d| d|dS |d.kr| jrJ d/| | ||}| j	| d|j d| d | j| jd| d| d| d}t| d0| d1| jjfS t|)2zCodegen a reduction operationc                 s      | ]}|j r|V  qd S rn   is_reduction.0tr.   r.   r/   	<genexpr>  s    z(MetalKernel.reduction.<locals>.<genexpr>anyz	 = false;z7threadgroup_barrier(metal::mem_flags::mem_threadgroup);z
                if (z) {
                    z' = true;
                }
            )prodsumr  )r   r   )r   *r   z] = r   z] z= zc10::metal::threadgroup_r4   rC   r6   r   )maxminZargminargmaxr   z = static_cast<rh   r   r  Zlowestz = ::metal::numeric_limits<z>::z();argc                 s  r   rn   r   r   r.   r.   r/   r     s    
r  ><z = -1;r    = z;
                    z$;
                }
                z[c10::metal::threadgroup_z)]z = ::c10::metal::Zwelford_reducez+Multistage reduction not yet supported for z.xz.y)r   Zrange_treesr  numelmax_threadgroup_sizer   r   r   rw   splicer   r   r   ru   rv   r   rj   endswith
startswithZrange_tree_nodesvaluesr,   r"   r   Z_unwrapfeaturesZreduction_numelNotImplementedError)r<   rb   rd   r   r   Zreduction_dimZacc_buf_sizeaccZacc_bufZdefault_valZreduction_opZacc_thread_varZsrc_metal_typeZlim_fnZidx_varZidx_acc_bufcmp_opZidx_thread_varZwf_resr.   r.   r/   	reduction  s   
 



 zMetalKernel.reductionentryr   c                 C  sD  |  |j}| |}|jr|jj| jk| _|jr| js/| j	| j
 d|j d| d d S |jj| j d | j }| j	d|j d|j d| d|j d		 | j B | j	| j
 d|j d| d
| d|j d
 || j |jjkr| j	d|j d|jj d W d    d S W d    d S 1 sw   Y  d S )Nr   r  r   r   z	for(auto z
_cnt = 0; z_cnt < z; ++z_cnt) {rU   z + z_cnt;if ( >= z) break;)Zrename_indexingr2   sexprr   rootr  r  r   r   r   Zindex_dtyper   r{   indent)r<   r  ry   Z	index_strZ	loop_sizer.   r.   r/   codegen_iteration_ranges_entry!  s0   
"&""z*MetalKernel.codegen_iteration_ranges_entryc                 C  s   | j r0| j  | j| j | j| j W d   n1 s!w   Y  | jd d| _ n| j| j | j| j | j| j | j  | j  | j  dS )a  
        Concat output code from index_code, loads, compute, stores,
        suffix into self.body.

        For pointwise kernels, this is called just once at the end.

        For reduction kernels, this generates a loop over the reduction
        axis.
        N}F)	r   r{   r  r  r   rw   r   r   clearr<   r.   r.   r/   codegen_body<  s   


zMetalKernel.codegen_bodyOptional[str]c                 C  s  |    t }|d |  }|  |jddd | jr%|d |d |  | jj	 D ]\}}|| j
v r?q5| tj|}|d| d| d	 q5| jj	 D ]\}}| tj|}|d
| d| d	 q[| jj	 D ]\}}|d| d	 q{t|dk sJ dt|dkrdt| nd}t|dkr|d jnd}| jrd	nd}	|| d| d|	  | jr|| d W d   n1 sw   Y  |d | 5 t|dkrt|D ]\}
}|d|j dtd|
  d q|| j || j W d   n	1 sw   Y  |d W d   n	1 s4w   Y  |d | S )z3Called at the end to generate a final kernel stringzcompile_mps_shader("""z
            #include <c10/metal/random.h>
            #include <c10/metal/special_math.h>
            #include <c10/metal/utils.h>
            T)stripz&#include <c10/metal/reduction_utils.h>zkernel void generated_kernel(zdevice z* ,z	constant zconstant long&    z%Up to 3 index variables are supportedr   Zuintr   Z
thread_pos r   z [[thread_position_in_grid]]z- group_pos [[thread_position_in_threadgroup]]Nz) {r   z = thread_pos.x   r   r  z"""))r!  r   r   active_range_treesr  r  r   r9   output_buffersitemsremoved_buffersr   r   r   r   input_bufferssizevarsrE   r   	enumeratechrr   r{   getvalue)r<   r   codeZidx_varsouterinnerr   Zthread_pos_dtypeZthread_pos_var_nameZthread_pos_suffixidxrx   r.   r.   r/   codegen_kernelT  sh   







0zMetalKernel.codegen_kernelnodec                   s   t jj}g  jj  jj } fdd|D }|dd  jj D 7 }t 	 dkrG fdd 	 D }|dd
| dg7 } jra fd	d 	 D }|d
d
| dg7 }|j||tddd dS )zCodegen a call to this kernelc                   s   g | ]	}| j vr|qS r.   )r+  )r   r  r   r.   r/   
<listcomp>  s    z+MetalKernel.call_kernel.<locals>.<listcomp>c                 S  s   g | ]}t |qS r.   )r(   r   vr.   r.   r/   r7    s    r   c                   s.   g | ]}  |jrt|j jn|jqS r.   )pexprr   sympyMinr  r  r8  r   r.   r/   r7    s    z	threads=[rC   r   c                   s,   g | ]}|j r t|j jnd qS )1)r   r:  r;  r<  r  r  r8  r   r.   r/   r7    s    zgroup_size=[cpuF)deviceZtritonN)r   r   wrapper_coder9   r)  keysr,  r-  rE   r(  rG   r   Zgenerate_kernel_callr,   r?  )r<   r   r6  wrapperr9   threadsr.   r   r/   call_kernel  s(   


zMetalKernel.call_kernelr2   sizelowerr   upperc           	      C  s   |s|sd S |  |}|r| dnd}|r | d|  | nd}|r0|r0d| d| d}nd| | d}| jj| j|d	d
 d S )Nz < 0r&  r  zif ((z) && (z	)) returnr  z) returnF)Z
assignment)rs   ru   rv   rw   )	r<   r2   rE  rF  rG  Zexpr_strZ
lower_exprZ
upper_exprr   r.   r.   r/   check_bounds  s   
zMetalKernel.check_bounds)r   r   r   r   r'   r   )rb   rc   r'   r(   )r   r(   r   r3   r'   r   rn   )
r   r(   r   r3   r   r   r   r   r'   r   )rb   rc   r   r   rq   r   r'   r   )
rb   rc   rd   rc   r   r   r   r   r'   r   )r  r   r'   r   r'   r   )r   r"  r'   r(   )r   r(   r6  r   r'   r   )
r2   r3   rE  r3   rF  r   rG  r   r'   r   )r^   r_   r`   ra   Z	overridessuffixZnewvar_prefixr  r   r:   r:  r1   r  Zkexprr   r   r   r   r   unknownr   r  r  r!  r5  rD  rH  __classcell__r.   r.   r   r/   r   ~  s,    
	


i
:"r   r   c                  C  s   dd l } | jddd d S )Nr   ztorch.compile for Metal is an early protoype and might not work as expected. For details see https://github.com/pytorch/pytorch/issues/150121r	   )
stacklevel)warningswarn)rN  r.   r.   r/   _warn_prototype  s
   
rP  c                      s,   e Zd ZeZd fddZdddZ  ZS )MetalScheduling	schedulerOptional[Scheduler]r'   r   c                   s6   t  | t  tjj}|d ur|jd d S d S )NzDfrom torch._inductor.runtime.runtime_utils import compile_mps_shader)r   r   rP  r   r   r@  headerr  )r<   rR  rB  r   r.   r/   r     s   zMetalScheduling.__init__src_coder(   node_schedulelist[SchedulerNode]rr   r   c           
      C  sp   t jj}||jv r|j| }|S d|  }| d}||j|< t||\}}| d| }	||||	 |S )NZmps_lib_z.generated_kernel
)r   r   r@  Zsrc_to_kernelZnext_kernel_suffixr   define_kernel)
r<   rU  rV  rr   rB  Zkernel_nameZmps_lib_nameZoriginsZdetailed_originsZmetadata_commentr.   r.   r/   rY    s   



zMetalScheduling.define_kernel)rR  rS  r'   r   )rU  r(   rV  rW  rr   r   r'   r(   )r^   r_   r`   r   Zkernel_typer   rY  rL  r.   r.   r   r/   rQ    s    	rQ  )r%   r&   r'   r(   rI  )=
__future__r   	functoolsr   typingr   r   r   r;  Zsympy.printing.precedencer   r,   Ztorch.utils._sympy.printersr   ZExprPrinter_Ztorch.utils._sympy.value_rangesr   utilsr
   r   Zvirtualizedr   r   r   commonr   r   r   r   r   r   Zsimdr   r   r   r   Zops_handlerr   r   rR  r   r   r   r   Zint8Zint16Zint32Zint64Zuint8r#   r$   Zbfloat16rj   r0   r1   ra   Z_initialize_pointwise_overridesr   cacherP  rQ  r.   r.   r.   r/   <module>   sP    
9  
  E
