a
    kh                     @  sD  d dl mZ d dlZd dlZd dlZd dlmZmZmZ d dl	Z	d dl
Zd dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ ddlmZ d d	lmZ d
dlmZ d
dlmZ eG dd dZG dd dZ G dd dZ!eddG dd dZ"dd Z#dd Z$dd Z%dd Z&dd  Z'ej(e&ej)gd!Z*ej(e&ej+gd!Z,ej(e'ej-gd!Z.G d"d# d#Z/G d$d% d%Z0d&d' Z1d(d) Z2d*d+ Z3G d,d- d-Z4G d.d/ d/e4Z5G d0d1 d1e4Z6d2d3 Z7d4d5 Z8d6d7 Z9d8d9 Z:d:d; Z;e0 Z<ee<Z=d<d= Z>d>d? Z?G d@dA dAZ@G dBdC dCejAZBG dDdE dEZCG dFdG dGZDdS )H    )annotationsN)TupleListDict)	dataclass)TritonSemantic)TensorDescriptor   )InterpreterError)partial   )interpreter)irc                   @  sV   e Zd ZU dZded< ded< ejedZded< d	d
 Z	dd Z
dd Zdd ZdS )TensorHandlez
        data: numpy array
        dtype: triton type, either pointer_type or scalar_type.
        we don't store block_type here because the shape information is already available in the data field
        attr: a dictionary of attributes
    znp.arraydataztl.dtypedtype)default_factoryr   attrc                 C  s   t | j S N)boolr   allself r   H/var/www/auris/lib/python3.9/site-packages/triton/runtime/interpreter.py__bool__#   s    zTensorHandle.__bool__c                 C  s   | j }t|dr|j}q|S )N
element_ty)r   hasattrr   )r   r   r   r   r   get_element_ty&   s    
zTensorHandle.get_element_tyc                 C  s   t | j | jS r   )r   r   copyr   r   r   r   r   clone,   s    zTensorHandle.clonec                 C  s   || j |< d S r   )r   )r   keyvaluer   r   r   set_attr/   s    zTensorHandle.set_attrN)__name__
__module____qualname____doc____annotations__dataclassesfielddictr   r   r   r    r#   r   r   r   r   r      s   
r   c                   @  s   e Zd Zdd Zdd ZdS )BlockPointerHandlec                 C  s(   || _ || _|| _|| _|| _|| _d S r   )baseshapestridesoffsetsblock_shapeorder)r   r-   r.   r/   r0   r1   r2   r   r   r   __init__5   s    zBlockPointerHandle.__init__c           	      C  s   | j  }|jd }t| j j| j}tj| jtd}t	t
| jD ]}dgt
| j }| j| ||< | j| jt| j|  |}||| | j| j tj }||v rD||| j| jk @ |dk@ }qDt|| j jj}||fS )N   r   r	   r   )r-   r   primitive_bitwidthnpbroadcast_tor   r1   onesr   rangelenr0   arangereshaper/   astypeuint64r.   r   r   scalar)	r   boundary_checkdtype_ttZn_bytesptrsmasksdim
bcast_dimsoffr   r   r   materialize_pointers=   s    

" z'BlockPointerHandle.materialize_pointersN)r$   r%   r&   r3   rH   r   r   r   r   r,   3   s   r,   c                   @  s6   e Zd ZdddddddZdd Zdd	d
dZdS )TensorDescHandler   List[TensorHandle]	List[int]r-   r.   r/   r1   c                 C  s&   || _ t|| _|| _|| _|| _d S r   )r-   r;   ndimr.   r/   r1   )r   r-   r.   r/   r1   r   r   r   r3   O   s
    
zTensorDescHandle.__init__c                 C  s   | j j d dksJ dt| j| jks0J t| j| jksDJ | jd d D ]}|j d dksRJ dqR| jd j dksJ dd S )N   r   zbase must be 16-byte alignedzstride must be 16-byte alignedr	   zlast dim must be contiguous)r-   r   itemr;   r/   rM   r1   )r   strider   r   r   validateW   s    zTensorDescHandle.validate)r0   c           	      C  s  t || jksJ | jjj}|jd }|d j| d dksDJ dt| jj| j	}tj
| j	td}tt | j	D ]~}dgt | j	 }| j	| ||< || jt| j	|  |}||| | j| j tj }|d|k@ || j| jk @ }qt|jtjksJ t|| jjj}||fS )Nr4   rO   rN   r   z*block offset start must be 16-byte alignedr5   r	   )r;   rM   r-   r   r   r6   r   r7   r8   r1   r9   r   r:   r<   r=   r/   r>   r?   r.   r   r@   )	r   r0   Z	scalar_tyitemsizerC   rD   rE   rF   rG   r   r   r   rH   `   s    

  z%TensorDescHandle.materialize_pointersN)r$   r%   r&   r3   rR   rH   r   r   r   r   rI   M   s   	rI   T)frozenc                   @  s   e Zd ZU dZded< dZded< dZded< dZd	ed
< dZded< dZ	ded< dZ
d	ed< dZded< dZded< dZd	ed< dS )InterpreterOptionsNr+   extern_libsFr   debugTsanitize_overflowstrarch)fp8e5Zfp8e5b16fp8e4nvZfp8e4b8fp8e4b15z
Tuple[str]supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r`   Ztf32x3Zieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r$   r%   r&   rV   r(   rW   rX   rZ   r^   r_   ra   rb   rd   re   r   r   r   r   rU   s   s   
rU   c                 C  sD   | t jkrt jS | t jkr t jS | t jkr0t jS | t jkr@t jS | S r   )	r7   uint8int8uint16int16uint32int32r?   int64r5   r   r   r   _get_signed_np_dtype   s    



rm   c              &   C  sR  t | tjrttjS tjtttjttjtj	ttj	tj
ttj
tjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttji}t | tjrJt | jtjr@ttjS || j S ||  S r   )
isinstancetlpointer_typer7   r   r?   int1r   float16float32float64rg   rf   ri   rh   rk   rj   rl   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )Ztt_dtypeZnp_typesr   r   r   _get_np_dtype   s4    
r|   c                 C  s  t td|j }t td|j }tj|  |d}||jd ? d@ }|j|j d }|j|j d }	|d|j> d @ }
|j}|j}||j? d|> d @ tj}|dk}t	|r@tj
|tjd}t|jD ]"}|
|? d@ }|j| ||dk< q|
dk}d||  ||< || |||@ < |
| || > d|j> d @ |
|< tdt|| | d|	> d }||}||}|j|jkr|
|j|j ? d|j> d @ }|tjjkr|
d|j|j d > @ }||dk }||}n$|
||j|j > d|j> d @ }|dk}t	|r||j? d|> d @ tj}|dk}||@ }tj
|tjd}d| || |  ||< || || ? d|j||  > B ||< ||jd > ||j> B |B }|| jS )Nuintr5   r	   r   )getattrr7   r6   Z
frombuffertobytesZfp_mantissa_widthZexponent_biasr>   rk   any
zeros_liker:   maximumminimum_irZROUNDING_MODEZRTNEr=   r.   )inputZinput_dtypeZoutput_dtyperounding_modeZinput_uint_dtypeZoutput_unint_dtypeZ	input_binsignZinput_exponent_widthZoutput_exponent_widthZsignificandZ
bias_inputZbias_outputexponentZsubnormal_indexZbit_posiZ	bit_indexZzero_significand_indexZexponent_outputZsign_outputZsignificand_outputcut_offZnon_zero_exponent_indexshiftoutputr   r   r   _convert_float   sl    $


r   c                 C  s
   t | S r   )matherfxr   r   r   _erf   s    r   c                 C  s   t | t | d? S )N@   )rc   )abr   r   r   
_umulhi_64   s    r   )Zotypesc                   @  s   e Zd Zedd ZdS )ExtraFunctionsc                 C  s   t |j| j|||S r   )ro   tensorbuildercreate_fp_to_fphandle)r   Zdst_tyZfp_downcast_rounding	_semanticr   r   r   _convert_custom_types   s    z$ExtraFunctions._convert_custom_typesN)r$   r%   r&   staticmethodr   r   r   r   r   r      s   r   c                   @  s  e Zd Zejjejjejjejjejjejjejj	ejj	iZ
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejji
ZddddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/d3d4 Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5d?d@ Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAdWdX ZBdYdZ ZCd[dZ ZDd\dZ ZEd]dZ ZFd^dZ ZGd_dZ ZHd`dZ ZIdadb ZJdcdd ZKdedf ZLdgdZ ZMdhdZ ZNdidZ ZOdjdZ ZPdkdZ ZQdldZ ZRdmdZ ZSdndZ ZTdodZ ZUdpdZ ZVdqdZ ZWdrdZ ZXdsdZ ZYdtdZ ZZdudZ Z[dvdZ Z\dwdZ Z]dxdZ Z^dydZ Z_dzdZ Z`d{dZ Zad|dZ Zbd}dZ Zcd~dZ ZdddZ ZeddZ ZfddZ ZgddZ ZhddZ ZiddZ ZjddZ ZkddZ ZlddZ ZmddZ ZnddZ ZoddZ ZpddZ ZqddZ ZrddZ ZsddZ ZtddZ ZuddZ ZvddZ ZwddZ ZxddZ ZyddZ ZzddZ Z{ddZ Z|eKZ}eKZ~dd Zdd Zdd Zdd ZddZ ZddZ Zdd Zdd Zdd ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ Zdd Zdd ZddZ Zdd Zdd Zdd Zdd Zdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdd ZddddddddZdddddZddddddZddddddZdddddddZdd ZdS )InterpreterBuilderNonereturnc                 C  s2   d | _ t | _i | _tj| jd< dd | jd< d S )NZconvert_custom_typesc                 S  s   dS )N)r	   r	   r	   r   )ZlhsTypeZrhsTyper   r   r   <lambda>      z-InterpreterBuilder.__init__.<locals>.<lambda>Zmin_dot_size)rZ   rU   optionsZcodegen_fnsr   r   r   r   r   r   r3     s
    zInterpreterBuilder.__init__c                 C  sR   || j d k std|| j d k s,td|| j d k sBtd|||f| _d S )Nr   zx >= grid_dim[0]r	   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzr   r   r   set_grid_idx   s    zInterpreterBuilder.set_grid_idxc                 C  s   |||f| _ d S r   )r   )r   ZnxnyZnzr   r   r   set_grid_dim)  s    zInterpreterBuilder.set_grid_dimc                 C  s   t jS r   )ro   rr   r   r   r   r   get_half_ty.  s    zInterpreterBuilder.get_half_tyc                 C  s   t jS r   )ro   ru   r   r   r   r   get_bf16_ty1  s    zInterpreterBuilder.get_bf16_tyc                 C  s   t jS r   )ro   rs   r   r   r   r   get_float_ty4  s    zInterpreterBuilder.get_float_tyc                 C  s   t jS r   )ro   rt   r   r   r   r   get_double_ty7  s    z InterpreterBuilder.get_double_tyc                 C  s   t jS r   )ro   rq   r   r   r   r   get_int1_ty:  s    zInterpreterBuilder.get_int1_tyc                 C  s   t jS r   )ro   rg   r   r   r   r   get_int8_ty=  s    zInterpreterBuilder.get_int8_tyc                 C  s   t jS r   )ro   rf   r   r   r   r   get_uint8_ty@  s    zInterpreterBuilder.get_uint8_tyc                 C  s   t jS r   )ro   ri   r   r   r   r   get_int16_tyC  s    zInterpreterBuilder.get_int16_tyc                 C  s   t jS r   )ro   rh   r   r   r   r   get_uint16_tyF  s    z InterpreterBuilder.get_uint16_tyc                 C  s   t jS r   )ro   rk   r   r   r   r   get_int32_tyI  s    zInterpreterBuilder.get_int32_tyc                 C  s   t jS r   )ro   rj   r   r   r   r   get_uint32_tyL  s    z InterpreterBuilder.get_uint32_tyc                 C  s   t jS r   )ro   rl   r   r   r   r   get_int64_tyO  s    zInterpreterBuilder.get_int64_tyc                 C  s   t jS r   )ro   r?   r   r   r   r   get_uint64_tyR  s    z InterpreterBuilder.get_uint64_tyc                 C  s   t jS r   )ro   rx   r   r   r   r   get_fp8e4nv_tyU  s    z!InterpreterBuilder.get_fp8e4nv_tyc                 C  s   t jS r   )ro   rz   r   r   r   r   get_fp8e4b15_tyX  s    z"InterpreterBuilder.get_fp8e4b15_tyc                 C  s   t jS r   )ro   ry   r   r   r   r   get_fp8e4b8_ty[  s    z!InterpreterBuilder.get_fp8e4b8_tyc                 C  s   t jS r   )ro   rv   r   r   r   r   get_fp8e5_ty^  s    zInterpreterBuilder.get_fp8e5_tyc                 C  s   t jS r   )ro   rw   r   r   r   r   get_fp8e5b16_tya  s    z"InterpreterBuilder.get_fp8e5b16_tyc                 C  s   t ||S r   )ro   rp   )r   Zelt_tyZ
addr_spacer   r   r   
get_ptr_tyd  s    zInterpreterBuilder.get_ptr_tyc                 C  s   t ||S r   )ro   r{   )r   r   r.   r   r   r   get_block_tyg  s    zInterpreterBuilder.get_block_tyc                 C  s   t tj|gtjdtjS Nr5   )r   r7   arraybool_ro   rq   r   r"   r   r   r   get_int1j  s    zInterpreterBuilder.get_int1c                 C  s   t tj|gtjdtjS r   )r   r7   r   rf   ro   r   r   r   r   	get_uint8m  s    zInterpreterBuilder.get_uint8c                 C  s   t tj|gtjdtjS r   )r   r7   r   rg   ro   r   r   r   r   get_int8p  s    zInterpreterBuilder.get_int8c                 C  s   t tj|gtjdtjS r   )r   r7   r   rh   ro   r   r   r   r   
get_uint16s  s    zInterpreterBuilder.get_uint16c                 C  s   t tj|gtjdtjS r   )r   r7   r   ri   ro   r   r   r   r   	get_int16v  s    zInterpreterBuilder.get_int16c                 C  s   t tj|gtjdtjS r   )r   r7   r   rj   ro   r   r   r   r   
get_uint32y  s    zInterpreterBuilder.get_uint32c                 C  s   t tj|gtjdtjS r   )r   r7   r   rk   ro   r   r   r   r   	get_int32|  s    zInterpreterBuilder.get_int32c                 C  s   t tj|gtjdtjS r   )r   r7   r   r?   ro   r   r   r   r   
get_uint64  s    zInterpreterBuilder.get_uint64c                 C  s   t tj|gtjdtjS r   )r   r7   r   rl   ro   r   r   r   r   	get_int64  s    zInterpreterBuilder.get_int64c                 C  s   t tj|gtjdtjS r   )r   r7   r   rr   ro   r   r   r   r   get_fp16  s    zInterpreterBuilder.get_fp16c                 C  s   t tj|gtjdtjS r   )r   r7   r   rs   ro   r   r   r   r   get_fp32  s    zInterpreterBuilder.get_fp32c                 C  s   t tj|gtjdtjS r   )r   r7   r   rt   ro   r   r   r   r   get_fp64  s    zInterpreterBuilder.get_fp64c                 C  s   t tjdgt|d|S Nr   r5   )r   r7   r   r|   )r   typer   r   r   get_null_value  s    z!InterpreterBuilder.get_null_valuec                 C  s2   | j d u rtdttj| j | gtjdtjS )Nzgrid_idx is Noner5   )r   r   r   r7   r   rk   ro   r   axisr   r   r   create_get_program_id  s    
z(InterpreterBuilder.create_get_program_idc                 C  s    t tj| j| gtjdtjS r   )r   r7   r   r   rk   ro   r   r   r   r   create_get_num_programs  s    z*InterpreterBuilder.create_get_num_programsc                 C  s0   t tj|jtdtj}d }| ||||||S r   )r   r7   	ones_liker   r   ro   rq   create_masked_load)r   ptr_0_1is_volatilemaskotherr   r   r   create_load  s    zInterpreterBuilder.create_loadc                 C  s*   t tj|jtdtj}| |||d d S r   )r   r7   r   r   r   ro   rq   create_masked_store)r   r   valr   r   r   r   r   r   create_store  s    zInterpreterBuilder.create_storec           
      C  sN   |  }t|}|d u r.ttj|j|d|}t|j|j|j|}	t|	|S r   )r   r|   r   r7   r   r   _interpreterload)
r   rC   r   r   cache_modifiereviction_policyr   rB   dtype_npretr   r   r   r     s    z%InterpreterBuilder.create_masked_loadc                 C  s   t |j|j|jS r   )r   storer   )r   rC   r"   r   r   r   r   r   r   r     s    z&InterpreterBuilder.create_masked_storec                 C  sx   |j j}|j}|tjkr"|tjks6|tjkr\|tjkr\t|j||d t|}t	||jS t	|j
t||jS d S r   )r   r@   ro   ru   rs   r   r   viewr|   r   r>   )r   srcdst_typesrc_element_typedst_element_typer   r   r   r   	cast_impl  s    zInterpreterBuilder.cast_implc                 C  s   |  ||S r   r   r   r   r   r   r   r   r     r   zInterpreterBuilder.<lambda>c                 C  s   |  ||S r   r   r   r   r   r   r     r   c                 C  s   |  ||S r   r   r   r   r   r   r     r   c                 C  s   |  ||S r   r   r   r   r   r   r     r   c                 C  s   |  ||S r   r   r   r   r   r   r     r   c                 C  s   |  ||S r   r   r   r   r   r   r     r   c                 C  s   |  ||S r   r   )r   r   r   	is_signedr   r   r   r     r   c                 C  s4   |j j}|j}t|j|||t|}t||jS r   )r   r@   r   r   r   r|   r   )r   r   r   r   r   r   r   r   r   r   r     s    z"InterpreterBuilder.create_fp_to_fpc                 C  s   t |jt||jS r   )r   r   r   r|   r@   r   r   r   r   create_bitcast  s    z!InterpreterBuilder.create_bitcastc                 C  s   t ||j|j|jjS r   r   r   r   r@   )r   lhsrhsopr   r   r   	binary_op  s    zInterpreterBuilder.binary_opc                 C  s   |  ||tjS r   r   r7   addr   r   r   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   multiplyr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   divider   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   fmodr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   subtractr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||S r   create_idivr   r   r   r   r     r   c                 C  s   |  ||S r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   )r   r7   Z
left_shiftr   r   r   r   r     r   c                 C  s   |  ||tjS r   )r   r7   right_shiftr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   r   r   r   r   r   r     r   c                 C  s   |  ||tjS r   r
  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r
  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r
  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   r   r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   Z
less_equalr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   Zlessr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   Zgreater_equalr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   Zgreaterr   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   equalr   r   r   r   r     r   c                 C  s   |  ||tjS r   r   r7   	not_equalr   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   r  r   r   r   r   r     r   c                 C  s   |  ||tjS r   )r   r7   Zbitwise_andr   r   r   r   r     r   c                 C  s   |  ||tjS r   )r   r7   Zbitwise_xorr   r   r   r   r     r   c                 C  s   |  ||tjS r   )r   r7   Z
bitwise_orr   r   r   r   r     r   c                 C  s&   t |jt|j|j |j |jjS r   )r   r   r7   r  r   r@   r   r   r   r   r    s    zInterpreterBuilder.create_idivc                 C  sD   t |jj}t |jj}|j||_|j||_| ||tjS r   )rm   r   r   r>   r   r7   r	  )r   r   r   Z	lhs_dtypeZ	rhs_dtyper   r   r   create_ashr	  s
    zInterpreterBuilder.create_ashrc                 C  s   |j j}|tjks|tjkr4tt|j |j |jjS ttd|j	d d  }|j 
|}|j 
|}t|||j	d ? }t|
||jjS d S )Nr}   r4   r   )r   r   r7   rl   r?   r   np_umulhi_u64r@   r~   rS   r>   r   )r   r   r   r   Zcompute_dtypeZlhs_dataZrhs_dataZret_datar   r   r   create_umulhi  s    z InterpreterBuilder.create_umulhic                 C  s   t ||j|j|j|jjS r   r   )r   r   r   r   r   r   r   r   
ternary_op  s    zInterpreterBuilder.ternary_opc                 C  s   |  |||tjS r   )r  r7   Zclip)r   arglohiZpropagate_nansr   r   r   r      r   c                 C  s   |  |||tjS r   )r  r7   where)r   condr   r   r   r   r   r   !  r   c                 C  s   t |j|j |j |jjS r   r   r   r   r   r   
create_fma#  s    zInterpreterBuilder.create_fmac                 C  s   t ||j|jjS r   r   )r   r  r   r   r   r   unary_op'  s    zInterpreterBuilder.unary_opc                 C  sZ   |j }|jd }ttd|j }|j|}d|> d }||@ t|}t||j jS )Nr	   r}   )	r   r6   r~   r7   r   r   r|   r   r@   )r   r  rB   Zmask_bitwidthZnp_uint_dtyper   r   r   r   r   r   create_fabs*  s    
zInterpreterBuilder.create_fabsc                 C  s   |  |tjS r   )r  r7   cosr   r  r   r   r   r   4  r   c                 C  s   |  |tjS r   )r  r7   expr!  r   r   r   r   5  r   c                 C  s   |  |tjS r   )r  r7   Zexp2r!  r   r   r   r   6  r   c                 C  s   |  |tjS r   )r  r7   absr!  r   r   r   r   7  r   c                 C  s   |  |tjS r   )r  r7   floorr!  r   r   r   r   8  r   c                 C  s   |  |tjS r   )r  r7   ceilr!  r   r   r   r   9  r   c                 C  s   |  |tjS r   )r  r7   logr!  r   r   r   r   :  r   c                 C  s   |  |tjS r   )r  r7   log2r!  r   r   r   r   ;  r   c                 C  s   |  |tjS r   r  r7   sqrtr!  r   r   r   r   <  r   c                 C  s   |  |tjS r   r(  r!  r   r   r   r   =  r   c                 C  s   |  |tjS r   )r  r7   sinr!  r   r   r   r   >  r   c                 C  s0   |j jtjkrt|j nt|j }t||jjS r   )r   r   r7   rs   np_erf_fp32np_erf_fp64r   r@   )r   r  r   r   r   r   
create_erf@  s    "zInterpreterBuilder.create_erfc                 C  s   t dt|j |jjS Nr	   )r   r7   r)  r   r   r@   r!  r   r   r   create_rsqrtD  s    zInterpreterBuilder.create_rsqrtc                 C  s   t |j||jjS r   )r   r   r=   r   r@   )r   r  r.   Zallow_reorderr   r   r   r   H  r   c                 C  s   t t|j||jjS r   )r   r7   	transposer   r   r@   )r   r  permr   r   r   create_transJ  s    zInterpreterBuilder.create_transc                 C  s   |j }|j }|jjdkr"|j s8|jjdkrl|j rlt||jtjd tj}t||jtjd tj}t	tj
|||j jd|j  |jjS )Nr4   r5   )r   r   r6   Zis_floatingr   ro   rr   r   r7   r   matmulr@   )r   r   r   dZinput_precisionZmax_num_imprecise_accZa_dataZb_datar   r   r   
create_dotM  s    
zInterpreterBuilder.create_dotc                 C  s   t tj||tjdtjS r   )r   r7   r<   rk   ro   )r   ret_tystartstopr   r   r   create_make_rangeV  s    z$InterpreterBuilder.create_make_rangec                 C  sz   |d u r t tj|jtdtj}t|j|jt|j}tj	||d|fdd }|d  t
|j 8  < t |tjS )Nr5   r   )binsr:   )r   r7   r   r   r   ro   rq   r  r   	histogramZlogical_notsumrk   )r   r   r:  r   r;  r   r   r   create_histogramY  s    z#InterpreterBuilder.create_histogramc                 C  s   t tj|j|j|d|jjS )Nr   )r   r7   Ztake_along_axisr   r   r@   )r   r   indicesr   r   r   r   create_gatherc  s    z InterpreterBuilder.create_gatherc                 C  s<   |  }|j}td|d }t|j||jtj  |jS )Nr	   r4   )	r   r6   maxr   r   r>   r7   r?   r   )r   r   offsetrB   Zelement_bitwidthZelement_bytewidthr   r   r   create_addptrh  s    z InterpreterBuilder.create_addptrc                 C  s   | |\}}| }	t|	}
|d u r,d }n\|tjjkrPttj|j	|
d|	}n8|tjj
krzttj|j	td|
d|	}ntd| | ||||||S )Nr5   nanzunsupported padding option )rH   r   r|   r   ZPADDING_OPTIONZPAD_ZEROr   r7   r   r   ZPAD_NANZ	full_likefloatr   r   )r   r   rA   Zpadding_optionr   r   r   rC   rD   rB   r   r   r   r   r   create_tensor_pointer_loado  s    z-InterpreterBuilder.create_tensor_pointer_loadc                 C  s    | |\}}| |||||S r   rH   r   )r   r   r"   rA   r   r   rC   rD   r   r   r   create_tensor_pointer_store~  s    z.InterpreterBuilder.create_tensor_pointer_storec                 C  s   t t|j||jjS r   )r   r7   expand_dimsr   r   r@   )r   r  r   r   r   r   create_expand_dims  s    z%InterpreterBuilder.create_expand_dimsc                 C  s   t t|j||jjS r   )r   r7   r8   r   r   r@   )r   r  r.   r   r   r   create_broadcast  s    z#InterpreterBuilder.create_broadcastc                 C  s   t t|j|jg|jjS r   )r   r7   Zconcatenater   r   r@   r   r   r   r   
create_cat  s    zInterpreterBuilder.create_catc                 C  s    t tj|j|jgdd|jjS )NrO   r>  )r   r7   stackr   r   r@   r   r   r   r   create_join  s    zInterpreterBuilder.create_joinc                 C  s(   t |jd |jjt |jd |jjfS )N).r   ).r	   r   )r   r   r   r   r   create_split  s    zInterpreterBuilder.create_splitc                 C  s`   |j }t|jtjr:ttj||jd t	|jd|jj
S ttj||jt	|jd|jj
S d S r   )r.   rn   r   ro   r{   r   r7   fullr   r|   r@   )r   r6  r  r.   r   r   r   create_splat  s    &zInterpreterBuilder.create_splatc                 C  sB   || j vrtd| | j | }tt|j|j|j||jjS )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r   Z
atomic_casr   r   r@   )r   r   cmpr   semscoper   r   r   create_atomic_cas  s    

z$InterpreterBuilder.create_atomic_casc                 C  sf   || j vrtd| || jvr0td| | j | }| j| }tt||j|j|j||jjS )Nzunsupported rmwOp rR  )	ir_rmw_op_to_interpreter_rmw_opr   rS  r   r   Z
atomic_rmwr   r   r@   )r   ZrmwOpr   r   r   rU  rV  r   r   r   create_atomic_rmw  s    



z$InterpreterBuilder.create_atomic_rmwc                 C  s   t dd S )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   ZlibNameZlibPathsymbolZargListZretTypeisPurer   r   r   create_extern_elementwise  s    z,InterpreterBuilder.create_extern_elementwisec                 C  s   t dd S )Nz,inline_asm not supported in interpreter moderZ  )r   Z	inlineAsmconstraintsvaluesr   r]  packr   r   r   create_inline_asm  s    z$InterpreterBuilder.create_inline_asmc                 C  s   d| j d  d| j d  d| j d  d}|r<|d| 7 }|rTtjdd	d
 id |D ]}t|d|j   qX|rtjd d d S )N(r   z, r	   r   ) r   c                 S  s   d| dS )N0xZ02xr   r   r   r   r   r     r   z1InterpreterBuilder.create_print.<locals>.<lambda>)	formatter)r   r7   Zset_printoptionsprintr   )r   prefixhexr`  ZisSignedmsgr"   r   r   r   create_print  s    *zInterpreterBuilder.create_printc                 C  s   |sJ | d S r   r   )r   	conditionmessager   r   r   create_assert  s    z InterpreterBuilder.create_assertc                 C  s   |sJ dd S )NzAssume failedr   )r   rm  r   r   r   create_assume  s    z InterpreterBuilder.create_assumec                 C  s   d S r   r   r   r   r   r   create_barrier  s    z!InterpreterBuilder.create_barrierc                 C  s    dd |D }t ||||||S )Nc                 S  s   g | ]}|  qS r   r    .0rB  r   r   r   
<listcomp>  r   z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>)r,   )r   r-   r.   r/   r0   r1   r2   new_offsetsr   r   r   create_make_block_ptr  s    z(InterpreterBuilder.create_make_block_ptrc                 C  sv   t |jt |krtddd |jD }t|j|j|j||j|j}t	t |D ]}|j|  j
|| j
7  _
qR|S )Nz len(ptr.offsets) != len(offsets)c                 S  s   g | ]}|  qS r   rr  rs  r   r   r   ru    r   z5InterpreterBuilder.create_advance.<locals>.<listcomp>)r;   r0   r   r,   r-   r.   r/   r1   r2   r:   r   )r   r   r0   rv  r   r   r   r   r   create_advance  s    z!InterpreterBuilder.create_advancer   rJ   rK   r   )r-   r.   r/   tensor_shaper   c                 C  s   t ||||}|  |S r   )rI   rR   )r   r-   r.   r/   ry  r   descr   r   r   create_make_tensor_descriptor  s    z0InterpreterBuilder.create_make_tensor_descriptorrI   )rz  r?  c                 C  s2   t |tsJ ||\}}| j||d ||ddS )NF)r   r   r   r   )rn   rI   rH   r   )r   rz  r?  r   r   rC   r   r   r   r   create_descriptor_load  s
    z)InterpreterBuilder.create_descriptor_load)rz  r"   r?  c                 C  s    | |\}}| |||d d S r   rG  )r   rz  r"   r?  rC   r   r   r   r   create_descriptor_store  s    z*InterpreterBuilder.create_descriptor_store)rz  	x_offsetsy_offsetc                 C  s   |j jj}t|}tj|jjd |jd g|d}d }d }	t	|jD ]6\}
}t
|tj|g}| ||||	j||
d d f< qDt
||S )Nr   rO   r5   )r-   r   r   r|   r7   zerosr   r.   r1   	enumerater   ro   rk   r|  )r   rz  r~  r  r   r   np_dtyperesultr   r   r   x_offsetr?  r   r   r   create_descriptor_gather  s    
  z+InterpreterBuilder.create_descriptor_gather)rz  r"   r~  r  c           	      C  sH   t |jD ]8\}}t|j| |j}t|tj|g}| ||| q
d S r   )r  r   r   r   ro   rk   r}  )	r   rz  r"   r~  r  r   r  slicer?  r   r   r   create_descriptor_scatter  s    z,InterpreterBuilder.create_descriptor_scatterc                 C  s^   t |}d|jv r*ttjdd|d|jS |tjkrLttjdd|d|jS td| d S )Nrc   r	   rO   r5   Tzunsupported type )r|   namer   r7   rP  r@   r   	TypeError)r   r   Znp_typer   r   r   get_all_ones_value  s    

z%InterpreterBuilder.get_all_ones_valueN)r$   r%   r&   r   ZMEM_SEMANTICZACQUIREr   ZRELEASEZRELAXEDZACQUIRE_RELEASErS  Z	ATOMIC_OPZADDZRMW_OPZFADDZMINZUMINMAXZUMAXANDORZXORZXCHGrX  r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zcreate_si_to_fpZcreate_ui_to_fpZcreate_fp_to_siZcreate_fp_to_uiZcreate_fp_extZcreate_fp_truncZcreate_int_castr   r   r   Zcreate_faddZcreate_fmulZcreate_fdivZcreate_fremZcreate_fsubZ
create_mulZcreate_precise_divfZcreate_sdivZcreate_udivZcreate_sremZcreate_uremZ
create_addZ
create_subZ
create_shlZcreate_lshrZcreate_minsiZcreate_minuiZcreate_minimumfZcreate_minnumfZcreate_maxsiZcreate_maxuiZcreate_maximumfZcreate_maxnumfZcreate_icmpSLEZcreate_icmpSLTZcreate_icmpSGEZcreate_icmpSGTZcreate_icmpULEZcreate_icmpULTZcreate_icmpUGEZcreate_icmpUGTZcreate_icmpEQZcreate_icmpNEZcreate_fcmpOLTZcreate_fcmpOGTZcreate_fcmpOLEZcreate_fcmpOGEZcreate_fcmpOEQZcreate_fcmpONEZcreate_fcmpULTZcreate_fcmpUGTZcreate_fcmpULEZcreate_fcmpUGEZcreate_fcmpUEQZcreate_fcmpUNEZ
create_andZ
create_xorZ	create_orZcreate_int_to_ptrZcreate_ptr_to_intr  r  r  r  Zcreate_clampfZcreate_selectr  r  r  Z
create_cosZ
create_expZcreate_exp2Zcreate_iabsZcreate_floorZcreate_ceilZ
create_logZcreate_log2Zcreate_precise_sqrtZcreate_sqrtZ
create_sinr-  r/  Zcreate_reshaper2  r5  r9  r=  r@  rC  rF  rH  rJ  rK  rL  rN  rO  rQ  rW  rY  r^  rb  rl  ro  rp  rq  rw  rx  r{  r|  r}  r  r  r  r   r   r   r   r     sX  	

	
	
r   c                   s*   t | |d fdd
}t| || d S )N)memberc                   s$   | |i dd |  D d iS )Nc                 S  s   i | ]\}}|d kr||qS )r   r   rt  kvr   r   r   
<dictcomp>  s   z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>r   items)r  argskwargssemanticr   r   r     s   z_patch_attr.<locals>.<lambda>)r   setattr)objr  r  r   
new_memberr   r  r   _patch_attr  s    r  c                 C  s2   t | D ]"\}}tj|r
t| ||| q
d S r   )inspect
getmembersro   core
is_builtinr  )pkgr   r  r  r   r   r   _patch_builtin  s    r  c                   sJ   dd  dd }dd | _  fdd| _dd | _d	d | _t|| _d S )
Nc                 S  s   | j j}|jdkrt|S dS )Nr	   T)r   r   sizer   )r   r   r   r   r   	_get_bool  s    z%_patch_lang_tensor.<locals>._get_boolc                 S  sj   t t| jj| jj}| j s&J t| jj	}|d |d  |d< |d< t
j| j|}t
j||S )NrO   )r   r7   r0  r   r   r   r   Zis_blocklistr.   ro   r  r{   r   )r   r   r1   Zres_tyr   r   r   _get_transpose"  s    z*_patch_lang_tensor.<locals>._get_transposec                 S  s   t | jjS r   )rc   r   r   r   r   r   r   r   *  r   z$_patch_lang_tensor.<locals>.<lambda>c                   s    | S r   r   r   r  r   r   r   +  r   c                 S  s   t | jjS r   )reprr   r   r   r   r   r   r   ,  r   c                 S  s   t | jjS r   )rY   r   r   r   r   r   r   r   -  r   )	__index__r   __repr____str__propertyT)r   r  r   r  r   _patch_lang_tensor  s    


r  c                   @  s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )ReduceScanOpInterfacec                 C  s   || _ || _d S r   )r   
combine_fn)r   r   r  r   r   r   r3   3  s    zReduceScanOpInterface.__init__c                 C  s,   |d ur(|t |kr(td| d| d S )Nzaxis z out of bounds for shape )r;   r   )r   r.   r   r   r   r   
check_axis7  s    z ReduceScanOpInterface.check_axisc                 C  s>   |D ]4}t |tjjs(tdt| | |j| j qd S )Nzinput must be a tensor, got )	rn   ro   r  r   r   r   r  r.   r   )r   r   r  r   r   r   check_tensor;  s    z"ReduceScanOpInterface.check_tensorc                 C  s`   t |}t|dr6|jr6||}t|t|j}ntj|g|d}|}tj	
t||j|S )Nr.   r5   )r|   r   r.   r>   ro   r{   r  r7   r   r  r   r   r@   )r   r   r   r  Zret_typer   r   r   	to_tensorA  s    
zReduceScanOpInterface.to_tensorc                 C  sJ   t |ts| |fd S | | | |}t |ttfrDt|S |fS Nr   )rn   tupleapplyr  
apply_implr  )r   r   r   r   r   r   r  K  s
    


zReduceScanOpInterface.applyN)r$   r%   r&   r3   r  r  r  r  r   r   r   r   r  1  s
   
r  c                      sF   e Zd Z fddZdd Zdd Zddd	Zd
d Zdd Z  Z	S )	ReduceOpsc                   s   t  || || _d S r   )superr3   	keep_dims)r   r   r  r  	__class__r   r   r3   U  s    zReduceOps.__init__c                 C  sN   g }|D ]8}|d ur | | qd}| | |jj |j qt||fS r  )appendr  r   r   flattenr   r  )r   r   r   r   r   r   r   r   unravelY  s    zReduceOps.unravelc                   s,  j } j \ }g }g } d jjj}|d| ||d d   } D ],}||jj |tj||jjjd qNt	|d j
D ]}	t|	|d| |d d   t fddt|D }
| dkrt	t|D ]}|
| jj || < qqt fddt|D }jjg ||
R  }t|tsV|fn|}t	t|D ]:}t|| tjjr|| jj n|| || < qfqg }t|D ]v\}	}jr|d urt||}n t	t|D ]}t|d}qn|d u r| }|| |	 j q|S )Nr   r	   r5   c                 3  s(   | ] \}} |  | jV  qd S r   r  r   rt  iir4  )r   input_indexr   r   r   	<genexpr>r  r   z+ReduceOps.generic_reduce.<locals>.<genexpr>c                 3  s(   | ] \}} |  | jV  qd S r   r  rt  Zoio)r   output_indexr   r   r   r  x  r   )r   r  r   r   r.   r  r7   r  r   r:   r  unravel_indexr  r  r;   rP   r  fnrn   ro   r  r   r  rI  r  )r   r   Zoriginal_axisr   
input_dataoutput_dataZinput_shapeZoutput_shaper  r   Zinput_tuplej	acc_tuplecombine_fn_retr   r   _r   )r   r  r  r   r   generic_reducec  sH    

zReduceOps.generic_reduceNc                 C  s   t |tr|d n|}d }d }|rB| ||jj| j| jd|j}|rf| ||jj| j| jdtj	}|d ur~|d ur~||fS |d ur|S |d ur|S t
dd S )Nr   r   Zkeepdimsz-val_reduce_op and idx_reduce_op are both None)rn   r  r  r   r   r   r  r   ro   rk   r   )r   r   val_reduce_opidx_reduce_opr   idxr   r   r   min_max  s      zReduceOps.min_maxc                 C  s"   |  tj|jj| j| jd|jS )Nr  )r  r7   r<  r   r   r   r  r   r   r   r   r   r   r<    s    zReduceOps.sumc                 C  s   | j tjjkr&| j|d tjtjdS | j tjjkrL| j|d tj	tj
dS | j tjjkrp| j|d tj	d dS | j tjjkr| j|d tjd dS | j tjjkr| |d S | |S d S )Nr   )r  r  )r  ro   standardZ_argmin_combine_tie_break_leftr  r7   minZargminZ_argmax_combine_tie_break_leftrA  ZargmaxZ_elementwise_maxZ_elementwise_min_sum_combiner<  r  r  r   r   r   r    s    zReduceOps.apply_impl)N)
r$   r%   r&   r3   r  r  r  r<  r  __classcell__r   r   r  r   r  S  s   
+
r  c                      s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )ScanOpsc                   s   t  || || _d S r   )r  r3   reverse)r   r   r  r  r  r   r   r3     s    zScanOps.__init__c                 C  s"   | j tj|jj| jd|jdgS Nr>  r5   )r  r7   cumsumr   r   r   r   r  r   r   r   r    s    zScanOps.cumsumc                 C  s"   | j tj|jj| jd|jdgS r  )r  r7   cumprodr   r   r   r   r  r   r   r   r    s    zScanOps.cumprodc                   s  g }g }d j jj}D ],}||j j |tj||j jjd qt|d jD ]}t	|| t
 fddt|D } j dkrtt|D ]}|| j j ||  < qqVt
 fddtt D t
fddt|D }	jjg |	|R  }
t|
t
s&|
fn|
}	tt|D ]:}t|	| tjjr^|	| j j n|	| ||  < q6qVg }t|D ]"\}}||| j q|S )Nr   r5   c                 3  s(   | ] \}} |  | jV  qd S r   r  r  )indexr   r   r   r   r    r   z'ScanOps.generic_scan.<locals>.<genexpr>c                 3  s,   | ]$}|j kr | d  n | V  qdS )r	   Nr>  )rt  r   )r  r   r   r   r    r   c                 3  s(   | ] \}} |  | jV  qd S r   r  r  )r   
prev_indexr   r   r   r    r   )r   r   r.   r  r7   r  r   r:   r  r  r  r  r   r;   rP   r  r  rn   ro   r  r   r  )r   r   r  r  r.   r  r   r   r  r  r  r   r   )r  r   r  r   r   generic_scan  s4     zScanOps.generic_scanc              	   C  s   g }| j r:|D ](}|| tj|jj| jd|j qn|}| j	t
jjkr\| |d }n(| j	t
jjkrz| |d }n
| |}| j r|D ]}tj|jj| jd|j_q|S )Nr>  r   )r  r  r  r7   Zflipr   r   r   r   r  ro   r  r  r  Z_prod_combiner  r  )r   r   Z	new_inputr  r   r   r   r   r    s    (
zScanOps.apply_impl)	r$   r%   r&   r3   r  r  r  r  r  r   r   r  r   r    s
   r  c                  C  s4   ddd} ddd}| t _|t _| t j_|t j_d S )NFc                 [  s   t |||| S r   )r  r  )r   r   r  r  r  r   r   r   _new_reduce  s    z'_patch_reduce_scan.<locals>._new_reducec                 [  s   t |||| S r   )r  r  )r   r   r  r  r  r   r   r   	_new_scan  s    z%_patch_reduce_scan.<locals>._new_scan)F)F)ro   reduceZassociative_scanr  )r  r  r   r   r   _patch_reduce_scan  s    

r  c                 C  sx   dd }ddd}ddd}dd	 }|| _ || _|| _t| _|| j_t|d
d| _t|dd| _	t|dd| _
t  d S )Nc                 S  sP  | j dkr| S | j dkr$| S | j dkr6| S | j dkrH| S | j dkrZ| S | j dkrl| S | j dkr~| S | j dkr| S | j d	kr|	 S | j d
kr|
 S | j dkr| S | j dkr| S | j dk r| S | j dkr | S | j dkr| S | j dkr(| S | j dkr<| S td|  dd S )Nvoidrq   rg   rf   ri   rh   rk   rj   rl   r?   r[   r\   r]   Zfp16Zbf16Zfp32Zfp64zfail to convert z to ir type)r  Zget_void_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   
_new_to_ir  sF    











z$_patch_lang_core.<locals>._new_to_irc                 [  s6   |d u rd}|d u r d|  }}n
| | }}t |||S )Nr	   r   )r:   )Zarg1Zarg2stepr  r7  endr   r   r   
_new_range+  s    
z$_patch_lang_core.<locals>._new_range c                 S  s   | sJ |d S r   r   )r  rk  r   r   r   _new_static_assert4  s    z,_patch_lang_core.<locals>._new_static_assertc                 S  sn   t | tjs| S t |ttfs$|gn|}dd |D }t|tdt| jkr\td| | j	
|| | S )Nc                 S  s"   g | ]}t |tjr|jn|qS r   )rn   ro   	constexprr"   rt  r  r   r   r   ru  =  r   z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>r	   z$len(values) != len(input.shape) for )rn   ro   r   r  r  r;   rA  r.   r   r   r#   )r   r`  r  r   r   r   	_set_attr7  s    z#_patch_lang_core.<locals>._set_attrztt.divisibilityr  ztt.contiguityztt.constancy)NN)r  )r:   Zstatic_rangeZstatic_assertrh  Zstatic_printr   Zto_irr   Zmultiple_ofZmax_contiguousZmax_constancyr  )langr  r  r  r  r   r   r   _patch_lang_core  s    (
	
r  c                 C  s   dd | j  D }t|dks(J d|D ]@}t|t t|jt |tkrZt|jt t|j t	| q,ttj
jt d S )Nc                 S  s,   g | ]$\}}t |r|ttjfv r|qS r   )r  ismodulero   r  )rt  r  r"   r   r   r   ru  P  r   z_patch_lang.<locals>.<listcomp>r	   z:triton.language must be visible from within jit'd function)__globals__r  r;   r  interpreter_builderr   ro   r   r  r  r  Ztensor_descriptor_base)r  Zlangsr  r   r   r   _patch_langO  s    


r  c                 C  s"   t | drt| | S t| |S )N_fields)r   r   )r  contentsr   r   r   _tuple_create\  s    r  c                 C  s  t | trttjj| }tj	}d|   kr8dk rDn ntj	}nnd|   krXdk rdn ntj
}nNd|   krxdk rn ntj}n.d|   krdk rn ntj}ntd|  ttj| g|d|}t||S t| d	rttjj| }ttj|  gtjd|}t||S t | tr8t| tt| S t | trd
d | jD }| jd dkshJ td|d< tt }|jt| jdd | jD |dd | jD dS | S )Ni   l        l        l         l            l            zUnsupported integer value r5   data_ptrc                 S  s   g | ]}t |qS r   _implicit_cvtrt  sr   r   r   ru  }  r   z!_implicit_cvt.<locals>.<listcomp>rO   r	   c                 S  s   g | ]}t |qS r   r  r  r   r   r   ru    r   c                 S  s   g | ]}t |qS r   )ro   r  )rt  r   r   r   r   ru    r   rL   ) rn   rc   ro   Z	str_to_tytritonruntimejitZmangle_typer7   rk   rj   rl   r?   r   r   r   r   r   r  r  r  mapr  r   r/   r  r   r   Zmake_tensor_descriptorr-   r.   r1   )r  tyr   r   r/   r  r   r   r   r  f  s@    

r  c                 C  s   t | tjjjr| jS | S r   )rn   r  r  r  TensorWrapperr-   )tr   r   r   _unwrap_tensor  s    r  c                 C  s&   t |tjjjr"tjj| |jS | S r   )rn   r  r  r  r  r   )r  original_tensorr   r   r   _rewrap_tensor  s    r
  c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
GridExecutorc                   sN   ddl m || _|| _|| _fdd|j D   fdd|D | _d S )Nr	   _normalize_tyc                   s   i | ]\}}| |qS r   r   )rt  r  r  r  r   r   r    r   z)GridExecutor.__init__.<locals>.<dictcomp>c                   s   g | ]}  |d kr|qS )r  )get)rt  r  )r(   r   r   ru    r   z)GridExecutor.__init__.<locals>.<listcomp>)r  r  r  	arg_namesgridr(   r  
constexprs)r   r  r  r  r   )r(   r  r   r3     s    zGridExecutor.__init__c                   sN   i  fdd  fdd|D }i }|  D ]\}} |||< q0||fS )Nc                   s   t | trt| t | S t | tr>t | j| j| j| jS t	| dsL| S t
| }|  vr|| }| | < |   }|jddd}||| | |  t|| d}|S )Nr  r   cpu)Zdevice)r	  )rn   r  r  r  r   r-   r.   r/   r1   r   r  untyped_storager  r  Z	new_emptyset_Zstorage_offsetr  rQ   r
  )r  Zunwrapped_argZstorageZcpu_arg_to_cpustoragesr   r   r    s(    


z,GridExecutor._init_args_hst.<locals>._to_cpuc                   s   g | ]} |qS r   r   )rt  r  )r  r   r   ru    r   z/GridExecutor._init_args_hst.<locals>.<listcomp>r  )r   args_devr  args_hst
kwargs_hstr!   r"   r   r  r   _init_args_hst  s    zGridExecutor._init_args_hstc           
        st   i  fdd t ||D ]\}} || q| D ]\}}|| }	 ||	 q8 D ]\}}|| q\d S )Nc                   s   t | dr:t| t| } }|  | f|   < nBt| trdt| |D ]\} } | | qNnt| tr| | j|j d S )Nr  )	r   r  r  r  rn   r  zipr   r-   )arg_devarg_hst	_from_cpur  r   r   r     s    


z1GridExecutor._restore_args_dev.<locals>._from_cpu)r  r  r`  Zcopy_)
r   r  r  r  r  r  r  r!   Z	kwarg_devZ	kwarg_hstr   r  r   _restore_args_dev  s    zGridExecutor._restore_args_devc              
     sx  | ddrd S tj  fdd| D }||\}}tj tjjg|R i |}fdd| D }tj	r	|nj	}t
|dksJ d|ddt
|   }tj|  zXt|d	 D ]F}t|d
 D ]4}t|d D ]"}	t|||	 jf i | qqqW nB tyb }
 z(tjjjr@ tt|
|
W Y d }
~
n
d }
~
0 0 |||| d S )NZwarmupFc                   s    i | ]\}}| j v r||qS r   )r  r  )argspecr   r   r    r   z)GridExecutor.__call__.<locals>.<dictcomp>c                   s(   i | ] \}}|| j v r|nt|qS r   )r  r  )rt  r  r  r   r   r   r    r      z#grid must have at most 3 dimensions)r	   r   r	   r   )popr  getfullargspecr  r  r  r  getcallargscallabler  r;   r  r   r:   r   	Exceptionr  ZknobsZcompilationZfront_end_debuggingr
   r  r!  )r   r  r  r  r  r  r  r   r   r   er   )r"  r   r   __call__  s.    

$zGridExecutor.__call__N)r$   r%   r&   r3   r  r!  r*  r   r   r   r   r    s   	#r  c                   @  s   e Zd Zdd ZdS )ASTTransformerc                 C  sv   g }|j D ]}|| |g7 }q
t|dkr4tdtjtjtjdt ddt d|j	tj
ddgg d	|_	|S )
Nr	   z&Multiple assignments are not supportedinterpreter_semantic)idctxr  )r"   r   r.  F)r"   )funcr  keywords)targetsvisitr;   r   astCall	AttributeNameLoadr"   Constant)r   nodenamestargetr   r   r   visit_Assign  s    
zASTTransformer.visit_AssignN)r$   r%   r&   r<  r   r   r   r   r+    s   r+  c                   @  sJ   e Zd Ze Zdd Zdd Zdd Zdd Zd	d
 Z	dd Z
dd ZdS )FunctionRewriterc                 K  s   || _ || _d| _d| _d S )Nr  r   )r  r  filenamedef_file_lineno)r   r  r  r   r   r   r3     s    zFunctionRewriter.__init__c                 C  sh   zt | j\}}W n ty,   | j Y S 0 |  \| _| _| || _| 	|}| 
|}| |S r   )r  getsourcelinesr  r(  _get_jit_fn_file_liner>  r?  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r   linesr  r   transformed_astr   r   r   rewrite_ast  s    

zFunctionRewriter.rewrite_astc                 C  s   ddl m}m} ||| jS )Nr	   )get_jit_fn_file_lineJITFunction)r  rJ  rK  r  )r   rJ  rK  r   r   r   rA  2  s    z&FunctionRewriter._get_jit_fn_file_linec                 C  s0   d}t |D ]\}}| dr|d }q|S )Nr   zdef r	   )r  strip
startswith)r   rG  rC  r   liner   r   r   rB  6  s
    
zFunctionRewriter._find_defc                 C  s&   || j d d  }d|}t|S )Nr	   r  )rC  jointextwrapdedent)r   rG  r   r   r   r   rD  >  s    
z FunctionRewriter._prepare_sourcec                 C  s:   t |}| j|}t | | jd }t || |S r.  )r3  parseast_transformerr2  fix_missing_locationsr?  increment_lineno)r   r   Z
parsed_astrH  Z
inc_linenor   r   r   rE  C  s    


zFunctionRewriter._transform_astc                 C  s^   t || jdd}i | j}| jj}t  D ]\}}||vr,|||< q,t||| || jj S )Nexec)r>  mode)	compiler>  r  r  r  globalsr  rV  r$   )r   rH  Zcompiled_codeZlocal_namespaceZ
fn_globalsr!   r"   r   r   r   rF  N  s    

z"FunctionRewriter._compile_and_execN)r$   r%   r&   r+  rS  r3   rI  rA  rB  rD  rE  rF  r   r   r   r   r=    s   r=  c                   @  sB   e Zd Zi ZddddZdd Zedd Z d	d
 Zdd ZdS )InterpretedFunctionr   r   c                   sN   | _ t|fi | _ fdd}| _t|}dd |j D  _d S )Nc                    s(   |d }   }t| j|| i |S )Nr  rewriter  r  )r  r  r  r  r   r   r   runa  s    z)InterpretedFunction.__init__.<locals>.runc                 S  s   g | ]
}|j qS r   r  r  r   r   r   ru  h  r   z0InterpretedFunction.__init__.<locals>.<listcomp>)	r  r=  rewriterr]  r  	signature
parametersr`  r  )r   r  r  r]  r_  r   r   r   r3   ]  s    
zInterpretedFunction.__init__c                 C  s*   | j | jvr| j | j| j < | j| j  S r   )r  rewritten_fnr^  rI  r   r   r   r   r\  j  s    zInterpretedFunction.rewritec                 C  s   | j jS r   )r  r$   r   r   r   r   r$   o  s    zInterpretedFunction.__name__c                 C  s   |   }t|| j|S r   r[  )r   r  r  r   r   r   __getitem__s  s    zInterpretedFunction.__getitem__c              
   O  sZ   t | j |  }z||i |W S  tyT } ztt||W Y d }~n
d }~0 0 d S r   )r  r  r\  r(  r
   r  )r   r  r  r  r)  r   r   r   r*  w  s    
zInterpretedFunction.__call__N)	r$   r%   r&   ra  r3   r\  r  rb  r*  r   r   r   r   rZ  Y  s   
rZ  )E
__future__r   r3  rP  r  typingr   r   r   r   numpyr7   r  Ztriton.languagelanguagero   r)   r   Ztriton.language.semanticr   Ztriton.tools.tensor_descriptorr   errorsr
   	functoolsr   Z_C.libtritonr   r   r   r   r   r,   rI   rU   rm   r|   r   r   r   Z	vectorizers   r+  rt   r,  r?   r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r,  r  r
  r  NodeTransformerr+  r=  rZ  r   r   r   r   <module>   sn   & @    	"`>N
$jE