o
    cZh                     @   s  d dl Z d dlZd dlZd dlmZmZ d dlZd dlZd dl	Z	d dl
mZ d dlmZ ddlmZ d dlmZ ddlmZ dd	lmZ G d
d dZG dd dZG dd dZeddG dd dZdd Zdd Zdd Zdd Zdd Z ej!eej"gdZ#ej!eej$gdZ%ej!e ej&gdZ'G dd  d Z(G d!d" d"Z)d#d$ Z*d%d& Z+d'd( Z,G d)d* d*Z-G d+d, d,e-Z.G d-d. d.e-Z/d/d0 Z0d1d2 Z1d3d4 Z2d5d6 Z3d7d8 Z4e) Z5d9d: Z6d;d< Z7G d=d> d>Z8G d?d@ d@e j9Z:G dAdB dBZ;G dCdD dDZ<dS )E    N)TupleList)	dataclass   )InterpreterError)partial   )interpreter)irc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )TensorHandlec                 C   s   || _ || _i | _dS )a  
            data: numpy array
            dtype: triton type, either pointer_type or scalar_type.
            we don't store block_type here because the shape information is already available in the data field
            attr: a dictionary of attributes
        N)datadtypeattr)selfr   r    r   I/var/www/auris/lib/python3.10/site-packages/triton/runtime/interpreter.py__init__   s   
zTensorHandle.__init__c                 C   s   t | j S N)boolr   allr   r   r   r   __bool__      zTensorHandle.__bool__c                 C   s$   | j }t|dr|j}t|ds|S )N
element_ty)r   hasattrr   )r   r   r   r   r   get_element_ty"   s
   

zTensorHandle.get_element_tyc                 C   s   t | j | jS r   )r   r   copyr   r   r   r   r   clone(      zTensorHandle.clonec                 C   s   || j |< d S r   )r   )r   keyvaluer   r   r   set_attr+   r   zTensorHandle.set_attrN)__name__
__module____qualname__r   r   r   r   r!   r   r   r   r   r      s    r   c                   @   s   e Zd Zdd Zdd ZdS )BlockPointerHandlec                 C   s(   || _ || _|| _|| _|| _|| _d S r   )baseshapestridesoffsetsblock_shapeorder)r   r&   r'   r(   r)   r*   r+   r   r   r   r   1   s   
zBlockPointerHandle.__init__c           	      C   s   | j  }|jd }t| j j| j}tj| jtd}t	t
| jD ]D}dgt
| j }| j| ||< | j| jt| j|  |}||| | j| j tj }||v rf||| j| jk @ |dk@ }q"t|| j jj}||fS )N   r   r   r   )r&   r   primitive_bitwidthnpbroadcast_tor   r*   onesr   rangelenr)   arangereshaper(   astypeuint64r'   r   r   scalar)	r   boundary_checkdtype_ttZn_bytesptrsmasksdim
bcast_dimsoffr   r   r   materialize_pointers9   s   

" z'BlockPointerHandle.materialize_pointersN)r"   r#   r$   r   r@   r   r   r   r   r%   /   s    r%   c                	   @   sL   e Zd Zdedee dee dee fddZdd Zd	ee fd
dZdS )TensorDescHandler&   r'   r(   r*   c                 C   s&   || _ t|| _|| _|| _|| _d S r   )r&   r3   ndimr'   r(   r*   )r   r&   r'   r(   r*   r   r   r   r   K   s
   

zTensorDescHandle.__init__c                 C   s   | j j d dksJ dt| j| jksJ t| j| jks"J | jd d D ]}|j d dks8J dq)| jd j dksGJ dd S )N   r   zbase must be 16-byte alignedzstride must be 16-byte alignedr   zlast dim must be contiguous)r&   r   itemr3   r(   rB   r*   )r   strider   r   r   validateS   s    zTensorDescHandle.validater)   c           	      C   s  t || jks	J | jjj}|jd }|d j| d dks"J dt| jj| j	}tj
| j	td}tt | j	D ]?}dgt | j	 }| j	| ||< || jt| j	|  |}||| | j| j tj }|d|k@ || j| jk @ }q:t|| jjj}||fS )Nr,   rD   rC   r   z*block offset start must be 16-byte alignedr-   r   )r3   rB   r&   r   r   r.   r   r/   r0   r*   r1   r   r2   r4   r5   r(   r6   r7   r'   r   r8   )	r   r)   Z	scalar_tyitemsizer;   r<   r=   r>   r?   r   r   r   r@   \   s   

  z%TensorDescHandle.materialize_pointersN)	r"   r#   r$   r   r   intr   rG   r@   r   r   r   r   rA   I   s    
	rA   T)frozenc                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< dZee
 ed	< d
Zee
 ed< dZe
ed< dZee
 ed< dZeed< dZe
ed< dS )InterpreterOptionsNextern_libsFdebugTsanitize_overflowarch)fp8e5Zfp8e5b16fp8e4nvZfp8e4b8fp8e4b15supported_fp8_dtypesr   deprecated_fp8_dtypestf32default_dot_input_precision)rU   Ztf32x3Zieeeallowed_dot_input_precisionsr   max_num_imprecise_acc_defaultr	   backend_name)r"   r#   r$   rL   dict__annotations__rM   r   rN   rO   strrS   r   rT   rV   rW   rX   rI   rY   r   r   r   r   rK   n   s   
 rK   c                 C   sD   | t jkrt jS | t jkrt jS | t jkrt jS | t jkr t jS | S r   )	r/   uint8int8uint16int16uint32int32r7   int64r-   r   r   r   _get_signed_np_dtype|   s   



rd   c                 C   st  t | tjrttjS i tjtttjttjtj	ttj	tj
ttj
tjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttji}t | tjrt | jtjrttjS || j S ||  S r   )
isinstancetlpointer_typer/   r   r7   int1r   float16float32float64r^   r]   r`   r_   rb   ra   rc   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )Ztt_dtypeZnp_typesr   r   r   _get_np_dtype   sX   	

rs   c                 C   s  t td|j }t td|j }tj|  |d}||jd ? d@ }|j|j d }|j|j d }	|d|j> d @ }
|j}|j}||j? d|> d @ tj}|dk}t	|rtj
|tjd}t|jD ]}|
|? d@ }|j| ||dk< qh|
dk}d||  ||< || |||@ < |
| || > d|j> d @ |
|< tdt|| | d|	> d }||}||}|j|jkr|
|j|j ? d|j> d @ }|tjjkr|
d|j|j d > @ }||dk }||}n|
||j|j > d|j> d @ }|dk}t	|rH||j? d|> d @ tj}|dk}||@ }tj
|tjd}d| || |  ||< || || ? d|j||  > B ||< ||jd > ||j> B |B }|| jS )Nuintr-   r   r   )getattrr/   r.   Z
frombuffertobytesZfp_mantissa_widthZexponent_biasr6   rb   any
zeros_liker2   maximumminimum_irZROUNDING_MODEZRTNEr5   r'   )inputZinput_dtypeZoutput_dtyperounding_modeZinput_uint_dtypeZoutput_unint_dtypeZ	input_binsignZinput_exponent_widthZoutput_exponent_widthZsignificandZ
bias_inputZbias_outputexponentZsubnormal_indexZbit_posiZ	bit_indexZzero_significand_indexZexponent_outputZsign_outputZsignificand_outputcut_offZnon_zero_exponent_indexshiftoutputr   r   r   _convert_float   sl   
$


r   c                 C   s
   t | S r   )matherfxr   r   r   _erf   s   
r   c                 C   s   t | t | d? S )N@   )rI   )abr   r   r   
_umulhi_64   s   r   )Zotypesc                   @   s   e Zd Zedd ZdS )ExtraFunctionsc                 C   s   t || j|||S r   )rf   tensorcreate_fp_to_fphandle)r|   Zdst_tyZfp_downcast_rounding_builderr   r   r   _convert_custom_types   s   z$ExtraFunctions._convert_custom_typesN)r"   r#   r$   staticmethodr   r   r   r   r   r      s    r   c                   @   s  e Zd Zejjejjejjejjejjejjejj	ejj	iZ
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejji
ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/d3d4 Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5d?d@ Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAdWdX ZBdYdX ZCdZdX ZDd[dX ZEd\dX ZFd]dX ZGd^dX ZHd_d` ZIdadb ZJdcdd ZKdedX ZLdfdX ZMdgdX ZNdhdX ZOdidX ZPdjdX ZQdkdX ZRdldX ZSdmdX ZTdndX ZUdodX ZVdpdX ZWdqdX ZXdrdX ZYdsdX ZZdtdX Z[dudX Z\dvdX Z]dwdX Z^dxdX Z_dydX Z`dzdX Zad{dX Zbd|dX Zcd}dX Zdd~dX ZeddX ZfddX ZgddX ZhddX ZiddX ZjddX ZkddX ZlddX ZmddX ZnddX ZoddX ZpddX ZqddX ZrddX ZsddX ZtddX ZuddX ZvddX ZwddX ZxddX ZyddX ZzddX Z{eJZ|eJZ}dd Z~dd Zdd Zdd ZddX ZddX Zdd Zdd Zdd ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX ZddX Zdd Zdd ZddX Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdedee dee dee fddZdedee fddZdededee fddZdededefddZdedededefddZdd ZdS )InterpreterBuilderreturnNc                 C   s2   d | _ t | _i | _tj| jd< dd | jd< d S )NZconvert_custom_typesc                 S   s   dS )N)r   r   r   r   )ZlhsTypeZrhsTyper   r   r   <lambda>  s    z-InterpreterBuilder.__init__.<locals>.<lambda>Zmin_dot_size)rO   rK   optionsZcodegen_fnsr   r   r   r   r   r   r     s
   zInterpreterBuilder.__init__c                 C   sR   || j d k std|| j d k std|| j d k s!td|||f| _d S )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzr   r   r   set_grid_idx  s   zInterpreterBuilder.set_grid_idxc                 C   s   |||f| _ d S r   )r   )r   nxnyZnzr   r   r   set_grid_dim$     zInterpreterBuilder.set_grid_dimc                 C      t jS r   )rf   ri   r   r   r   r   get_half_ty)     zInterpreterBuilder.get_half_tyc                 C   r   r   )rf   rl   r   r   r   r   get_bf16_ty,  r   zInterpreterBuilder.get_bf16_tyc                 C   r   r   )rf   rj   r   r   r   r   get_float_ty/  r   zInterpreterBuilder.get_float_tyc                 C   r   r   )rf   rk   r   r   r   r   get_double_ty2  r   z InterpreterBuilder.get_double_tyc                 C   r   r   )rf   r^   r   r   r   r   get_int8_ty5  r   zInterpreterBuilder.get_int8_tyc                 C   r   r   )rf   r]   r   r   r   r   get_uint8_ty8  r   zInterpreterBuilder.get_uint8_tyc                 C   r   r   )rf   r`   r   r   r   r   get_int16_ty;  r   zInterpreterBuilder.get_int16_tyc                 C   r   r   )rf   r_   r   r   r   r   get_uint16_ty>  r   z InterpreterBuilder.get_uint16_tyc                 C   r   r   )rf   rb   r   r   r   r   get_int32_tyA  r   zInterpreterBuilder.get_int32_tyc                 C   r   r   )rf   ra   r   r   r   r   get_uint32_tyD  r   z InterpreterBuilder.get_uint32_tyc                 C   r   r   )rf   rc   r   r   r   r   get_int64_tyG  r   zInterpreterBuilder.get_int64_tyc                 C   r   r   )rf   r7   r   r   r   r   get_uint64_tyJ  r   z InterpreterBuilder.get_uint64_tyc                 C   r   r   )rf   ro   r   r   r   r   get_fp8e4nv_tyM  r   z!InterpreterBuilder.get_fp8e4nv_tyc                 C   r   r   )rf   rq   r   r   r   r   get_fp8e4b15_tyP  r   z"InterpreterBuilder.get_fp8e4b15_tyc                 C   r   r   )rf   rp   r   r   r   r   get_fp8e4b8_tyS  r   z!InterpreterBuilder.get_fp8e4b8_tyc                 C   r   r   )rf   rm   r   r   r   r   get_fp8e5_tyV  r   zInterpreterBuilder.get_fp8e5_tyc                 C   r   r   )rf   rn   r   r   r   r   get_fp8e5b16_tyY  r   z"InterpreterBuilder.get_fp8e5b16_tyc                 C      t ||S r   )rf   rg   )r   Zelt_tyZ
addr_spacer   r   r   
get_ptr_ty\     zInterpreterBuilder.get_ptr_tyc                 C   r   r   )rf   rr   )r   r   r'   r   r   r   get_block_ty_  r   zInterpreterBuilder.get_block_tyc                 C   s   t tj|gtjdtjS Nr-   )r   r/   arrayZbool_rf   rh   r   r    r   r   r   get_int1b     zInterpreterBuilder.get_int1c                 C      t tj|gtjdtjS r   )r   r/   r   r]   rf   r   r   r   r   	get_uint8e  r   zInterpreterBuilder.get_uint8c                 C   r   r   )r   r/   r   r^   rf   r   r   r   r   get_int8h  r   zInterpreterBuilder.get_int8c                 C   r   r   )r   r/   r   r_   rf   r   r   r   r   
get_uint16k  r   zInterpreterBuilder.get_uint16c                 C   r   r   )r   r/   r   r`   rf   r   r   r   r   	get_int16n  r   zInterpreterBuilder.get_int16c                 C   r   r   )r   r/   r   ra   rf   r   r   r   r   
get_uint32q  r   zInterpreterBuilder.get_uint32c                 C   r   r   )r   r/   r   rb   rf   r   r   r   r   	get_int32t  r   zInterpreterBuilder.get_int32c                 C   r   r   )r   r/   r   r7   rf   r   r   r   r   
get_uint64w  r   zInterpreterBuilder.get_uint64c                 C   r   r   )r   r/   r   rc   rf   r   r   r   r   	get_int64z  r   zInterpreterBuilder.get_int64c                 C   r   r   )r   r/   r   ri   rf   r   r   r   r   get_fp16}  r   zInterpreterBuilder.get_fp16c                 C   r   r   )r   r/   r   rj   rf   r   r   r   r   get_fp32  r   zInterpreterBuilder.get_fp32c                 C   r   r   )r   r/   r   rk   rf   r   r   r   r   get_fp64  r   zInterpreterBuilder.get_fp64c                 C   s   t tjdgt|d|S Nr   r-   )r   r/   r   rs   )r   typer   r   r   get_null_value  r   z!InterpreterBuilder.get_null_valuec                 C   s2   | j d u r	tdttj| j | gtjdtjS )Nzgrid_idx is Noner-   )r   r   r   r/   r   rb   rf   r   axisr   r   r   create_get_program_id  s   
 z(InterpreterBuilder.create_get_program_idc                 C   s    t tj| j| gtjdtjS r   )r   r/   r   r   rb   rf   r   r   r   r   create_get_num_programs  s    z*InterpreterBuilder.create_get_num_programsc                 C   s0   t tj|jtdtj}d }| ||||||S r   )r   r/   	ones_liker   r   rf   rh   create_masked_load)r   ptr_0_1is_volatilemaskotherr   r   r   create_load  s   zInterpreterBuilder.create_loadc                 C   s*   t tj|jtdtj}| |||d d S r   )r   r/   r   r   r   rf   rh   create_masked_store)r   r   valr   r   r   r   r   r   create_store  s   zInterpreterBuilder.create_storec           
      C   sN   |  }t|}|d u rttj|j|d|}t|j|j|j|}	t|	|S r   )r   rs   r   r/   rx   r   _interpreterload)
r   r;   r   r   cache_modifiereviction_policyr   r:   dtype_npretr   r   r   r     s   
z%InterpreterBuilder.create_masked_loadc                 C   s   t |j|j|jS r   )r   storer   )r   r;   r    r   r   r   r   r   r   r        z&InterpreterBuilder.create_masked_storec                 C   st   |j j}|j}|tjkr|tjks|tjkr.|tjkr.t|j||d t|}t	||jS t	|j
t||jS r   )r   r8   rf   rl   rj   r   r   viewrs   r   r6   )r   srcdst_typesrc_element_typedst_element_typer   r   r   r   	cast_impl  s   zInterpreterBuilder.cast_implc                 C      |  ||S r   r   r   r   r   r   r   r   r         zInterpreterBuilder.<lambda>c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   )r   r   r   	is_signedr   r   r   r     r   c                 C   s4   |j j}|j}t|j|||t|}t||jS r   )r   r8   r   r   r   rs   r   )r   r   r   r}   r   r   r   r   r   r   r     s   z"InterpreterBuilder.create_fp_to_fpc                 C   s   t |jt||jS r   )r   r   r   rs   r8   r   r   r   r   create_bitcast     z!InterpreterBuilder.create_bitcastc                 C   s   t ||j|j|jjS r   r   r   r   r8   )r   lhsrhsopr   r   r   	binary_op  r   zInterpreterBuilder.binary_opc                 C      |  ||tjS r   r   r/   addr   r   r   r   r   r   r         c                 C   r   r   r   r/   multiplyr   r   r   r   r     r   c                 C   r   r   r   r/   divider   r   r   r   r     r   c                 C   r   r   r   r/   fmodr   r   r   r   r     r   c                 C   r   r   r   r/   subtractr   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   create_idivr   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r   r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   )r   r/   Z
left_shiftr   r   r   r   r     r   c                 C   r   r   )r   r/   right_shiftr   r   r   r   r     r   c                 C   r   r   r   r/   rz   r   r   r   r   r     r   c                 C   r   r   r	  r   r   r   r   r     r   c                 C   r   r   r	  r   r   r   r   r     r   c                 C   r   r   r	  r   r   r   r   r     r   c                 C   r   r   r   r/   ry   r   r   r   r   r     r   c                 C   r   r   r
  r   r   r   r   r     r   c                 C   r   r   r
  r   r   r   r   r     r   c                 C   r   r   r
  r   r   r   r   r     r   c                 C   r   r   r   r/   Z
less_equalr   r   r   r   r     r   c                 C   r   r   r   r/   lessr   r   r   r   r     r   c                 C   r   r   r   r/   Zgreater_equalr   r   r   r   r     r   c                 C   r   r   r   r/   Zgreaterr   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r   r/   equalr   r   r   r   r     r   c                 C   r   r   r   r/   	not_equalr   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   r  r   r   r   r   r     r   c                 C   r   r   )r   r/   Zbitwise_andr   r   r   r   r     r   c                 C   r   r   )r   r/   Zbitwise_xorr   r   r   r   r     r   c                 C   r   r   )r   r/   Z
bitwise_orr   r   r   r   r     r   c                 C   s&   t |jt|j|j |j |jjS r   )r   r   r/   r  r   r8   r   r   r   r   r    s   &zInterpreterBuilder.create_idivc                 C   sD   t |jj}t |jj}|j||_|j||_| ||tjS r   )rd   r   r   r6   r   r/   r  )r   r   r   Z	lhs_dtypeZ	rhs_dtyper   r   r   create_ashr  s
   zInterpreterBuilder.create_ashrc                 C   s   |j j}|tjks|tjkrtt|j |j |jjS ttd|j	d d  }|j 
|}|j 
|}t|||j	d ? }t|
||jjS )Nrt   r,   r   )r   r   r/   rc   r7   r   np_umulhi_u64r8   ru   rH   r6   r   )r   r   r   r   Zcompute_dtypeZlhs_dataZrhs_dataZret_datar   r   r   create_umulhi	  s   z InterpreterBuilder.create_umulhic                 C   s   t ||j|j|j|jjS r   r   )r   r   r   r   r   r   r   r   
ternary_op     zInterpreterBuilder.ternary_opc                 C      |  |||tjS r   )r  r/   Zclip)r   arglohiZpropagate_nansr   r   r   r         c                 C   r  r   )r  r/   where)r   condr   r   r   r   r   r     r  c                 C   s   t |j|j |j |jjS r   r   r   r   r   r   
create_fma  r  zInterpreterBuilder.create_fmac                 C   s   t ||j|jjS r   r   )r   r  r   r   r   r   unary_op  r   zInterpreterBuilder.unary_opc                 C   sZ   |j }|jd }ttd|j }|j|}d|> d }||@ t|}t||j jS )Nr   rt   )	r   r.   ru   r/   r   r   rs   r   r8   )r   r  r:   Zmask_bitwidthZnp_uint_dtyper   r   r   r   r   r   create_fabs"  s   
zInterpreterBuilder.create_fabsc                 C      |  |tjS r   )r!  r/   cosr   r  r   r   r   r   ,      c                 C   r#  r   )r!  r/   expr%  r   r   r   r   -  r&  c                 C   r#  r   )r!  r/   Zexp2r%  r   r   r   r   .  r&  c                 C   r#  r   )r!  r/   absr%  r   r   r   r   /  r&  c                 C   r#  r   )r!  r/   floorr%  r   r   r   r   0  r&  c                 C   r#  r   )r!  r/   ceilr%  r   r   r   r   1  r&  c                 C   r#  r   )r!  r/   logr%  r   r   r   r   2  r&  c                 C   r#  r   )r!  r/   log2r%  r   r   r   r   3  r&  c                 C   r#  r   r!  r/   sqrtr%  r   r   r   r   4  r&  c                 C   r#  r   r-  r%  r   r   r   r   5  r&  c                 C   r#  r   )r!  r/   sinr%  r   r   r   r   6  r&  c                 C   s0   |j jtjkrt|j nt|j }t||jjS r   )r   r   r/   rj   np_erf_fp32np_erf_fp64r   r8   )r   r  r   r   r   r   
create_erf8  s   "zInterpreterBuilder.create_erfc                 C   s   t dt|j |jjS Nr   )r   r/   r.  r   r   r8   r%  r   r   r   create_rsqrt<  r   zInterpreterBuilder.create_rsqrtc                 C   s   t |j||jjS r   )r   r   r5   r   r8   )r   r  r'   Zallow_reorderr   r   r   r   @  s    c                 C      t t|j||jjS r   )r   r/   	transposer   r   r8   )r   r  permr   r   r   create_transB  r   zInterpreterBuilder.create_transc                 C   s   |j }|j }|jjdkr|j s|jjdkr6|j r6t||jtjd tj}t||jtjd tj}t	tj
|||j jd|j  |jjS )Nr,   r-   )r   r   r.   Zis_floatingr   rf   ri   r   r/   r   matmulr8   )r   r   r   dZinput_precisionZmax_num_imprecise_accZa_dataZb_datar   r   r   
create_dotE  s   $zInterpreterBuilder.create_dotc                 C   s   t tj||tjdtjS r   )r   r/   r4   rb   rf   )r   startstopr   r   r   create_make_rangeN  r   z$InterpreterBuilder.create_make_rangec                 C   s"   t tj|j|d|fdd tjS )Nr   )binsr2   )r   r/   Z	histogramr   rf   rb   )r   r   r?  r   r   r   create_histogramQ     "z#InterpreterBuilder.create_histogramc                 C   s   t tj|j|j|d|jjS )Nr   )r   r/   Ztake_along_axisr   r   r8   )r   r   indicesr   r   r   r   create_gatherT  s   z InterpreterBuilder.create_gatherc                 C   s<   |  }|j}td|d }t|j||jtj  |jS )Nr   r,   )	r   r.   maxr   r   r6   r/   r7   r   )r   r   offsetr:   Zelement_bitwidthZelement_bytewidthr   r   r   create_addptrY  s    z InterpreterBuilder.create_addptrc                 C   s   | |\}}| }	t|	}
|d u rd }n.|tjjkr(ttj|j	|
d|	}n|tjj
kr=ttj|j	td|
d|	}ntd| | ||||||S )Nr-   nanzunsupported padding option )r@   r   rs   r{   ZPADDING_OPTIONZPAD_ZEROr   r/   rx   r   ZPAD_NANZ	full_likefloatr   r   )r   r   r9   Zpadding_optionr   r   r   r;   r<   r:   r   r   r   r   r   create_tensor_pointer_load`  s   z-InterpreterBuilder.create_tensor_pointer_loadc                 C   s    | |\}}| |||||S r   r@   r   )r   r   r    r9   r   r   r;   r<   r   r   r   create_tensor_pointer_storeo     z.InterpreterBuilder.create_tensor_pointer_storec                 C   r5  r   )r   r/   expand_dimsr   r   r8   )r   r  r   r   r   r   create_expand_dimss  r   z%InterpreterBuilder.create_expand_dimsc                 C   r5  r   )r   r/   r0   r   r   r8   r   r  r'   r   r   r   create_broadcastv  r   z#InterpreterBuilder.create_broadcastc                 C   s   t t|j|jg|jjS r   )r   r/   Zconcatenater   r   r8   r   r   r   r   
create_caty  r  zInterpreterBuilder.create_catc                 C   s    t tj|j|jgdd|jjS )NrD   rB  )r   r/   stackr   r   r8   r   r   r   r   create_join|  s    zInterpreterBuilder.create_joinc                 C   s(   t |jd |jjt |jd |jjfS )N).r   ).r   r   )r   r   r   r   r   create_split  s   (zInterpreterBuilder.create_splitc                 C   sV   t |jtjrttj||jd t|jd|jj	S ttj||jt|jd|jj	S r   )
re   r   rf   rr   r   r/   fullr   rs   r8   rP  r   r   r   create_splat  s   &"zInterpreterBuilder.create_splatc                 C   sB   || j vrtd| | j | }tt|j|j|j||jjS )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r   Z
atomic_casr   r   r8   )r   r   cmpr   semscoper   r   r   create_atomic_cas  s   

 z$InterpreterBuilder.create_atomic_casc                 C   sf   || j vrtd| || jvrtd| | j | }| j| }tt||j|j|j||jjS )Nzunsupported rmwOp rX  )	ir_rmw_op_to_interpreter_rmw_opr   rY  r   r   Z
atomic_rmwr   r   r8   )r   ZrmwOpr   r   r   r[  r\  r   r   r   create_atomic_rmw  s   



"z$InterpreterBuilder.create_atomic_rmwc                 C      t d)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   ZlibNameZlibPathsymbolZargListZretTypeisPurer   r   r   create_extern_elementwise     z,InterpreterBuilder.create_extern_elementwisec                 C   r`  )Nz,inline_asm not supported in interpreter modera  )r   Z	inlineAsmconstraintsvaluesr   rd  packr   r   r   create_inline_asm  rf  z$InterpreterBuilder.create_inline_asmc                 C   s   d| j d  d| j d  d| j d  d}|r|d| 7 }|r*tjdd	d
 id |D ]}t|d|j   q,|rCtjd d d S d S )N(r   z, r   r   ) r   c                 S   s   d| dS )N0x02xr   r   r   r   r   r     r   z1InterpreterBuilder.create_print.<locals>.<lambda>)	formatter)r   r/   Zset_printoptionsprintr   )r   prefixhexrh  ZisSignedmsgr    r   r   r   create_print  s   *zInterpreterBuilder.create_printc                 C   s   |sJ | d S r   r   )r   	conditionmessager   r   r   create_assert  s   z InterpreterBuilder.create_assertc                 C   s   |sJ dd S )NzAssume failedr   )r   rv  r   r   r   create_assume  r   z InterpreterBuilder.create_assumec                 C   s   d S r   r   r   r   r   r   create_barrier  s   z!InterpreterBuilder.create_barrierc                 C   s    dd |D }t ||||||S )Nc                 S      g | ]}|  qS r   r   .0rF  r   r   r   
<listcomp>      z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>)r%   )r   r&   r'   r(   r)   r*   r+   new_offsetsr   r   r   create_make_block_ptr  s   z(InterpreterBuilder.create_make_block_ptrc                 C   sv   t |jt |krtddd |jD }t|j|j|j||j|j}t	t |D ]}|j|  j
|| j
7  _
q)|S )Nz len(ptr.offsets) != len(offsets)c                 S   r{  r   r|  r}  r   r   r   r    r  z5InterpreterBuilder.create_advance.<locals>.<listcomp>)r3   r)   r   r%   r&   r'   r(   r*   r+   r2   r   )r   r   r)   r  r   r   r   r   r   create_advance  s   z!InterpreterBuilder.create_advancer&   r'   r(   tensor_shapec                 C   s   t ||||}|  |S r   )rA   rG   )r   r&   r'   r(   r  descr   r   r   create_make_tensor_descriptor  s   z0InterpreterBuilder.create_make_tensor_descriptorr  rC  c                 C   s2   t |tsJ ||\}}| j||d ||ddS )NF)r   r   r   r   )re   rA   r@   r   )r   r  rC  r   r   r;   r   r   r   r   create_descriptor_load  s
   z)InterpreterBuilder.create_descriptor_loadr    c                 C   s    | |\}}| |||d d S r   rK  )r   r  r    rC  r;   r   r   r   r   create_descriptor_store  rM  z*InterpreterBuilder.create_descriptor_store	x_offsetsy_offsetc                 C   s   |j jj}t|}tj|jjd |jd g|d}d }d }	t	|jD ]\}
}t
|tj|g}| ||||	j||
d d f< q"t
||S )Nr   rD   r-   )r&   r   r   rs   r/   zerosr   r'   r*   	enumerater   rf   rb   r  )r   r  r  r  r   r   np_dtyperesultr   r   r   x_offsetrC  r   r   r   create_descriptor_gather  s   
  
z+InterpreterBuilder.create_descriptor_gatherc           	      C   sH   t |jD ]\}}t|j| |j}t|tj|g}| ||| qd S r   )r  r   r   r   rf   rb   r  )	r   r  r    r  r  r   r  slicerC  r   r   r   create_descriptor_scatter  s
   z,InterpreterBuilder.create_descriptor_scatterc                 C   s8   t |}d|jv rttjdd|d|jS td| )NrI   r   rD   r-   zunsupported type )rs   namer   r/   rV  r8   	TypeError)r   r   Znp_typer   r   r   get_all_ones_value  s   
z%InterpreterBuilder.get_all_ones_valuer   N)r"   r#   r$   r{   ZMEM_SEMANTICZACQUIREr   ZRELEASEZRELAXEDZACQUIRE_RELEASErY  Z	ATOMIC_OPZADDZRMW_OPZFADDZMINZUMINMAXZUMAXANDORZXORZXCHGr^  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zcreate_si_to_fpZcreate_ui_to_fpZcreate_fp_to_siZcreate_fp_to_uiZcreate_fp_extZcreate_fp_truncZcreate_int_castr   r   r   Zcreate_faddZcreate_fmulZcreate_fdivZcreate_fremZcreate_fsubZ
create_mulZcreate_precise_divfZcreate_sdivZcreate_udivZcreate_sremZcreate_uremZ
create_addZ
create_subZ
create_shlZcreate_lshrZcreate_minsiZcreate_minuiZcreate_minimumfZcreate_minnumfZcreate_maxsiZcreate_maxuiZcreate_maximumfZcreate_maxnumfZcreate_icmpSLEZcreate_icmpSLTZcreate_icmpSGEZcreate_icmpSGTZcreate_icmpULEZcreate_icmpULTZcreate_icmpUGEZcreate_icmpUGTZcreate_icmpEQZcreate_icmpNEZcreate_fcmpOLTZcreate_fcmpOGTZcreate_fcmpOLEZcreate_fcmpOGEZcreate_fcmpOEQZcreate_fcmpONEZcreate_fcmpULTZcreate_fcmpUGTZcreate_fcmpULEZcreate_fcmpUGEZcreate_fcmpUEQZcreate_fcmpUNEZ
create_andZ
create_xorZ	create_orZcreate_int_to_ptrZcreate_ptr_to_intr  r  r  r  Zcreate_clampfZcreate_selectr   r!  r"  Z
create_cosZ
create_expZcreate_exp2Zcreate_iabsZcreate_floorZcreate_ceilZ
create_logZcreate_log2Zcreate_precise_sqrtZcreate_sqrtZ
create_sinr2  r4  Zcreate_reshaper8  r;  r>  r@  rD  rG  rJ  rL  rO  rQ  rR  rT  rU  rW  r]  r_  re  rj  ru  rx  ry  rz  r  r  r   r   rI   r  rA   r  r  r  r  r  r   r   r   r   r      sl   
	

		


r   c                    s"   |d fdd
}t | || d S )N)memberc                    s$   | |i dd |  D d iS )Nc                 S   s   i | ]\}}|d kr||qS )r   r   r~  kvr   r   r   
<dictcomp>  s
    z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>r   items)r  argskwargsbuilderr   r   r     s    z_patch_attr.<locals>.<lambda>)setattr)objr  r  r  
new_memberr   r  r   _patch_attr  s   r  c                 C   s2   t | D ]\}}tj|rt| ||| qd S r   )inspect
getmembersrf   core
is_builtinr  )pkgr  r  r  r   r   r   _patch_builtin   s
   r  c                    sJ   dd  dd }dd | _  fdd| _dd | _d	d | _t|| _d S )
Nc                 S   s   | j j}|jdkrt|S dS )Nr   T)r   r   sizer   )r   r   r   r   r   	_get_bool  s   z%_patch_lang_tensor.<locals>._get_boolc                 S   sj   t t| jj| jj}| j sJ t| jj	}|d |d |d< |d< t
j| j|}t
j||S )NrD   )r   r/   r6  r   r   r   r   Zis_blocklistr'   rf   r  rr   r   )r   r   r*   Zres_tyr   r   r   _get_transpose  s   z*_patch_lang_tensor.<locals>._get_transposec                 S      t | jjS r   )rI   r   r   r   r   r   r   r     r   z$_patch_lang_tensor.<locals>.<lambda>c                    s    | S r   r   r   r  r   r   r     s    c                 S   r  r   )reprr   r   r   r   r   r   r     r   c                 S   r  r   )r\   r   r   r   r   r   r   r     r   )	__index__r   __repr____str__propertyT)r   r  r   r  r   _patch_lang_tensor  s   


r  c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )ReduceScanOpInterfacec                 C   s   || _ || _d S r   )r   
combine_fn)r   r   r  r   r   r   r     s   
zReduceScanOpInterface.__init__c                 C   s0   |d ur|t |krtd| d| d S d S )Nzaxis z out of bounds for shape )r3   r   )r   r'   r   r   r   r   
check_axis#  s   z ReduceScanOpInterface.check_axisc                 C   s>   |D ]}t |tjjstdt| | |j| j qd S )Nzinput must be a tensor, got )	re   rf   r  r   r   r   r  r'   r   )r   r|   r  r   r   r   check_tensor'  s
   z"ReduceScanOpInterface.check_tensorc                 C   s`   t |}t|dr|jr||}t|t|j}n
tj|g|d}|}tj	
t||j|S )Nr'   r-   )rs   r   r'   r6   rf   rr   r  r/   r   r  r   r   r8   )r   r   r   r  Zret_typer   r   r   	to_tensor-  s   
zReduceScanOpInterface.to_tensorc                 C   s$   t |ts|f}| | | |S r   )re   tupler  
apply_implr   r|   r   r   r   apply7  s   


zReduceScanOpInterface.applyc                 C   r`  )Nzapply_impl not implementedra  r  r   r   r   r  =  rf  z ReduceScanOpInterface.apply_implN)	r"   r#   r$   r   r  r  r  r  r  r   r   r   r   r    s    
r  c                       sF   e Zd Z fddZdd Zdd Zddd	Zd
d Zdd Z  Z	S )	ReduceOpsc                       t  || || _d S r   )superr   	keep_dims)r   r   r  r  	__class__r   r   r   C     
zReduceOps.__init__c                 C   sN   g }|D ]}|d ur| | qd}| | |jj |j qt||fS )Nr   )appendr  r   r   flattenr   r  )r   r|   r   r   r   r   r   r   unravelG  s   zReduceOps.unravelc                    s2  j } j \ }g }g } d jjj}|d| ||d d   } D ]}||jj |tj||jjjd q't	|d j
D ]}	t|	|d| |d d   t fddt|D }
| dkrt	t|D ]}|
| jj || < qvqEt fddt|D }jjg ||
R  }t|ts|fn|}t	t|D ]}t|| tjjr|| jj n|| || < qqEg }t|D ]6\}	}jr|d urt||}nt	t|D ]}t|d}qn|d u r| }|| |	 j qt|dkr|d S t|S )Nr   r   r-   c                 3   *    | ]\}} |  | jV  qd S r   r  r   r~  iir:  )r|   input_indexr   r   r   	<genexpr>`     ( z+ReduceOps.generic_reduce.<locals>.<genexpr>c                 3   r  r   r  r~  Zoio)r|   output_indexr   r   r   r  f  r  )r   r  r   r   r'   r  r/   r  r   r2   r  unravel_indexr  r  r3   rE   r  fnre   rf   r  r   r  rN  r  )r   r|   Zoriginal_axisr   
input_dataoutput_dataZinput_shapeZoutput_shaper  r   Zinput_tuplej	acc_tuplecombine_fn_retr   r   _r   )r|   r  r  r   r   generic_reduceQ  sN   zReduceOps.generic_reduceNc                 C   s   t |tr	|d n|}d }d }|r!| ||jj| j| jd|j}|r3| ||jj| j| jdtj	}|d ur?|d ur?||fS |d urE|S |d urK|S t
d)Nr   r   Zkeepdimsz-val_reduce_op and idx_reduce_op are both None)re   r  r  r   r   r   r  r   rf   rb   r   )r   r|   val_reduce_opidx_reduce_opr   idxr   r   r   min_max|  s     zReduceOps.min_maxc                 C   s"   |  tj|jj| j| jd|jS )Nr  )r  r/   sumr   r   r   r  r   r  r   r   r   r    rA  zReduceOps.sumc                 C   s   | j tjjkr| j|d tjtjdS | j tjjkr&| j|d tj	tj
dS | j tjjkr8| j|d tj	d dS | j tjjkrJ| j|d tjd dS | j tjjkrX| |d S | |S )Nr   )r  r  )r  rf   standardZ_argmin_combine_tie_break_leftr  r/   minZargminZ_argmax_combine_tie_break_leftrE  ZargmaxZ_elementwise_maxZ_elementwise_min_sum_combiner  r  r  r   r   r   r    s   
zReduceOps.apply_implr   )
r"   r#   r$   r   r  r  r  r  r  __classcell__r   r   r  r   r  A  s    

+r  c                       s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )ScanOpsc                    r  r   )r  r   reverse)r   r   r  r  r  r   r   r     r  zScanOps.__init__c                 C   "   | j tj|jj| jd|jdgS NrB  r-   )r  r/   cumsumr   r   r   r   r  r   r   r   r    rA  zScanOps.cumsumc                 C   r  r  )r  r/   cumprodr   r   r   r   r  r   r   r   r    rA  zScanOps.cumprodc                    s  g }g }d j jj}D ]}||j j |tj||j jjd qt|d jD ]}t	|| t
 fddt|D } j dkr_tt|D ]}|| j j ||  < qOq+t
 fddtt D t
fddt|D }	jjg |	|R  }
t|
t
s|
fn|
}	tt|D ]}t|	| tjjr|	| j j n|	| ||  < qq+g }t|D ]\}}||| j q|S )Nr   r-   c                 3   s*    | ]\}} |  | jV  qd S r   r  r  )indexr|   r   r   r   r    r  z'ScanOps.generic_scan.<locals>.<genexpr>c                 3   s.    | ]}|j kr | d  n | V  qdS )r   NrB  )r~  r   )r  r   r   r   r    s   , c                 3   r  r   r  r  )r|   
prev_indexr   r   r   r    r  )r   r   r'   r  r/   r  r   r2   r  r  r  r  r   r3   rE   r  r  re   rf   r  r   r  )r   r|   r  r  r'   r  r   r   r  r  r  r   r   )r  r|   r  r   r   generic_scan  s8    zScanOps.generic_scanc              	   C   s   g }| j r|D ]}|| tj|jj| jd|j qn|}| j	t
jjkr.| |d }n| j	t
jjkr=| |d }n| |}| j rV|D ]}tj|jj| jd|j_qGt|dkr`|d pct|S )NrB  r   r   )r  r  r  r/   flipr   r   r   r   r  rf   r  r  r  Z_prod_combiner  r  r3   r  )r   r|   Z	new_inputr  r   r   r   r   r    s   &
zScanOps.apply_impl)	r"   r#   r$   r   r  r  r  r  r  r   r   r  r   r    s    r  c                  C   s4   ddd} ddd}| t _|t _| t j_|t j_d S )NFc                 [      t |||| S r   )r  r  )r|   r   r  r  r  r   r   r   _new_reduce  r   z'_patch_reduce_scan.<locals>._new_reducec                 [   r  r   )r  r  )r|   r   r  r  r  r   r   r   	_new_scan  r   z%_patch_reduce_scan.<locals>._new_scan)F)rf   reduceZassociative_scanr  )r  r  r   r   r   _patch_reduce_scan  s   

r  c                 C   sx   dd }ddd}ddd}dd	 }|| _ || _|| _t| _|| j_t|d
d| _t|dd| _	t|dd| _
t  d S )Nc                 S   sB  | j dkr	| S | j dkr| S | j dkr| S | j dkr$| S | j dkr-| S | j dkr6| S | j dkr?| S | j dkrH| S | j d	krQ|	 S | j d
krZ|
 S | j dkrc| S | j dkrl| S | j dkru| S | j dkr~| S | j dkr| S | j dkr| S | j dkr| S td|  d)Nvoidrh   r^   r]   r`   r_   rb   ra   rc   r7   rP   rQ   rR   Zfp16Zbf16Zfp32Zfp64zfail to convert z to ir type)r  Zget_void_tyZget_int1_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r  r   r   r   
_new_to_ir  sF   
















z$_patch_lang_core.<locals>._new_to_irc                 [   s6   |d u rd}|d u rd| }}n| |}}t |||S )Nr   r   )r2   )Zarg1Zarg2stepr  r<  endr   r   r   
_new_range  s   
z$_patch_lang_core.<locals>._new_range c                 S   s   | sJ |d S r   r   )r  rt  r   r   r   _new_static_assert"  r   z,_patch_lang_core.<locals>._new_static_assertc                 S   sn   t | tjs| S t |ttfs|gn|}dd |D }t|tdt| jkr.td| | j	
|| | S )Nc                 S   s"   g | ]}t |tjr|jn|qS r   )re   rf   	constexprr    r~  r  r   r   r   r  +  s   " z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>r   z$len(values) != len(input.shape) for )re   rf   r   r  r  r3   rE  r'   r   r   r!   )r|   rh  r  r   r   r   	_set_attr%  s   z#_patch_lang_core.<locals>._set_attrztt.divisibilityr  ztt.contiguityztt.constancy)NN)r  )r2   Zstatic_rangeZstatic_assertrq  Zstatic_printr   Zto_irr   Zmultiple_ofZmax_contiguousZmax_constancyr  )langr  r   r  r  r   r   r   _patch_lang_core  s   
(
	
r  c                 C   s   dd | j  D }t|dksJ d|D ] }t|t t|jt |tkr-t|jt t|j t	| qttj
jt d S )Nc                 S   s,   g | ]\}}t |r|ttjfv r|qS r   )r  ismodulerf   r  )r~  r  r    r   r   r   r  >  s   , z_patch_lang.<locals>.<listcomp>r   z:triton.language must be visible from within jit'd function)__globals__r  r3   r  interpreter_builderr   rf   r   r  r  r  Z$_experimental_tensor_descriptor_base)r  Zlangsr  r   r   r   _patch_lang=  s   


r  c                 C   s"   t | drt| | S t| |S )N_fields)r   r   )r  contentsr   r   r   _tuple_createJ  s   "r  c                 C   s8  t | trjttjj| }tj	}d|   krdk r"n ntj	}n7d|   kr,dk r2n ntj
}n'd|   kr<dk rBn ntj}nd|   krLdk rRn ntj}ntd|  ttj| g|d|}t||S t| d	rttjj| }ttj|  gtjd|}t||S t | trt| tt| S | S )
Ni   l        l        l         l            l            zUnsupported integer value r-   data_ptr)re   rI   rf   Z	str_to_tytritonruntimejitZmangle_typer/   rb   ra   rc   r7   r   r   r   r   r   r  r  r  map_implicit_cvt)r  tyr   r   r   r   r   r  T  s*   


r  c                 C   s   t | tjjjr| jS | S r   )re   r  r  r  TensorWrapperr&   )tr   r   r   _unwrap_tensorp  s   r  c                 C   s&   t |tjjjrtjj| |jS | S r   )re   r  r  r  r  r   )r  original_tensorr   r   r   _rewrap_tensorv  s   r  c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
GridExecutorc                    sN   ddl m || _|| _|| _fdd|j D   fdd|D | _d S )Nr   _normalize_tyc                    s   i | ]	\}}| |qS r   r   )r~  r  r  r  r   r   r    s    z)GridExecutor.__init__.<locals>.<dictcomp>c                    s   g | ]}  |d kr|qS )r  )get)r~  r  )r[   r   r   r    s    z)GridExecutor.__init__.<locals>.<listcomp>)r  r  r  	arg_namesgridr[   r  
constexprs)r   r  r   r!  r   )r[   r  r   r   ~  s   zGridExecutor.__init__c                    sN   i  fdd  fdd|D }i }|  D ]
\}} |||< q||fS )Nc                    s   t | trt| t | S t| ds| S t| }|  vr,| }| | < |   }|j	ddd}|
|| | |  t|| d}|S )Nr  r   cpu)Zdevice)r  )re   r  r  r  r   r  untyped_storager  r#  Z	new_emptyset_Zstorage_offsetr  rF   r  )r  Zunwrapped_argZstorageZcpu_arg_to_cpustoragesr   r   r'    s   

z,GridExecutor._init_args_hst.<locals>._to_cpuc                    s   g | ]} |qS r   r   )r~  r  )r'  r   r   r    r  z/GridExecutor._init_args_hst.<locals>.<listcomp>r  )r   args_devr  args_hst
kwargs_hstr   r    r   r&  r   _init_args_hst  s   zGridExecutor._init_args_hstc           
         st   i  fdd t ||D ]	\}} || q| D ]\}}|| }	 ||	 q D ]	\}}|| q.d S )Nc                    sl   t | drt| t|} }|  | f|   < d S t| tr2t| |D ]\} } | | q(d S d S )Nr  )r   r  r$  r  re   r  zip)arg_devarg_hst	_from_cpur(  r   r   r1    s   
 
z1GridExecutor._restore_args_dev.<locals>._from_cpu)r-  r  rh  Zcopy_)
r   r)  r*  r  r+  r.  r/  r   Z	kwarg_devZ	kwarg_hstr   r0  r   _restore_args_dev  s   	zGridExecutor._restore_args_devc              
      s\  | ddrd S tj  fdd| D }||\}}tj tjjg|R i |}fdd| D }tj	rH	|nj	}t
|dksUJ d|ddt
|   }tj|  z,t|d	 D ]#}t|d
 D ]}t|d D ]}	t|||	 jdi | q{qsqkW n ty }
 ztt|
|
d }
~
ww |||| d S )NZwarmupFc                    s    i | ]\}}| j v r||qS r   )r  r  )argspecr   r   r    s     z)GridExecutor.__call__.<locals>.<dictcomp>c                    s(   i | ]\}}|| j v r|nt|qS r   )r"  r  )r~  r  r  r   r   r   r    s   (    z#grid must have at most 3 dimensions)r   r   r   r   r   )popr  getfullargspecr  r  r,  r  getcallargscallabler!  r3   r  r   r2   r   	Exceptionr   r  r2  )r   r)  r  r*  r+  r  r!  r   r   r   er   )r3  r   r   __call__  s4   

zGridExecutor.__call__N)r"   r#   r$   r   r,  r2  r;  r   r   r   r   r  |  s
    	r  c                   @   s   e Zd Zdd ZdS )ASTTransformerc                 C   s   g }|j D ]
}|| |g7 }qt|dkrtdtjtjtjtjtjdt ddt ddt ddt d|j	tjd	t dtj
d
dgg d|_	|S )Nr   z&Multiple assignments are not supportedr  )idctxlanguage)r    r   r>  Zsemanticr  r  F)r    )funcr  keywords)targetsvisitr3   r   astCall	AttributeNameLoadr    Constant)r   nodenamestargetr   r   r   visit_Assign  s&   
 
zASTTransformer.visit_AssignN)r"   r#   r$   rM  r   r   r   r   r<    s    r<  c                   @   sJ   e Zd Ze Zdd Zdd Zdd Zdd Zd	d
 Z	dd Z
dd ZdS )FunctionRewriterc                 K   s   || _ || _d| _d| _d S )Nr  r   )r  r  filenamedef_file_lineno)r   r  r  r   r   r   r     s   
zFunctionRewriter.__init__c                 C   sh   z
t | j\}}W n ty   | j Y S w |  \| _| _| || _| 	|}| 
|}| |S r   )r  getsourcelinesr  r9  _get_jit_fn_file_linerO  rP  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r   linesr  r   transformed_astr   r   r   rewrite_ast  s   
	


zFunctionRewriter.rewrite_astc                 C   s   ddl m}m} ||| jS )Nr   )get_jit_fn_file_lineJITFunction)r  r[  r\  r  )r   r[  r\  r   r   r   rR    s   z&FunctionRewriter._get_jit_fn_file_linec                 C   s0   d}t |D ]\}}| dr|d }q|S )Nr   zdef r   )r  strip
startswith)r   rX  rT  r   liner   r   r   rS    s   zFunctionRewriter._find_defc                 C   s&   || j d d  }d|}t|S )Nr   r  )rT  jointextwrapdedent)r   rX  r   r   r   r   rU    s   

z FunctionRewriter._prepare_sourcec                 C   s:   t |}| j|}t | | jd }t || |S r3  )rD  parseast_transformerrC  fix_missing_locationsrP  increment_lineno)r   r   Z
parsed_astrY  Z
inc_linenor   r   r   rV    s   


zFunctionRewriter._transform_astc                 C   s^   t || jdd}i | j}| jj}t  D ]\}}||vr"|||< qt||| || jj S )Nexec)rO  mode)	compilerO  r  r  r
  globalsr  rg  r"   )r   rY  Zcompiled_codeZlocal_namespaceZ
fn_globalsr   r    r   r   r   rW  )  s   
z"FunctionRewriter._compile_and_execN)r"   r#   r$   r<  rd  r   rZ  rR  rS  rU  rV  rW  r   r   r   r   rN    s    rN  c                   @   s>   e Zd Zi ZdddZdd Zedd Z d	d
 Zdd ZdS )InterpretedFunctionr   Nc                    sN   | _ t|fi | _ fdd}| _t|}dd |j D  _d S )Nc                     s(   |d }   }t| j|| i |S )Nr!  rewriter  r   )r  r  r!  r  r   r   r   run<  s   z)InterpretedFunction.__init__.<locals>.runc                 S   s   g | ]}|j qS r   r  r  r   r   r   r  C  r  z0InterpretedFunction.__init__.<locals>.<listcomp>)	r  rN  rewriterrn  r  	signature
parametersrh  r   )r   r  r  rn  rp  r   r   r   r   8  s   
zInterpretedFunction.__init__c                 C   s*   | j | jvr| j | j| j < | j| j  S r   )r  rewritten_fnro  rZ  r   r   r   r   rm  E  s   zInterpretedFunction.rewritec                 C   s   | j jS r   )r  r"   r   r   r   r   r"   J  s   zInterpretedFunction.__name__c                 C   s   |   }t|| j|S r   rl  )r   r!  r  r   r   r   __getitem__N  s   zInterpretedFunction.__getitem__c              
   O   sJ   t | j |  }z||i |W S  ty$ } ztt||d }~ww r   )r  r  rm  r9  r   r  )r   r  r  r  r:  r   r   r   r;  R  s   
zInterpretedFunction.__call__r  )	r"   r#   r$   rr  r   rm  r  rs  r;  r   r   r   r   rk  4  s    

rk  )=rD  ra  r  typingr   r   r   numpyr/   r  Ztriton.languager?  rf   dataclassesr   errorsr   	functoolsr   Z_C.libtritonr	   r   r
   r{   r   r%   rA   rK   rd   rs   r   r   r   Z	vectorizerj   r0  rk   r1  r7   r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  NodeTransformerr<  rN  rk  r   r   r   r   <module>   sb    % @   |$`>N
_E