o
    cZh                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ dZedkreedZejd	d
d ejddedddd ejddeddd ejddeddd ejddeddd ejddedd d ejd!d"ed#dd$ ejd%d&ed'dd$ e ZejrejnejZejrejneeZeejZejd eej ejej eZ!ej"e!Z#e!j$%e# e&e#ejZ'ej()d(Z(e*e(dksJ e+e,d)d* ej-)d(Z-d+e
e fd,d-Z.d.ej/ d/ej0 Z1e.e-e1g Z2d0d1 Z3d2d3 e4e-D Z5d4d3 e56 D Z5d5d3 e4e-D Z7d6d3 e76 D Z7e56 D ]\Z8Z9e9dkr=e9e7e'j:e8d   < q+d7d3 e4e-D Z-e7D ]Z8d1e-e8< qJd8;d9d: e7< D Z=d;d: e76 D Z>e>d<ej/ d=ej0 g7 Z>e5< D ]Z?e?d>v sJ d?e? qyd@d3 e56 D Z@ejAjBe'e7e-e@dAZCej/ej0dBZDejEeCeDdCZFeFjGjHd kreIdDg Z:g ZJg ZKg ZLe4e'j:D ]8\ZMZNeNe7vre:OeN eJOe-eN  eKOeN eLOe-eN  qe5PeMfddkre:OeN eJOdE qdZQe4e-< D ]'\ZMZReQeeM7 ZQe5PeMfddkreQdF7 ZQe5PeMfddGkr)eQdH7 ZQqdI;ee2eQgZSeFjTdJ ZTee UeTdKdL ZVi dMeSdNejdOe*eTdPdQ;dRd: eWeVdddK eVdddK D d+dQ;dSd: eWeKeLD dTdQ;dUd: eWe:eJD dVdQ;dWd: eKD dXg dYe*eKd dZe>d[eFjGjXd\ej/d]dI;e=e1gd^e(d  d_e(d d`e(dK dadZYdbD ]DZZee[jdc dd deeZ  Z\e]dfe2 dIeQ dfeZ ^dgZ_e_`ee\a jbdhi eY W d   n	1 sw   Y  qdS dS )i    N)ArgumentParser)Path)List	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr
   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r
   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r
   r   z--gridz-gzLaunch grid of the kernel,c                 C   s
   |  dS )N )strip)s r   C/var/www/auris/lib/python3.10/site-packages/triton/tools/compile.py<lambda>F   s   
 r   	signaturec                 C   s,   t  }|d|   | d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   mr   r   r   hash_signatureH   s   r!   ZwarpsZxstagesc                 C   sF   zt | }|W S  ty   Y nw zt| }|W S  ty"   Y d S w N)int
ValueErrorfloat)r   retr   r   r   	constexprP   s   r'   c                 C   s.   i | ]\}}d |v r|ft |d d qS ):r   )r'   split.0ir   r   r   r   
<dictcomp>]   s   . r-   c                 C      i | ]\}}|d ur||qS r"   r   r+   kvr   r   r   r-   ^       c                 C   s    i | ]\}}t j| t|qS r   )kernel	arg_namesr'   r*   r   r   r   r-   _   s     c                 C   r.   r"   r   r/   r   r   r   r-   `   r2   c                 C   s&   i | ]\}}t j| |d d qS )r(   r   )r3   r4   r)   r*   r   r   r   r-   d   s   & xc                 C   s   g | ]}t |qS r   )str)r+   r1   r   r   r   
<listcomp>g   s    r7   c                 C   s   g | ]\}}| d | qS )=r   r/   r   r   r   r7   h   r2   z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got c                 C   s$   i | ]\}}|d kr|dd ggqS )r9   ztt.divisibilityr   r/   r   r   r   r-   m   s   $ )fnZ
constexprsr   attrs)	num_warps
num_stages)optionszMAOT compiling kernels with global scratch requirements is not yet implementedZi32cr9   d_Zcubin   kernel_nameZtriton_kernel_nameZbin_sizeZbin_dataz, c                 C   s   g | ]\}}d | | qS )0xr   )r+   r5   yr   r   r   r7      r2   c                 C   "   g | ]\}}t | d | qS r   r   r+   nametyr   r   r   r7         " Zfull_signaturec                 C   rG   rH   r   rI   r   r   r   r7      rL   Zarg_pointersc                 C   s   g | ]}d | qS )&r   )r+   argr   r   r   r7      s    z&global_scratchnum_argsZkernel_docstringsharedr<   Z	algo_infoZgridXZgridYZgridZZ_placeholder)hr?   extracudazcompile..wr   )cbinasciir   importlib.util	importlibsysargparser   pathlibr   typingr   ZtritonZtriton.backendsZtriton.backends.nvidia.driverr   Zdesc__name__parseradd_argumentr6   r#   
parse_argsargsZout_namerD   Zout_pathr	   Zarg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrr3   gridr)   lenlistmapr   r!   r<   r=   Zmeta_sigZsig_hashr'   	enumeratehintsitems	constantskeyvaluer4   r   valuesZ	const_sigZ
doc_stringrQ   r;   compilerZ	ASTSourcesrcoptscompileZccinfometadataZglobal_scratch_sizeRuntimeErrorZ	arg_typesZarg_names_not_1Zarg_types_not_1r,   Zarg_nameappendgetsuffixrK   	func_nameasmhexlifyZhex_ziprP   paramsext__file__Ztemplate_pathwith_suffixopenfpwrite	read_textformatr   r   r   r   <module>   s   









.	
$ x