a
    kº”hÙ  ã                   @   sÔ  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ dZedkrÐeedZejd	d
d ejddedddd ejddeddd ejddeddd ejddeddd ejddedd d ejd!d"ed#dd$ ejd%d&ed'dd$ e ¡ Zejr*ejnejZejr>ejneeƒZeejƒZej d eejƒ¡ ej ej e¡Z!ej "e!¡Z#e!j$ %e#¡ e&e#ejƒZ'ej( )d(¡Z(e*e(ƒdks¶J ‚e+e,d)d*„ ej- )d(¡ƒƒZ-e
e d+œd,d-„Z.d.ej/› d/ej0› Z1e.e-e1g ƒZ2d0d1„ Z3d2d3„ e4e-ƒD ƒZ5d4d3„ e5 6¡ D ƒZ5d5d3„ e4e-ƒD ƒZ7d6d3„ e7 6¡ D ƒZ7e5 6¡ D ]&\Z8Z9e9dkr\e9e7e'j:e8d   < q\d7d3„ e4e-ƒD ƒZ-e7D ]Z8d1e-e8< qšd8 ;d9d:„ e7 <¡ D ƒ¡Z=d;d:„ e7 6¡ D ƒZ>e>d<ej/› d=ej0› g7 Z>e5 <¡ D ]Z?e?d>v søJ d?e?› ƒ‚qød@d3„ e5 6¡ D ƒZ@ejAjBe'e7e-e@dAZCej/ej0dBœZDejEeCeDdCZFeFjGjHd krpeIdDƒ‚g Z:g ZJg ZKg ZLe4e'j:ƒD ]n\ZMZNeNe7vrÎe: OeN¡ eJ Oe-eN ¡ eK OeN¡ eL Oe-eN ¡ n(e5 PeMfd¡dkrŠe: OeN¡ eJ OdE¡ qŠdZQe4e- <¡ ƒD ]N\ZMZReQeeMƒ7 ZQe5 PeMfd¡dkr:eQdF7 ZQe5 PeMfd¡dGkr
eQdH7 ZQq
dI ;ee2eQg¡ZSeFjTdJ ZTee  UeT¡ƒdKdL… ZVeSeje*eTƒdM ;dNd:„ eWeVdddK… eVdddK… ƒD ƒ¡dM ;dOd:„ eWeKeLƒD ƒ¡dM ;dPd:„ eWe:eJƒD ƒ¡dM ;dQd:„ eKD ƒdRg ¡e*eKƒd e>eFjGjXej/dI ;e=e1g¡e(d  e(d e(dK ddSœZYdTD ]„ZZee[ƒjdU dV dWeZ›  Z\e ]dXe2› dIeQ› dXeZ› ¡ ^dY¡.Z_e_ `ee\ƒ a¡ jbf i eY¤Ž¡ W d  ƒ n1 sÂ0    Y  qJdS )Zé    N)ÚArgumentParser)ÚPath)ÚList©Ú	ty_to_cppa½  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
Ú__main__)ÚdescriptionÚpathzTPath to Python source containing desired kernel in its scope. File will be executed.)Úhelpz--kernel-namez-nÚ zName of the kernel to compileT)ÚtypeÚdefaultr
   Úrequiredz--num-warpsz-wé   z$Number of warps to launch the kernel)r   r   r
   z--num-stagesz-nsé   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r
   r   z--gridz-gzLaunch grid of the kernelú,c                 C   s
   |   d¡S )Nú )Ústrip)Ús© r   úB/var/www/auris/lib/python3.9/site-packages/triton/tools/compile.pyÚ<lambda>F   ó    r   )Ú	signaturec                 C   s,   t  ¡ }| d | ¡ ¡ ¡ | ¡ d d… S )Nr   é   )ÚhashlibÚsha256ÚupdateÚjoinÚencodeÚ	hexdigest)r   Úmr   r   r   Úhash_signatureH   s    r"   ZwarpsZxstagesc                 C   sH   zt | ƒ}|W S  ty    Y n0 zt| ƒ}|W S  tyB   Y n0 d S ©N)ÚintÚ
ValueErrorÚfloat)r   Úretr   r   r   Ú	constexprP   s    r(   c                 C   s.   i | ]&\}}d |v r|ft | d ¡d ƒ“qS )ú:r   )r(   Úsplit©Ú.0Úir   r   r   r   Ú
<dictcomp>]   r   r.   c                 C   s   i | ]\}}|d ur||“qS r#   r   ©r,   ÚkÚvr   r   r   r.   ^   r   c                 C   s    i | ]\}}t j| t|ƒ“qS r   )ÚkernelÚ	arg_namesr(   r+   r   r   r   r.   _   r   c                 C   s   i | ]\}}|d ur||“qS r#   r   r/   r   r   r   r.   `   r   c                 C   s&   i | ]\}}t j| | d ¡d “qS )r)   r   )r2   r3   r*   r+   r   r   r   r.   d   r   Úxc                 C   s   g | ]}t |ƒ‘qS r   )Ústr)r,   r1   r   r   r   Ú
<listcomp>g   r   r6   c                 C   s   g | ]\}}|› d |› ‘qS )ú=r   r/   r   r   r   r6   h   r   z
num_warps=znum_stages=)r   é   z#Only 1 and 16 are valid hints, got c                 C   s$   i | ]\}}|d kr|dd gg“qS )r8   ztt.divisibilityr   r/   r   r   r   r.   m   r   )ÚfnZ
constexprsr   Úattrs)Ú	num_warpsÚ
num_stages)ÚoptionszMAOT compiling kernels with global scratch requirements is not yet implementedZi32Úcr8   ÚdÚ_Zcubiné   éÿÿÿÿz, c                 C   s   g | ]\}}d |› |› ‘qS )Ú0xr   )r,   r4   Úyr   r   r   r6   ‘   r   c                 C   s"   g | ]\}}t |ƒ› d |› ‘qS ©r   r   ©r,   ÚnameÚtyr   r   r   r6   ’   r   c                 C   s"   g | ]\}}t |ƒ› d |› ‘qS rE   r   rF   r   r   r   r6   “   r   c                 C   s   g | ]}d |› ‘qS )ú&r   )r,   Úargr   r   r   r6   ”   r   z&global_scratch)Úkernel_nameZtriton_kernel_nameZbin_sizeZbin_datar   Zfull_signatureZarg_pointersÚnum_argsZkernel_docstringÚsharedr;   Z	algo_infoZgridXZgridYZgridZZ_placeholder)Úhr>   ÚextraZcudazcompile.Ú.Úw)cÚbinasciir   Úimportlib.utilÚ	importlibÚsysÚargparser   Úpathlibr   Útypingr   ZtritonZtriton.backendsZtriton.backends.nvidia.driverr   ÚdescÚ__name__ÚparserÚadd_argumentr5   r$   Ú
parse_argsÚargsZout_namerK   Zout_pathr	   Zarg_pathÚinsertÚparentÚutilÚspec_from_file_locationÚstemÚspecÚmodule_from_specÚmodÚloaderÚexec_moduleÚgetattrr2   Zgridr*   ÚlenÚlistÚmapr   r"   r;   r<   Zmeta_sigZsig_hashr(   Ú	enumerateÚhintsÚitemsÚ	constantsÚkeyÚvaluer3   r   ÚvaluesZ	const_sigZ
doc_stringrN   r:   ÚcompilerZ	ASTSourceÚsrcÚoptsÚcompileZccinfoÚmetadataZglobal_scratch_sizeÚRuntimeErrorZ	arg_typesZarg_names_not_1Zarg_types_not_1r-   Zarg_nameÚappendÚgetÚsuffixrH   Ú	func_nameZasmÚhexlifyZhex_ÚziprM   ÚparamsÚextÚ__file__Ztemplate_pathÚwith_suffixÚopenÚfpÚwriteÚ	read_textÚformatr   r   r   r   Ú<module>   sÖ   

ÿÿÿ






,
ð$