o
    cZh_.                     @   s  d dl mZ d dlmZ d dlmZmZ d dlmZ dd Z	G dd de
ZeG d	d
 d
ZG dd dZdd Zdd Zdedee defddZdedefddZdedefddZdedee defddZdedefddZdededefd d!Zdededefd"d#Zdedefd$d%Zdedefd&d'Zd(Zed)krd d*lmZ eed+Zejd,d-d.d/ ejd0d1ed2d3 ejd4ed5d6d7 e  Z!e Zg Z"e!j#D ]Z$ee$Z%e%& Z'e"(e%j) e*e' qd8d9 ej+, D Z-d:d9 ej+, D Z.e.d  d  Z/ee/Z0ee/Z1e!j23d;4d<&Z5d=Z2e2d>6e-7 Z2e2d>7 Z2e2e07 Z2e2d>7 Z2e2e17 Z2e57e2 W d?   n	1 s>w   Y  d@d9 ej+, D Z8dAd9 ej+9 D Z:ee:e/Z;ee/Z<ee:e/Z=ee/Z>ee/Z?e!j23dB4d<OZ5d5Z2e2d=7 Z2e2dC7 Z2e2dD7 Z2e2d>7 Z2e2d>6e87 Z2e2d>7 Z2e2e;7 Z2e2d>7 Z2e2e>7 Z2e2d>7 Z2e2e<7 Z2e2d>7 Z2e2e=7 Z2e2d>7 Z2e2e?7 Z2e57e2 W d?   d?S 1 sw   Y  d?S d?S )E    )defaultdict)Path)SequenceUnion)	dataclassc                 C   s   | d uS N )xr   r   @/var/www/auris/lib/python3.10/site-packages/triton/tools/link.py_exists   s   r   c                   @   s   e Zd ZdS )LinkerErrorN)__name__
__module____qualname__r   r   r   r
   r      s    r   c                   @   sb   e Zd ZU eed< ee ed< ee ed< eeedf  ed< eed< eed< eed< eed	< dS )
KernelLinkerMetaorig_kernel_name	arg_names
arg_ctypesNsizessig_hashtriton_suffixsuffix	num_specs)r   r   r   str__annotations__r   r   intr   r   r   r
   r      s   
 r   c                   @   sd   e Zd ZdddZdefddZdefd	d
ZdefddZdedefddZdede	fddZ
dS )HeaderParserreturnNc                 C   sF   dd l }|d| _|d| _|d| _|d| _tt| _d S )Nr   z'//[\s]*tt-linker:[\s]*([\w]+):(.+):(.+)z^([\w]+)_([\w]+)_([\w]+)$z[\s]*(\w+)\s(\w+)[,]?z[c,d])	recompilelinker_directiveskernel_namec_sig
arg_suffixr   listkernels)selfr   r   r   r
   __init__   s   zHeaderParser.__init__headerc                 C   s   |  D ]N}|drR| j|}t|rR|d|d|d}}}| |\}}}	| |\}
}| |	|\}}| 	d
||gt|||
|||	|	|d qd S )Nz//         _)r   r   r   r   r   r   r   r   )
splitlines
startswithr    matchr   group_match_name_match_c_sig_match_suffix_add_kerneljoinr   )r&   r(   lnmker_namer"   Z	algo_infonamer   r   Zc_typesr   r   r   r   r   r
   extract_linker_meta-   s.   
"z HeaderParser.extract_linker_metar8   c                 C   sN   | j |}t|r |d|d|d}}}|||fS t| d)Nr)   r*   r+   z is not a valid kernel name)r!   r/   r   r0   r   )r&   r8   r7   r9   r   r   r   r   r
   r1   D   s
   "
zHeaderParser._match_namer"   c                 C   sV   | j |}t|r$g g }}|D ]\}}|| || q||fS t| d)Nz" is not a valid argument signature)r"   findalllenappendr   )r&   r"   r7   ZtysargstyZarg_namer   r   r
   r2   K   s   

zHeaderParser._match_c_sigr   c           	      C   s   | d}ddd}d}g }tt|D ]]}|t|}|dkr(t| d|tt|7 }| j||rT|d7 }|d g|t|   |	|||   |d7 }|t|d k rc||d  }q|d gt|t|   q||fS )N,r)      )cdr   z is not a valid kernel suffix)
splitranger<   findr   r   r#   r/   extendr=   )	r&   r   r"   r>   Zs2ir   r   iposr   r   r
   r3   V   s$   

zHeaderParser._match_suffixr9   kerc              
   C   sv   || j v r1| j | d }t|j|jD ]\}}||kr0td| dd|j dd|j q| j | | d S )NrD   z Mismatched signature for kernel z: 
	existing sig is: r@   z
	current is: )r%   zipr   r   r5   r=   )r&   r9   rK   lastcurZnew_r   r   r
   r4   m   s   
$zHeaderParser._add_kernel)r   N)r   r   r   r'   r   r:   r1   r2   r3   r   r4   r   r   r   r
   r      s    
r   c                 C   s   d dd t| j| jD S )N, c                 S      g | ]\}}| d | qS  r   .0r?   argr   r   r
   
<listcomp>{       z0gen_signature_with_full_args.<locals>.<listcomp>)r5   rL   r   r   r7   r   r   r
   gen_signature_with_full_argsz   s   rY   c                 C   sN   dd t | j| jD }dd t | j| jD }ddd t ||D }|S )Nc                 S      g | ]
\}}|d kr|qS r)   r   )rT   r?   hintr   r   r
   rV          z!gen_signature.<locals>.<listcomp>c                 S   rZ   r[   r   rT   rU   r\   r   r   r
   rV      r]   rO   c                 S   rP   rQ   r   rS   r   r   r
   rV      rW   )rL   r   r   r   r5   )r7   Z	arg_typesr   sigr   r   r
   gen_signature~   s   r`   r9   metasr   c              	   C   s&   d|  dt |d  d|  d|  d	S )N

CUresult (CUstream stream, rD   z);
void load_();
void unload_();
    )rY   )r9   ra   r   r   r
   make_algo_decls   s   
rf   metac                 C   s:   d| j  dt|  d| j  dt|  d| j  d| j  dS )Nrb   _default(CUstream stream, z);
CUresult rc   z, int algo_id);
void load_rd   re   )r   rY   )rg   r   r   r
   make_global_decl   s   ri   c                 C   sD   d| j  dt|  d}|d| j  dd| j d7 }|d7 }|S )	N	CUresult rh   z){
	  return 	(stream, rO   z, 0);
}
r   rY   r5   r   rg   srcr   r   r
   make_default_algo_kernel   s    rq   c                    s  d|  d}t |dd dD ]}|d|j d|j d|j dt| d		7 }q|d7 }|d|  dt|d
  d7 }|d7 }t |dd dD ]I}dd  d fddt|j|j	D }|t
|j	rid| dnd7 }dd t|j|j	D }|d|j d|j d|j dd| d		7 }qF|d7 }|d7 }|d7 }dD ]^}|d| d|  d7 }t |dd dD ]}|d| d|j d|j d|j d	7 }q|d| d|  d 7 }|d7 }t |d!d dD ]}|d"| d|j d|j d|j d	7 }q|d7 }q|S )#Nz// launcher for: 
c                 S      | j  S r   r   rX   r   r   r
   <lambda>       z.make_kernel_hints_dispatcher.<locals>.<lambda>)keyrj   r,   rc   );
rD   z){c                 S   rs   r   rt   rX   r   r   r
   ru      rv   c                 S   s8   |dkrd|  d| dS |dkrd|  d| dS d S )NrA   (z % z == 0)r)   z == )r   )valr\   r   r   r
   ru      s
   z && c                    s"   g | ]\}}|d ur ||qS r   r   )rT   r{   r\   Zcond_fnr   r
   rV      s
    z0make_kernel_hints_dispatcher.<locals>.<listcomp>z  if (z)
zif (1)
c                 S   rZ   r[   r   r^   r   r   r
   rV      r]   z    return rl   rO   z#  return CUDA_ERROR_INVALID_VALUE;
rm   loadZunloadz
// z for: c                 S   rs   r   rt   rX   r   r   r
   ru      rv   void ();
z() {c                 S   rs   r   rt   rX   r   r   r
   ru      rv     )sortedr   r   r   r`   rY   r5   rL   r   r   any)r9   ra   rp   rg   Zcondsr   moder   r|   r
   make_kernel_hints_dispatcher   s8   .0**
r   c                 C   sV   d| j  dt|  d}|d| j  d7 }|d| j  dd| j d	7 }|d
7 }|S )Nrj   rc   z, int algo_id){
z   assert (algo_id < (int)sizeof(z_kernels));
rk   z_kernels[algo_id](stream, rO   rx   rm   rn   ro   r   r   r
   !make_kernel_meta_const_dispatcher   s
    r   namesc                 C   sH   dt | d}|d|j d7 }| D ]
}|d| d7 }q|d7 }|S )Nz3typedef CUresult (*kernel_func_t)(CUstream stream, rx   zkernel_func_t z_kernels[] = {
r   z,
z};
)rY   r   )r   rg   rp   r9   r   r   r
   make_func_pointers   s   r   c                 C   sR   d}dD ]"}|d| d|j  d7 }| D ]}|d| d| d7 }q|d7 }q|S )	N r}   r   r,   z(void){
r   r   z}

r   )r   rg   rp   r   r9   r   r   r
   make_kernel_load_def   s   
r   c                 C   s   d| j  d}|S )Nint z_get_num_algos(void);r   ro   r   r   r
   make_get_num_algos_decl   s   r   c                 C   s4   d| j  d}|d| j  d| j  d7 }|d7 }|S )Nr   z_get_num_algos(void){
z  return (int)(sizeof(z_kernels) / sizeof(z_kernels[0]));
rm   r   ro   r   r   r
   make_get_num_algos_def   s   r   a0  
Triton ahead-of-time linker:

This program takes in header files generated by compile.py, and generates a
single entry-point responsible for dispatching the user's input to the right
kernel given the specializations that were compiled.

Example usage:
python link.py /path/to/headers/*.h -o kernel_name
__main__)ArgumentParser)descriptionheaders+z_Paths to header files to link. Must include linker directive annotations (autogenerated by ttc))nargshelpz--outz-ozOut filename)typer   z--prefixr   z(String to prefix kernel dispatcher names)r   defaultr   c                 C      g | ]	\}}t ||qS r   )rf   rT   r9   rg   r   r   r
   rV         rV   c                 C   s   g | ]\}}|qS r   r   r   r   r   r
   rV     s    z.hwz#include <cuda.h>
rr   Nc                 C   r   r   )r   r   r   r   r
   rV   *  r   c                 C   s   g | ]}|qS r   r   )rT   r9   r   r   r
   rV   +  s    z.cz#include <stdint.h>
z#include <assert.h>
)@collectionsr   pathlibr   typingr   r   dataclassesr   r   	Exceptionr   r   r   rY   r`   r   rf   ri   rq   r   r   r   r   r   r   Zdescr   argparser   parseradd_argument
parse_argsr>   Zincludesr   r(   Zh_path	read_textZh_strr=   r9   r:   r%   itemsZ
algo_declsZ
meta_listsrg   Zget_num_algos_declZglobal_decloutwith_suffixopenfpr5   writeZdefskeysr   Zfunc_pointers_defZmeta_const_defZload_unload_defZget_num_algos_defZdefault_algo_kernelr   r   r   r
   <module>   s    ]	
)	






$