a
    khT                     @  s  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddlm
Z
mZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZd dlZdZ de iZ!dZ"de"iZ#dd Z$G dd dZ%G dd dZ&e' dd Z(e' dd Z)dd Z*dddd Z+G d!d" d"Z,d0d#d$Z-d%d&d'd(d)Z.G d*d+ d+Z/G d,d- d-e0Z1G d.d/ d/Z2dS )1    )annotationsN   )get_cache_invalidating_env_varsir)backends)Language)BaseBackend	GPUTarget)__version__knobs)OutOfResources)get_cache_managerget_dump_managerget_override_manager)driver)get_sass)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                 C  sP   t d| }t d| }|d ur$dS t dd| } |d urLdt|d S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1Z	nvTmaDescz {[^}]+} *   )researchsubconvert_type_reprgroup)xmatchZtma r   F/var/www/auris/lib/python3.9/site-packages/triton/compiler/compiler.pyr   '   s    r   c                   @  s4   e Zd ZdddddZdd Zdd	 Zd
d ZdS )	ASTSourceNNonereturnc                 C  s   || _ tj| _d| _|j| _|| _t | _	|d urx|
 D ]<\}}t|trZ|j|fn|}t|tslJ || j	|< q:|pt | _t| jtrdd t| jdD | _n"| j D ]}t|tstdqd S )Nttirc                 S  s   i | ]\}}||  qS r   )strip.0kvr   r   r   
<dictcomp>D       z&ASTSource.__init__.<locals>.<dictcomp>,zSignature keys must be string)fnr   TRITONlanguageext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrs	enumeratesplitkeys	TypeError)selfr-   r3   Z
constexprsr<   r(   r)   r   r   r   __init__6   s"    
zASTSource.__init__c                   sz   dd t | j D }dd  d fddt | j D }| jj dt| j d| d| }t	
|d S )Nc                 S  s   g | ]\}}|qS r   r   r&   r   r   r   
<listcomp>K   r+   z"ASTSource.hash.<locals>.<listcomp>c                 S  s   t | dr| jS t| S )N	cache_key)hasattrrD   r8   )r   r   r   r   <lambda>L   r+   z ASTSource.hash.<locals>.<lambda>-c                   s   g | ]\}} |qS r   r   r&   get_keyr   r   rC   M   r+   utf-8)sortedr3   r6   joinr5   r-   rD   r8   r<   hashlibsha256encode	hexdigest)rA   Z
sorted_sigZconstants_keykeyr   rH   r   hashJ   s
    "$zASTSource.hashc                 C  s"   ddl m} || j| ||||dS )Nr   )ast_to_ttir)contextoptionscodegen_fns
module_map)Zcode_generatorrS   r-   )rA   rU   rV   rW   rT   rS   r   r   r   make_irQ   s    zASTSource.make_irc                 C  s   t  S N)r4   rA   r   r   r   parse_optionsV   s    zASTSource.parse_options)NNr1   
__module____qualname__rB   rR   rX   r[   r   r   r   r   r    4   s   r    c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
IRSourcec           
      C  s   || _ t|}|jdd  | _tj| _| | _t	
| |
| | jdkrtt| j | jtj}|d| _|d}tt| j |}dd t|D | _nPt	| j || _| j }d| | _| j|}| j|}	dd t|	D | _d S )Nr   r   r   c                 S  s   i | ]\}}|t |qS r   )r   r'   r(   tyr   r   r   r*   l   r+   z%IRSource.__init__.<locals>.<dictcomp>@c                 S  s   i | ]\}}||qS r   r   r`   r   r   r   r*   s   r+   )pathr   suffixr0   r   r.   r/   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r2   findallarg_type_patternr=   r3   parse_mlir_modulemoduleZget_entry_func_nameZget_functionZget_function_signature)
rA   rc   rT   backendr   r3   typesfn_nameZfuncOpZfunc_tyr   r   r   rB   \   s&    






zIRSource.__init__c                 C  s   t | jd S )NrJ   )rM   rN   rf   rO   rP   rZ   r   r   r   rR   u   s    zIRSource.hashc                 C  s   || j _| j S rY   )rm   rT   )rA   rU   rV   rW   rT   r   r   r   rX   x   s    zIRSource.make_irc                 C  s4   | j dkr.| jd}|d us&J dd|iS t S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r0   rm   Zget_int_attrr4   )rA   rr   r   r   r   r[   |   s
    
zIRSource.parse_optionsNr\   r   r   r   r   r_   Z   s   r_   c               
   C  s  dd l } tjtjtjt}g }ttd(}|t|	 
 g7 }W d    n1 s`0    Y  tj|ddftj|ddfg}|D ]l\}}| j|g|dD ]R}t|j|jjd(}|t|	 
 g7 }W d    q1 s0    Y  qqt }tdd	d
 }	ttj|dd|	 d2}|	d}
|
sNq\||
 q:W d    n1 sr0    Y  ||
  tj|d}| j|gddD ]V}t|j|jjd(}|t|	 
 g7 }W d    n1 s0    Y  qt d| S )Nr   rbcompilerztriton.compiler.r   ztriton.backends.)prefix
EXT_SUFFIX.Z_Cz
libtriton.i   r/   ztriton.language.rG   )pkgutilosrc   dirnameabspath__file__openrM   rN   readrP   rL   walk_packagesmodule_finder	find_specr2   origin	sysconfigget_config_varr>   updateappendr
   )ry   ZTRITON_PATHcontentsfZpath_prefixesrc   ru   libZlibtriton_hashr0   chunkZlanguage_pathr   r   r   
triton_key   s2    6:
.<r   c                 C  s   t jj| d S )Nmax_shared_mem)r   activeutilsZget_device_properties)devicer   r   r   r      s    r   c                 C  sj   |dks|dkr&t | |}||_|S |dks>|dks>|dkrJt|  S |dksZ|dkrft|  S d S )Nr$   rq   Zllirr   Zamdgcncubinhsaco)r   rl   rT   r   re   
read_bytes)	full_namer0   rT   rm   r   r   r   parse   s    r   BaseException)ec                   s   t jjrdS | jdur t| j | jdur4t| j ddg}dd |D }| j g } durt fdd|D s||   j	 qTt
||dd D ]\}}||_	q|sd| _nd|d	 _	|d
 | _dS )z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.pyc                 S  s   g | ]}| d tjqS )/)replacerz   sep)r'   Zbad_filer   r   r   rC      r+   z$filter_traceback.<locals>.<listcomp>c                 3  s"   | ]} j jj|r|V  qd S rY   )tb_framef_codeco_filenameendswith)r'   r   tbr   r   	<genexpr>   r+   z#filter_traceback.<locals>.<genexpr>r   rx   r   )r   compilationZfront_end_debugging	__cause__filter_traceback__context____traceback__anyr   tb_nextzip)r   Z	BAD_FILESframesZ	cur_frameZ
next_framer   r   r   r      s,    





r   c                   @  sF   e Zd ZddddZddddZdddd	d
ZddddZdS )CompileTimerr!   r"   c                 C  s    t   | _d | _g | _d | _d S rY   )timestartir_initialization_endlowering_stage_endsstore_results_endrZ   r   r   r   rB      s    
zCompileTimer.__init__c                 C  s   t   | _d S rY   )r   r   rZ   r   r   r   finished_ir_initialization   s    z'CompileTimer.finished_ir_initializationr8   )
stage_namer#   c                 C  s   | j |t f d S rY   )r   r   r   )rA   r   r   r   r   stage_finished   s    zCompileTimer.stage_finishedzknobs.CompileTimesc                 C  s   t   }| jd u r|| _n|| _dddddd}g }| j}| jD ] \}}|||||f |}qBtj|| j| j|||| jdS )Nfloatzfloat | Noneint)r   endr#   c                 S  s   |d u rdS t ||  d S )Nr   i@B )r   )r   r   r   r   r   delta   s    zCompileTimer.end.<locals>.delta)Zir_initializationZlowering_stagesZstore_results)r   r   r   r   r   r   ZCompileTimesr   )rA   	timestampr   Zlowering_stage_durationsZstage_startr   Z	stage_endr   r   r   r      s    

zCompileTimer.endN)r1   r]   r^   rB   r   r   r   r   r   r   r   r      s   r   c           '      C  s  t jj}|rt }|d u r$tj }t|ts6J dt	|}t| t
 }|rtt| ts`J dt }t| ||} |  }|t|pt fi |}t }	t  d|   d|  d|  dtt|	  	}
t|
d }t|}t jj}t jj}t jj}|rt|  nd }|r4t|  nd }| j d d }| d}|!|p^i }|"|}t jj#}|s|d urt$| ||}|r|| |j%& ||' dd |S ||d	|j(|	}t)|d
< t }|*||| j+ t,|- .| j/}|r|d7 }t| ts0t }t0| |0| |1|}|2 }z| 3||||}W n0 t4y } zt5|  W Y d }~n
d }~0 0 |r| d| j/ }|6||||< n| d}|6||||< t jj7} |r| r|8| j9 t:d| j9  |r|;  t,| |d  D ]\}!}"|"||}#| d|! }|d u rx|"dd  }$r|$<d|! rt=|$|!|}#n*|>| }%rt:d|%  t=|%|!|}#|r|!dv r|6|#|||< |d ur|6|#| | |!kr|>|}&|#8|& t:d|&  |#}|r|?|! q|j6t@jA|tBd|dd||< |C|| t jjDsX|E  |rt|| |||' dd t$| ||S )Nz target must be of GPUTarget typez'source must be either AST or a filepathrG   rJ      .jsonT)rf   metadatametadata_grouptimesZ	cache_hit)rR   targetZtriton_versionr   rw   z.sourcezCreating new locations for ir_overridez
Overriding kernel with file )r   r   json)defaultF)binary)Fr   r   Zlistenerr   r   r   get_current_targetr7   r	   make_backendr    r8   r   rT   r_   r[   r4   r   r   rR   rK   r6   rM   rN   rO   rP   r   overrideZdump_irZstore_binary_onlyr   r   r2   Z	get_groupgetalways_compileCompiledKernelr   _asdictr   __dict__r
   Z
add_stagesr/   listr?   r:   r0   rg   Zget_codegen_implementationZget_module_maprX   	Exceptionr   put
use_ir_locZcreate_location_snapshotrc   printr   r   r   Zget_filer   r   dumpsvarsZ	put_groupZenable_asanZdisable_multithreading)'rf   r   rU   Zcompilation_listenerZtimerrn   Z	ir_sourcerT   Zextra_optionsZenv_varsrQ   rR   Zfn_cache_managerZenable_overrideZenable_ir_dumpZstore_only_binaryZfn_override_managerZfn_dump_manager	file_nameZmetadata_filenamer   metadata_pathr   resr   ZstagesZfirst_stagerV   rW   rm   r   Zir_filenamer   r0   Z
compile_irZnext_moduler   r   Zir_full_namer   r   r   compile  s    
:







$






r   r	   r   )r   r#   c                   sN    fddt  D }t|dkrBtt| d j d| d|d  S )Nc                   s   g | ]}|j  r|j qS r   )rt   Zsupports_target)r'   r   r   r   r   rC     r+   z make_backend.<locals>.<listcomp>r   z! compatible backends for target (z) (z). There should only be one.r   )r   valueslenRuntimeErrorrn   )r   Zactivesr   r   r   r     s    r   c                   @  s$   e Zd Zdd Zdd Zdd ZdS )LazyDictc                 C  s   || _ g | _d S rY   )dataextras)rA   r   r   r   r   rB     s    zLazyDict.__init__c                 C  s0   | j D ]\}}| j|| B | _q| j   | jS rY   )r   r   clearrA   funcargsr   r   r   r     s    
zLazyDict.getc                 C  s   | j ||f d S rY   )r   r   r   r   r   r   add  s    zLazyDict.addN)r1   r]   r^   rB   r   r   r   r   r   r   r     s   r   c                   @  s   e Zd Zdd ZdS )AsmDictc                 C  s.   |dkrt | d }ntd| || |< |S )NZsassr   zUnknown key: '%s')r   KeyError)rA   rQ   valuer   r   r   __missing__  s
    zAsmDict.__missing__N)r1   r]   r^   r   r   r   r   r   r     s   r   c                      s<   e Zd Zdd Zdd Z fddZdd Zd	d
 Z  ZS )r   c                   s  ddl m} tdd | D }t| }t|d |d< |d }t|d |d |d	 |d< |d
t	t
| }|f i || _t| jj}	|	| j| _|| _|| _| jj| _dd | D }
|	j t fdd|
D | _| j  | _d | _d | _d S )Nr   )
namedtuplec                 s  s$   | ]\}}| d rt|V  qdS )r   Nr   r   r'   cpr   r   r   r     r+   z*CompiledKernel.__init__.<locals>.<genexpr>Zcluster_dimsr   rn   arch	warp_sizeKernelMetadatac                 S  s"   g | ]\}}| d st|qS )r   r   r   r   r   r   rC     r+   z+CompiledKernel.__init__.<locals>.<listcomp>c                   s:   i | ]2}|j d d |j d d  kr.| n| qS )r   N)rd   r   re   )r'   file
binary_extr   r   r*     s   z+CompiledKernel.__init__.<locals>.<dictcomp>)collectionsr   nextr6   r   loadsre   r;   r	   rK   r   r?   r   r   r   Zpack_metadatapacked_metadatarf   rR   r2   r   r   Zasmkernelrm   function)rA   rf   r   rR   r   r   r   r   r   rn   Z	asm_filesr   r   r   rB     s*    

zCompiledKernel.__init__c                 C  s   | j d urd S tj }tj| j| j| _t|}| jj	|krPt
| jj	|dt| jdr| jjd urd}| jj|krt
| jj|dtjj| j| j| jj	|\| _ | _| _| _| _tj j}| jj| | jkrt
| jj| | jdd S )Nzshared memory	tmem_sizei   ztensor memorythreads)rm   r   r   get_current_deviceZlauncher_clsrf   r   runr   Zsharedr   rE   r   r   Zload_binaryr2   r   r   Zn_regsZn_spillsZn_max_threadsr   r   rr   )rA   r   Z
max_sharedZmax_tmem_sizer   r   r   r   _init_handles  s"    

zCompiledKernel._init_handlesc                   s   |dkr|    t |S )Nr   )r   super__getattribute__)rA   r2   	__class__r   r   r     s    zCompiledKernel.__getattribute__c           	      G  s   t jjd u rd S t| j| j|d}t| jtr>| jj	j
d u rB|S i }d}t| jj	jD ]\}}|| ||< |d7 }qX|| jj	j
|| j|f |S )N)r2   r   streamr   r   )r   runtimelaunch_enter_hookr   r2   r   r7   rf   r    r-   launch_metadatar=   r9   r   r   )	rA   gridr   r   retZarg_dictZarg_idxiZarg_namer   r   r   r    s    
zCompiledKernel.launch_metadatac                   s       d d fdd
}|S )N)r   c              
     sp   | d u rt j }t j|} j | g|R  }j d  d  d | jj|tj	j
tj	jg	|R   d S )Nr   r   r   )r   r   r   Zget_current_streamr  r   r   r   r   r  r  Zlaunch_exit_hook)r   r   r   r  r  rA   r   r   runner  s    
"z*CompiledKernel.__getitem__.<locals>.runner)r   )rA   r  r  r   r  r   __getitem__  s    zCompiledKernel.__getitem__)	r1   r]   r^   rB   r   r   r  r	  __classcell__r   r   r   r   r     s
   r   )NN)3
__future__r   rM   r   Z_C.libtritonr   r   r   Zbackends.compilerr   r   r	   r   r
   r   Zruntime.autotunerr   Zruntime.cacher   r   r   Zruntime.driverr   Ztools.disasmr   pathlibr   r   	functoolsrz   r   r   Zptx_prototype_patternrh   Zptx_arg_type_patternrk   r   r    r_   	lru_cacher   r   r   r   r   r   r   r   r4   r   r   r   r   r   r   <module>   sN   
&*
#
&'
 