a
    jº”hþM  ã                   @   sâ   d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ ed	œd
d„Zdd„ Zdd„ ZeddG dd„ dƒƒZG dd„ deƒZdS )é    )ÚBaseBackendÚ	GPUTargetÚLanguage)ÚirÚpassesÚllvmÚamd)Úknobs)Ú	dataclass)ÚAnyÚDictÚTuple)Ú
ModuleTypeN)ÚPath©Útargetc                 C   s   dd„ S )Nc                 S   s   dS )N©é   r   r   © )Zlhs_typeZrhs_typer   r   úJ/var/www/auris/lib/python3.9/site-packages/triton/backends/amd/compiler.pyÚ<lambda>   ó    z"get_min_dot_size.<locals>.<lambda>r   r   r   r   r   Úget_min_dot_size   s    r   c                 C   s   t jjd u r| dkS t jjS ©NÚgfx942)r	   r   Úuse_block_pingpong©Úarchr   r   r   Úis_pingpong_schedule_enabled   s    r   c                 C   s   t jjd u r| dkS t jjS r   )r	   r   Zuse_in_thread_transposer   r   r   r   Úis_in_thread_transpose_enabled   s    r   T)Úfrozenc                   @   s&  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	e
ed	< d
Zeed< dZeed< dZeed< dZeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zeed#< d$d%„ Zd&d'„ Z dS )(Ú
HIPOptionsé   Ú	num_warpsr   Úwaves_per_eué   Ú
num_stagesÚnum_ctasNÚextern_libsr   Úcluster_dimsFÚdebugTÚsanitize_overflowr   )Úfp8e5Úsupported_fp8_dtypesr   Ú!deprecated_fp8_dot_operand_dtypesÚieeeÚdefault_dot_input_precision)r/   Úallowed_dot_input_precisionsÚenable_fp_fusionÚlaunch_cooperative_gridr   Úmatrix_instr_nonkdimÚkpackÚallow_flush_denormÚmax_num_imprecise_acc_defaultÚhipÚbackend_nameÚnoneÚschedule_hintc                 C   sÒ   t | jdd… ƒ}|dkrdnd}t | d|¡ | jdkrN| j| jd @ dksVJ d	ƒ‚| jd
krr| jdksrJ dƒ‚ttƒjd }| j	d u rŽi nt
| j	ƒ}dD ]}t||› d ƒ||< qœt | dt| ¡ ƒ¡ d S )Né   éþÿÿÿé
   é    é@   Ú	warp_sizer   r   znum_warps must be a power of 2Úgfx950zgfx950 only accepts kpack == 1Úlib)ZocmlZocklz.bcr(   )Úintr   ÚobjectÚ__setattr__r#   r5   r   Ú__file__Úparentr(   ÚdictÚstrÚtupleÚitems)ÚselfZ	gfx_majorrA   Údefault_libdirr(   rC   r   r   r   Ú__post_init__E   s     ÿ
zHIPOptions.__post_init__c                 C   s.   d  dd„ | j ¡ D ƒ¡}t | d¡¡ ¡ S )NÚ_c                 S   s   g | ]\}}|› d |› ‘qS )ú-r   )Ú.0ÚnameÚvalr   r   r   Ú
<listcomp>V   r   z#HIPOptions.hash.<locals>.<listcomp>úutf-8)ÚjoinÚ__dict__rL   ÚhashlibÚsha256ÚencodeÚ	hexdigest)rM   Úkeyr   r   r   ÚhashU   s    zHIPOptions.hash)!Ú__name__Ú
__module__Ú__qualname__r#   rD   Ú__annotations__r$   r&   r'   r(   rI   r)   rK   r*   Úboolr+   r   rJ   r-   r   r.   r0   r1   r2   r3   r4   r5   r6   r7   r9   r;   rO   r^   r   r   r   r   r!      s.   
r!   c                       s  e Zd Zeedœdd„ƒZeddœ‡ fdd„Zedœd	d
„Ze	dœdd„Z
dd„ Zdd„ Zeeef dœdd„Zdd„ Zedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd „ ƒZed!d"„ ƒZed#d$„ ƒZed%d&„ ƒZed'd(„ ƒZd)d*„ Ze ¡ d+d,„ ƒZ‡  ZS )-Ú
HIPBackendr   c                 C   s
   | j dkS )Nr8   )Úbackendr   r   r   r   Úsupports_target\   s    zHIPBackend.supports_targetN)r   Úreturnc                    s&   t ƒ  |¡ t|jtƒsJ ‚d| _d S )NÚhsaco)ÚsuperÚ__init__Ú
isinstancer   rJ   Z
binary_ext)rM   r   ©Ú	__class__r   r   rj   `   s    zHIPBackend.__init__)rg   c                 C   s   d|j › S )Núhip:r   ©rM   Úoptionsr   r   r   Úget_target_namee   s    zHIPBackend.get_target_namec                    sú   dt jjp| jji}| jjdkrFttjƒ}| dh¡ t	t
|ƒƒ|d< dˆ vrºttjƒ}| jjdkrt| h d£¡ n6| jjdkr| dd	h¡ nd
| jjv rª| dd	h¡ t	t
|ƒƒ|d< dˆ vrÎt jj|d< | ‡ fdd„tj ¡ D ƒ¡ tf i |¤ŽS )Nr   r   Ztf32r1   r-   >   Úfp8e4nvZfp8e4b8Zfp8e5b16rB   rr   r,   Zgfx12r2   c                    s*   i | ]"}|ˆ v rˆ | d ur|ˆ | “qS ©Nr   )rR   Úk©Úoptsr   r   Ú
<dictcomp>}   s   ÿz,HIPBackend.parse_options.<locals>.<dictcomp>)r	   ZruntimeZoverride_archr   r   Úsetr!   r1   ÚupdaterK   Úsortedr-   ÚlanguageZdefault_fp_fusionZ__dataclass_fields__Úkeys)rM   rv   Úargsr1   r-   r   ru   r   Úparse_optionsh   s$    

zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r   r%   )r#   r'   Úsharedr)   )rM   Úmetadatar   r   r   Úpack_metadata   s    úzHIPBackend.pack_metadatac                 C   s   dt | jƒiS )NZmin_dot_size)r   r   ro   r   r   r   Úget_codegen_implementation‹   s    z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )Ú	libdeviceztriton.language.extra.libdevice)Ztriton.language.extra.hiprƒ   )rM   rƒ   r   r   r   Úget_module_mapŽ   s    zHIPBackend.get_module_mapc                 C   s   t  |¡ d S rs   )r   Úload_dialects)rM   Úctxr   r   r   r…   “   s    zHIPBackend.load_dialectsc                 C   sL   dd l }d}t| dƒr"|  ¡ |kS t| |jƒrHt| dƒrH|  ¡  ¡ |kS dS )Nr   iÿÿÿÚ	ptr_rangeÚuntyped_storageF)ÚtorchÚhasattrr‡   rk   ZTensorrˆ   Úsize)Úargr‰   Z
MAX_INT_32r   r   r   Úis_within_2gb–   s    
zHIPBackend.is_within_2gbc                 C   s$   t  | ¡}d| v r |ddgg7 }|S )NÚSztt.pointer_ranger?   )r   Ú
parse_attr)ÚdescÚretr   r   r   r   ¡   s    
zHIPBackend.parse_attrc                 K   s:   t j| |fi |¤Ž}tjjr6|dkr6t | ¡r6|d7 }|S )NZtensorrŽ   )r   Úget_arg_specializationr	   r   Úuse_buffer_opsrd   r   )rŒ   ÚtyÚkwargsr‘   r   r   r   r’   ¨   s    z!HIPBackend.get_arg_specializationc                  C   sr   t jj} | d ur$t| ƒ}| ¡ r$|S ttƒjd }| ¡ r>|S tdƒ}| ¡ rR|S tdƒ}| ¡ rf|S tdƒ‚d S )Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r	   r   Zlld_pathr   Úis_filerG   rH   Ú	Exception)Zlld_env_pathZlldr   r   r   Úpath_to_rocm_lld±   s    zHIPBackend.path_to_rocm_lldc                 C   sš   t  | j¡}| ¡  tj |¡ tj |¡ tj 	|¡ tj 
|¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ | | ¡ | S rs   )r   Úpass_managerÚcontextÚenable_debugr   ÚcommonÚadd_inlinerÚttirZadd_rewrite_tensor_pointerZ(add_rewrite_tensor_descriptor_to_pointerÚadd_canonicalizerZadd_combineZadd_reorder_broadcastÚadd_cseÚadd_triton_licmÚadd_symbol_dceZadd_loop_unrollÚrun)Úmodr€   rp   Úpmr   r   r   Ú	make_ttirÅ   s    
zHIPBackend.make_ttirc                 C   s˜  t  | j¡}| ¡  tj |d|j› |j|j	|j
¡ | | ¡ t  | j¡}| ¡  tj |¡ tj |¡ tj |¡ tjj ||j|j|j¡ tj |¡ tjj |¡ tj |d¡ tjj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tjj}tjj}tjj}|jdkr$d }}tjj  ||j!|||¡ |rTtjj "||j¡ tj |¡ |j #¡ dkr‚tjj $||j¡ tj |d¡ tj |¡ tj %|¡ t&|jƒrÎtjj '|¡ tj |¡ tjj (|¡ t)|jƒ}|r
|j!dkr
tjj *||j!¡ tjj+r@tjj ,|¡ tj |¡ tjj -||j¡ tjj .|¡ tj |¡ tj /|¡ tj 0|¡ |rŠtjj 1||j¡ | | ¡ | S )Nrn   Tzlocal-prefetchr   r:   r%   )2r   r™   rš   r›   r   rž   Zadd_convert_to_ttgpuirr   r#   rA   r'   r£   ÚttgpuirZadd_coalesceZadd_remove_layout_conversionsZadd_optimize_thread_localityr   Zadd_accelerate_matmulr4   r5   Zadd_optimize_epilogueZadd_optimize_dot_operandsZadd_hoist_layout_conversionsZadd_fuse_nested_loopsrœ   rŸ   r¡   r	   Úglobal_prefetchÚlocal_prefetchÚuse_async_copyr;   Zadd_stream_pipeliner&   Zadd_coalesce_async_copyÚlowerZinsert_instruction_sched_hintsZadd_reduce_data_duplicationr   Zadd_in_thread_transposeZadd_reorder_instructionsr   Zadd_block_pingpongr“   Zadd_canonicalize_pointersZadd_convert_to_buffer_opsZadd_fold_true_cmpir    r¢   Zadd_update_async_wait_count)r¤   r€   rp   r¥   r¨   r©   rª   r   r   r   r   Ú
make_ttgirÖ   sj    ÿ



zHIPBackend.make_ttgirc                 C   sb   | }t  |j¡}| ¡  tj |¡ tj |¡ tj	 
|¡ tj |¡ tj |¡ | |¡ |S rs   )r   r™   rš   r›   r   r§   r   rœ   Zadd_sccprž   Zadd_loop_aware_cserŸ   Z add_combine_tensor_select_and_ifr£   )Úsrcr€   rp   r¤   r¥   r   r   r   Ú	ttgir_opt  s    
zHIPBackend.ttgir_optc                    s(  | }t  |j¡}| ¡  d}tjj ||j|¡ tj	 
|¡ tj	 |¡ tj |¡ d}tjj ||j|¡ tj |¡ tj |¡ tj	 |¡ tj	 |¡ tj |¡ tj |¡ tj |¡ |j ¡ dkrätjj ||j|j¡ tjjsøtj |¡ tjj ||¡ | |¡ t  ¡  t ¡ }t !||¡‰ t "ˆ ¡ d}tjj#rJd}t $ˆ tj%|j|¡ t &ˆ |j¡ t 'ˆ d¡ t (ˆ dd¡ t (ˆ d	d¡ t (ˆ d
d¡ t (ˆ d|j)dk¡ dd„ ˆ  *¡ D ƒ}	|	d  +tj,¡ |	d  -dd|j.|j) › ¡ |	d  -d|j/› ¡ |j0rdnd}
|	d  -d|
¡ tjj#rN|	d  1d¡ |	d  2¡  t 3|	d ¡ tjj#r¤t4t5ƒj6d }t7|d ƒt7|d ƒt7|d ƒg}t 8ˆ |¡ n(|j9rÌ‡ fdd„|j9D ƒ}t 8ˆ |¡ t :ˆ tj;|jdg |j<¡ tjj=rþt >|	d ¡ |  ?d¡|d< t @ˆ ¡ t Aˆ ¡ t7ˆ ƒS )Nr   Tr:   Ú ú+xnackiô  Z__oclc_finite_only_optFZ__oclc_correctly_rounded_sqrt32Z__oclc_unsafe_math_optZ__oclc_wavefrontsize64r@   c                 S   s   g | ]}|  ¡ s|‘qS r   )Zis_declaration)rR   Úfnr   r   r   rU   `  r   z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr/   zdenormal-fp-math-f32rC   z
asanrtl.bczocml.bczockl.bcc                    s    g | ]\}}t  ˆ |¡r|‘qS r   )r   Zneed_extern_lib)rR   rS   Úpath©Zllvm_modr   r   rU   €  r   z
ttg.sharedr   )Br   r™   rš   r›   r   r   r§   Zadd_optimize_lds_usager   ÚconvertZadd_scf_to_cfZadd_index_to_llvmirZadd_allocate_shared_memoryZadd_to_llvmirrœ   rŸ   r    Zadd_cf_to_llvmirZadd_arith_to_llvmirr¢   r;   r«   Zlower_instruction_sched_hintsr&   r	   ÚcompilationZdisable_line_infoZllvmirZadd_di_scopeZadd_builtin_func_to_llvmirr£   r   Zinit_targetsZ	to_moduleZattach_target_tripleÚenable_asanZattach_datalayoutÚTARGET_TRIPLEZset_isa_versionZset_abi_versionZset_bool_control_constantrA   Zget_functionsZset_calling_convZCALLING_CONV_AMDGPU_KERNELZadd_fn_attrr#   r$   r6   Zadd_fn_target_featureZadd_fn_asan_attrZset_all_fn_arg_inregr   rG   rH   rJ   Zlink_extern_libsr(   Zoptimize_moduleZOPTIMIZE_O3r2   Zscalarize_packed_fopsZ#add_scalarize_packed_fops_llvm_passZget_int_attrZcleanup_bitcode_metadataZdisable_print_inline)r­   r€   rp   r¤   r¥   Zcustom_lds_sizeZ_HIPBackend__HIP_FTZrš   Útarget_featuresÚfnsZdenormal_moderN   Úpathsr   r³   r   Ú	make_llir#  s€    







ý


zHIPBackend.make_llirc              	   C   sx   t  d| ¡}t|ƒdksJ ‚|d |d< g }|jdkr@| d¡ t | tj|j	d||j
d¡}tjjrttd	ƒ t|ƒ |S )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rS   Z	attentionzsink-insts-to-avoid-spillsr¯   Fz!// -----// AMDGCN Dump //----- //)ÚreÚfindallÚlenr;   Úappendr   Ztranslate_to_asmr   r·   r   r2   r	   Zdump_amdgcnÚprint)r­   r€   rp   ÚnamesÚflagsÚamdgcnr   r   r   Úmake_amdgcn‘  s    

zHIPBackend.make_amdgcnc                 C   s  d}t jjrd}t | |j|¡}t ¡ }t 	¡ ¾}t 	¡ b}t
|jdƒ}| |¡ W d   ƒ n1 sh0    Y  t |ddd|jd|jg¡ W d   ƒ n1 s¢0    Y  t
|jdƒ}	|	 ¡ }
W d   ƒ n1 sÖ0    Y  W d   ƒ n1 sô0    Y  |
S )	Nr¯   r°   Úwbz-flavorZgnuz-sharedz-oÚrb)r	   rµ   r¶   r   Zassemble_amdgcnr   rd   r˜   ÚtempfileÚNamedTemporaryFileÚopenrS   ÚwriteÚ
subprocessÚ
check_callÚread)r­   r€   rp   r¸   rh   Z	rocm_pathZtmp_outZtmp_inZfd_inZfd_outr‘   r   r   r   Ú
make_hsaco§  s    

(:DzHIPBackend.make_hsacoc                    s†   |t jkr0‡ ‡fdd„|d< ‡ ‡fdd„|d< n|t jkrL‡ ‡fdd„|d< ‡ ‡fdd„|d< ‡ ‡fd	d„|d
< ‡ ‡fdd„|d< d S )Nc                    s   ˆ  | |ˆ ¡S rs   )r¦   ©r­   r€   ©rp   rM   r   r   r   º  r   z'HIPBackend.add_stages.<locals>.<lambda>rž   c                    s   ˆ  | |ˆ ¡S rs   )r¬   rÏ   rÐ   r   r   r   »  r   Zttgirc                    s   ˆ  | |ˆ ¡S rs   )r®   rÏ   rÐ   r   r   r   ½  r   c                    s   ˆ  | |ˆ ¡S rs   )r»   rÏ   rÐ   r   r   r   ¾  r   Zllirc                    s   ˆ  | |ˆ ¡S rs   )rÄ   rÏ   rÐ   r   r   r   ¿  r   rÃ   c                    s   ˆ  | |ˆ ¡S rs   )rÎ   rÏ   rÐ   r   r   r   À  r   rh   )r   ZTRITONZGLUON)rM   Zstagesrp   r{   r   rÐ   r   Ú
add_stages¸  s    

zHIPBackend.add_stagesc                 C   s&   t jt ¡ dgdd}|› d| j› S )Nz	--versionrV   )ÚencodingrQ   )rË   Úcheck_outputrd   r˜   r   )rM   Úversionr   r   r   r^   Â  s    zHIPBackend.hash) r_   r`   ra   Ústaticmethodr   rf   rj   rJ   rq   r   r~   r   r‚   r   r   r„   r…   r   r   r’   r˜   r¦   r¬   r®   r»   rÄ   rÎ   rÑ   Ú	functoolsÚ	lru_cacher^   Ú__classcell__r   r   rl   r   rd   Z   s@   







=

m


rd   )Ztriton.backends.compilerr   r   r   Ztriton._C.libtritonr   r   r   r   Ztritonr	   Zdataclassesr
   Útypingr   r   r   Útypesr   rY   rÇ   r¼   rË   rÖ   Úpathlibr   r   r   r   r!   rd   r   r   r   r   Ú<module>   s"   <