
    &ThJ                         S SK JrJr  S SKJrJrJrJr  S SKJ	r	  S SK
JrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S\4S	 jrS
 r\	" SS9 " S S5      5       r " S S\5      rg)    )BaseBackend	GPUTarget)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     S $ )Nc                     g)N   r   r    )lhsTyperhsTypes     T/var/www/auris/envauris/lib/python3.13/site-packages/triton/backends/amd/compiler.py<lambda>min_dot_size.<locals>.<lambda>   s    I    r   r   s    r   min_dot_sizer      s    --r   c                 J    U S:X  a  SOSn[         R                  " SU5      S:H  $ )Ngfx94210TRITON_HIP_USE_BLOCK_PINGPONG)osgetenv)archdefaults     r   is_pingpong_enabledr&      s'    X%c3G994g>#EEr   T)frozenc                      \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   S	r
\\S
'   S	r\\S'   S	r\\S'   S	r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Sr\\S '   S	r\\S!'   Sr\\S"'   Sr \\S#'   S	r!\\S$'   S%r"\\S&'   S'r#\\S('   S) r$S* r%Sr&g)+
HIPOptions      	num_warpsr   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libsr   cluster_dimsFdebugTsanitize_overflowr$   )fp8e5supported_fp8_dtypesr   deprecated_fp8_dtypesieeedefault_dot_input_precision)r<   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneinstruction_sched_variantc                    [        [        5      R                  S-  nU R                  c  0 O[	        U R                  5      nSU R
                  ;   d   SU R
                  ;   d  SU R
                  ;   a  SOSn[        R                  U SU5        U R
                  S:X  a  S	OU R                  n[        R                  U S
U5        SS/nU H  n[        X S3-  5      X&'   M     [        R                  U S[        UR                  5       5      5        U R                  S:  a   U R                  U R                  S	-
  -  S:X  d   S5       eg )Nlibgfx10gfx11gfx12    @   	warp_sizegfx950r   rB   ocmlocklz.bcr5   r   znum_warps must be a power of 2)r   __file__parentr5   dictr$   object__setattr__rB   strtupleitemsr,   )selfdefault_libdirr5   rP   rB   libsrJ   s          r   __post_init__HIPOptions.__post_init__G   s   h..6 ,,4b$t?O?O:P!TYY.'TYY2F'UYU^U^J^Bdf	4i8YY(*

4%0C">e3K#?@K 4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr   c           	          SR                  U R                  R                  5        VVs/ s H  u  pU SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )N_-utf-8)join__dict__r[   hashlibsha256encode	hexdigest)r\   namevalkeys       r   hashHIPOptions.hashW   sa    hh9L9L9NO9NID4&#9NOP~~cjj12<<>> Ps   A7
)'__name__
__module____qualname____firstlineno__r,   int__annotations__r-   r/   r0   r1   r2   r3   r4   r5   rV   r6   rZ   r7   boolr8   r$   rY   r:   r   r;   r=   r>   r?   r@   rA   rB   rC   rD   rF   rH   r_   rn   __static_attributes__r   r   r   r)   r)      s"   IsL#JHc!"3"  ccK#L%#E4"t"D#'2%*2(*5:*'--/9 %*9!d!$)T) !#!E3N$$)*!3*L#& &,s+0 ?r   r)   c                     ^  \ rS rSr\S\4S j5       rS\SS4U 4S jjrS\4S jr	S r
S	 rS\\\4   4S
 jrS r\\R$                  " 5       S 5       5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS r\R$                  " 5       S 5       rSrU =r $ )
HIPBackend\   r   c                      U R                   S:H  $ )NrE   )backendr   s    r   supports_targetHIPBackend.supports_target^   s    ~~&&r   returnNc                 t   > [         TU ]  U5        [        UR                  [        5      (       d   eSU l        g )Nhsaco)super__init__
isinstancer$   rY   
binary_ext)r\   r   	__class__s     r   r   HIPBackend.__init__b   s.     &++s++++!r   c                 :   S[         R                  " SU R                  R                  5      0nU R                  R                  S;   aB  [	        [
        R                  5      nUR                  S15        [        [        U5      5      US'   SU;  a  [	        [
        R                  5      nU R                  R                  S;   a  UR                  1 Sk5        O-U R                  R                  S;   a  UR                  S	S
15        [        [        U5      5      US'   SU;  a  [         R                  " SS5      S:H  US'   UR                  [
        R                  R                  5        Vs0 s H  oUU;   d  M
  X   c  M  XQU   _M     sn5        [        S0 UD6$ s  snf )Nr$   TRITON_OVERRIDE_ARCH)gfx940gfx941r   tf32r>   r:   >   fp8e4b8fp8e4nvfp8e5b16rQ   r   r9   r?   TRITON_DEFAULT_FP_FUSIONr   r   )r"   r#   r   r$   setr)   r>   updaterZ   sortedr:   __dataclass_fields__keys)r\   optsargsr>   r:   ks         r   parse_optionsHIPBackend.parse_optionsg   s_   		"8$++:J:JKL ;;==+.z/V/V+W((//938@\9]3^D/0!-#&z'F'F#G {{#AA$++,NO!!h/$++Y,@A+08L1M+ND'(T)')yy1KS'QUX'XD#$)H)H)M)M)Ou)OAX\S\ZaeahZQQZ)Ouv!D!! vs   )	F6F=	Fc                     UR                   UR                  UR                  UR                  S   UR                  S   UR                  S   4$ )Nr   r   r.   )r,   r0   sharedr6   )r\   metadatas     r   pack_metadataHIPBackend.pack_metadata}   sO    OO!!!$!!!$!!!$
 	
r   c                 4    S[        U R                  5      0nU$ )Nr   )r   r   )r\   optionscodegen_fnss      r   get_codegen_implementation%HIPBackend.get_codegen_implementation   s    %|DKK'@Ar   c                     SSK Jn  SU0$ )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r\   r   s     r   get_module_mapHIPBackend.get_module_map   s    719==r   c                 0    [         R                  " U5        g N)r   load_dialects)r\   ctxs     r   r   HIPBackend.load_dialects   s    #r   c                  H    [         R                  R                  SS5      S:H  $ )NAMDGCN_USE_BUFFER_OPSr    r   )r"   environgetr   r   r   use_buffer_opsHIPBackend.use_buffer_ops   s     zz~~5s;sBBr   c                     SS K nSn[        U S5      (       a  U R                  5       U:*  $ [        XR                  5      (       a2  [        U S5      (       a!  U R                  5       R                  5       U:*  $ g)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbHIPBackend.is_within_2gb   sd    
3$$==?j00c<<((WS:K-L-L&&(--/:==r   c                 N    [         R                  " U 5      nSU ;   a  USS//-  nU$ )NSztt.pointer_rangerN   )r   
parse_attr)descrets     r   r   HIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     [         R                  " X40 UD6n[        R                  5       (       a%  US:X  a  [        R	                  U 5      (       a  US-  nU$ )Ntensorr   )r   get_arg_specializationry   r   r   )r   tykwargsr   s       r   r   !HIPBackend.get_arg_specialization   sM    00CFC $$&&2>j>V>VWZ>[>[3JC
r   c                  |   [         R                  " S5      n U b"  [        U 5      nUR                  5       (       a  U$ [        [        5      R
                  S-  nUR                  5       (       a  U$ [        S5      nUR                  5       (       a  U$ [        S5      nUR                  5       (       a  U$ [        S5      e)NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r"   r#   r   is_filerT   rU   	Exception)lld_env_pathllds     r   path_to_rocm_lldHIPBackend.path_to_rocm_lld   s     yy!67#|$C{{}}
8n##&77;;==J./;;==J$%;;==Jqrrr   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR!                  U 5        U $ r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirHIPBackend.make_ttir   s    __S[[)
!!"%..r2''+#))"-b!r"$$R(##B'
s
r   c                 ^
   [         R                  " U R                  5      nUR                  5         [        R
                  R                  USUR                   3UR                  UR                  UR                  5        UR                  U 5        [         R                  " U R                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [         R                  R                  R#                  X2R                  UR$                  UR&                  5        [        R                  R                  U5        [         R                  R                  R)                  U5        [        R                  R+                  US5        [         R                  R                  R-                  U5        [/        [0        R2                  " SS5      5      n[/        [0        R2                  " SS5      5      nUR4                  S:X  a  S=pE[         R6                  " UR                  5      (       aj  UR8                  S:w  d   S	5       e[         R                  R                  R;                  X2R8                  XE5        [        R<                  R?                  U5        UR4                  RA                  5       S
:w  a3  [         R                  R                  RC                  X2R4                  5        [        R                  R+                  US5        [        R                  R                  U5        [        R                  RE                  U5        [         R6                  " UR                  5      (       a~  [         R                  R                  RG                  U5        [I        UR                  5      nU(       a9  UR8                  S:X  a)  [         R                  R                  RK                  U5        [L        RO                  5       (       a{  [         R                  R                  RQ                  U5        [        R<                  R?                  U5        [         R                  R                  RS                  X2R                  5        [        R<                  R?                  U5        [        R<                  RU                  U5        [        R<                  RW                  U5        UR                  U 5        U $ )Nzhip:TTRITON_HIP_GLOBAL_PREFETCHr    TRITON_HIP_LOCAL_PREFETCHzlocal-prefetchr   r   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.rG   r.   ),r   r   r   r   r   r   add_convert_to_ttgpuirr$   r,   rP   r0   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr   add_accelerate_matmulrA   rB   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsrt   r"   r#   rH   has_matrix_core_featurer/   add_stream_pipeliner   r   lowerinsert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr&   add_block_pingpongry   r   add_canonicalize_pointersadd_convert_to_buffer_opsr   r   )r   r   r   r   global_prefetchlocal_prefetchuse_block_pingpongs          r   
make_ttgirHIPBackend.make_ttgir   sV   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00\\7C_C_ahanano44R8

00400T:

77;bii(DcJKRYY'BCHI ,,0@@/00O&&w||44%%* Q .P Q*
 JJ2227I7I?kMM++B/,,224>JJ==bBcBcd00T:44R82226&&w||44JJ77;!4W\\!B!g&8&8A&=

""55b9$$&&JJ88<MM++B/JJ88\\J''+b!$$R(
s
r   c                    U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  R                  XBR                  5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [
        R                  R                  U5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R!                  U5        [
        R                  R#                  U5        [
        R                  R%                  U5        [
        R                  R'                  U5        [
        R                  R!                  U5        [
        R                  R#                  U5        [
        R                  R)                  U5        UR*                  R-                  5       S:w  a>  [        R
                  R                  R/                  XBR                  UR0                  5        [2        R4                  R7                  SS5      S:X  a  [
        R8                  R;                  U5        [        R
                  R                  R=                  XF5        UR?                  U5        [@        RB                  " 5         [@        R                  " 5       n[@        RD                  " X75      n[        RF                  " U5        Sn	[2        R4                  R7                  SS5      S:X  a  S	n	[@        RH                  " U[        RJ                  UR                  U	5        [        RL                  " XR                  5        [        RN                  " US
5        [        RP                  " USS5        [        RP                  " USS5        [        RP                  " USS5        [        RP                  " USURR                  S:H  5        URU                  5        V
s/ s H  oRW                  5       (       a  M  U
PM     nn
US   RY                  [        RZ                  5        US   R]                  SSUR^                  URR                  -   35        US   R]                  SUR`                   5        URb                  (       a  SOSnUS   R]                  SU5        [2        R4                  R7                  SS5      S:X  a'  US   Re                  S	5        US   Rg                  5         [        Rh                  " US   5        [2        R4                  R7                  SS5      S:X  a\  [k        [l        5      Rn                  S-  n[q        US-  5      [q        US-  5      [q        US-  5      /n[@        Rr                  " X5        OeURt                  (       aT  URt                   VVs/ s H%  u  nn[        Rv                  " X5      (       d  M#  UPM'     nnn[@        Rr                  " X5        [@        Rx                  " U[@        Rz                  UR                  S/ UR|                  5        U R                  S5      US'   [        R                  " U5        [        R                  " U5        [q        U5      $ s  sn
f s  snnf )Nr   TrG   TRITON_DISABLE_LINE_INFOr     TRITON_ENABLE_ASANr   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rO   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr<   zdenormal-fp-math-f32rJ   z
asanrtl.bczocml.bczockl.bcz
ttg.sharedr   )Br   r   r   r   r   r   r   %add_decompose_unsupported_conversionsr$   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rH   r   lower_instruction_sched_hintsr/   r"   r   r   llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrP   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r-   rC   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rT   rU   rY   link_extern_libsr5   need_extern_liboptimize_moduleOPTIMIZE_O3r?   get_int_attrcleanup_bitcode_metadatadisable_print_inline)srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moder]   pathsrk   paths                    r   	make_llirHIPBackend.make_llir  s   __S[[)


@@\\R 

11"llOT$$R(**2.11"5 	

((\\9E''+b!''+**2.''+b!$$R(,,224>JJ<<RwOaOab::>>4c:cAMM&&r*

55bD
s 	,,.>>#/  *::>>.4;&Ox):):GLL/Z 	Hll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224P4b<M<M<Or4PA > >?A8Bw?P?PQXQbQb?b>c:de 	A0W5I5I4JL+2+E+E6A1=A::>>.4;F((2F##%
 	  Q(::>>.4;!(^22U:NN\12NY./NY./E
 !!(2  .5.A.Ai.AltTSEXEXYaEhT.AEi!!(2Xt'7'7r2wOgOgh !--l;$$X. 	  *8}Y Q@ js   ["[	"[/[c           	      V   [         R                  " SU 5      n[        U5      S:X  d   eUS   US'   [        R                  " U [
        R                  UR                  S/ UR                  S5      n[        R                  R                  SS5      S	:X  a  [        S
5        [        U5        U$ )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rk   r  FAMDGCN_ENABLE_DUMPr    r   z!// -----// AMDGCN Dump //----- //)refindalllenr   translate_to_asmr   r  r$   r?   r"   r   r   print)r/  r   r   namesamdgcns        r   make_amdgcnHIPBackend.make_amdgcny  s    
 

QSVW5zQ 8&&sC,=,=w||RQSU\UmUmotu::>>.4;56&Mr   c                    Sn[         R                  R                  SS5      S:X  a  Sn[        R                  " XR
                  U5      n[        R                  5       n[        R                  " 5        n[        R                  " 5        n[        UR                  S5       nUR                  U5        S S S 5        [        R                  " USSS	UR                  S
UR                  /5        S S S 5        [        UR                  S5       n	U	R                  5       n
S S S 5        S S S 5        W
$ ! , (       d  f       N= f! , (       d  f       NX= f! , (       d  f       N:= f! , (       d  f       W
$ = f)Nr  r  r    r   r  wbz-flavorgnuz-sharedz-orb)r"   r   r   r   assemble_amdgcnr$   ry   r   tempfileNamedTemporaryFileopenrk   write
subprocess
check_callread)r/  r   r   r3  r   	rocm_pathtmp_outtmp_infd_infd_outr   s              r   
make_hsacoHIPBackend.make_hsaco  s   ::>>.4;&O##CG//1	((*g,,.&&++t,KK& -%%y)UIv{{\`bibnbn&op / gllD)Vkkm * + 
 -, /. *) +* 
sT   3E	D= D,29D=+E	EE,
D:6D==
E	E
E	E
E.c                 l   ^ ^ UU 4S jUS'   UU 4S jUS'   UU 4S jUS'   UU 4S jUS'   UU 4S	 jUS
'   g )Nc                 (   > TR                  XT5      $ r   )r   r/  r   r   r\   s     r   r   'HIPBackend.add_stages.<locals>.<lambda>      t~~cW/Ur   r   c                 (   > TR                  XT5      $ r   )r   r[  s     r   r   r\        w0Wr   ttgirc                 (   > TR                  XT5      $ r   )r9  r[  s     r   r   r\    r]  r   llirc                 (   > TR                  XT5      $ r   )rD  r[  s     r   r   r\    s    1A1A#QX1Yr   rC  c                 (   > TR                  XT5      $ r   )rW  r[  s     r   r   r\    r_  r   r   r   )r\   stagesr   s   ` `r   
add_stagesHIPBackend.add_stages  s1    UvWwUvYxWwr   c                 v    [         R                  " [        R                  5       S/SS9nU SU R                   3$ )Nz	--versionrd   )encodingrc   )rO  check_outputry   r   r   )r\   versions     r   rn   HIPBackend.hash  s8    )):+F+F+H+*Vahi!DKK=))r   )r   )!rp   rq   rr   rs   staticmethodr   r}   r   r
   r   r   r   r   rY   r   r   r   	functools	lru_cacher   r   r   r   r   r   r   r9  rD  rW  rf  rn   rw   __classcell__)r   s   @r   ry   ry   \   sc   '	 ' '"y "T "
"S ",
>S*_ 5 >
 C  C       s s&   3 3j i iV     X * *r   ry   )triton.backends.compilerr   r   triton._C.libtritonr   r   r   r   dataclassesr	   typingr
   r   r   typesr   rg   rK  r"   r=  rO  rn  pathlibr   r   r&   r)   ry   r   r   r   <module>rw     sn    ; 5 5 ! # #    	 	   . .
F
 $?? ?? ??DG* G*r   