a
    gh                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+d@ee d'd(d)Z,dAej-eeej.f d*d+d,Z/e0ej1d*d-d.Z2e0d/d0d1Z3d2d3 Z4dBd5d6Z5dCd7d8Z6e
j7j8e5  e6 d9Z9e:e:d:d;d<Z;eej.ej<j=jf ej.d=d>d?Z>dS )D    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8Zuint16uint32uint64)float16float32float64bfloat16Zfloat8_e4m3fnZfloat8_e5m2boolr   >   r   r   r   c                   C   s   t jdddkS )NZTRITON_INTERPRET01)osenvironget r   r   F/var/www/auris/lib/python3.9/site-packages/triton/_internal_testing.pyis_interpreter   s    r   c                   C   s   t  r
d S tjjj S N)r   tritonruntimeZdriveractiveget_current_targetr   r   r   r   r!      s    r!   c                  C   s   t  } | d u rdS | jdkS )NFcudar!   backendtargetr   r   r   is_cuda$   s    r'   c                   C   s   t  otj d dkS )Nr   	   )r'   torchr"   get_device_capabilityr   r   r   r   	is_hopper)   s    r+   c                  C   s   t  } | d u rdS | jdkS )NFhipr#   r%   r   r   r   is_hip-   s    r-   c                  C   s"   t  } | d uo | jdko | jdkS )Nr,   Zgfx90ar!   r$   archr%   r   r   r   is_hip_cdna22   s    r0   c                  C   s"   t  } | d uo | jdko | jdkS )Nr,   Zgfx942r.   r%   r   r   r   is_hip_cdna37   s    r1   c                  C   s"   t  } | d uo | jdko | jdkS )Nr,   Zgfx950r.   r%   r   r   r   is_hip_cdna4<   s    r2   c                  C   s,   t  } t| j | d uo*| jdko*d| jv S )Nr,   Zgfx12)r!   printr/   r$   r%   r   r   r   is_hip_gfx12A   s    
r4   c                   C   s   t  pt pt S r   )r0   r1   r2   r   r   r   r   is_hip_cdnaG   s    r5   c                  C   s   t  } | d u rdS | jdkS )NFZxpur#   r%   r   r   r   is_xpuK   s    r6   c                  C   s   t  } | d u rdS t| jS )N )r!   strr/   r%   r   r   r   get_archP   s    r9   )rsc                 C   s@  t | tr| f} |du r"tdd}|tt v rttt|}|du rL|jn
t	||j}|du rf|j	n
t||j	}tt|}|j
||| |d}d||dk< |S |rd|v r|j
dd	| tjd}|S |tv r|dd| |S |d
kr|dd| ddtd@ dS |dv r.|dd| dkS td| dS )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   Zint1Zbool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shapeZ	dtype_strr:   lowhighrG   r=   xr   r   r   numpy_randomU   s,    



*
rU   )rT   returnc                 C   s   | j j}|tv rD|d}| tt|}ttj	||dtt
|S |rjd|v rjttj	| |dtt
|S |dkr|dkrtj	| |d S tj	| |dS dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicer?   r   r   N)r=   namerE   lstriprN   rH   rF   r   r)   Ztensortlr   )rT   rX   Zdst_typetZsigned_type_nameZx_signedr   r   r   	to_tritons   s    
r]   c                 C   s   t t|  S r   )r[   Z	str_to_tyr   rT   r   r   r   str_to_triton_dtype   s    r_   )rV   c                 C   sP   t | tjjr| jS t | tjr:tdt| }|	dS t
dt|  d S )Nz^torch\.(\w+)$r>   znot a triton or torch dtype: )rB   r   languager=   rY   r)   rematchr8   group	TypeErrortype)r=   mr   r   r   torch_dtype_name   s    
rg   c                 C   sp   t | tr*| j  ttt| j	S t | t
jr^| j	t
ju rR|    S |   S td|  d S )Nz Not a triton-compatible tensor: )rB   r   basecpunumpyrN   rH   rF   rg   r=   r)   Tensorr   float
ValueErrorr^   r   r   r   to_numpy   s    
 rn   Fc                 C   sl   t  r
dS t sdS tjjj}| r&dnd}ttt|	d}t
|dksRJ |tj d dkoj||kS )	NTF)   r   )ro      .   r   r(   )r   r'   r   ZnvidiaZptxasversiontuplemaprC   splitlenr)   r"   r*   )
byval_onlyZcuda_versionZmin_cuda_versionZcuda_version_tupler   r   r   supports_tma   s    
ry   c                 C   s   | rdS dS d S )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )rx   r   r   r   tma_skip_msg   s    rz   )reason)sizealignc                 C   s   t j| t jddS )Nr"   )r=   rX   )r)   emptyr	   )r|   r}   _r   r   r   default_alloc_fn   s    r   )r\   rV   c                 C   s   t | tjjjr| jS | S r   )rB   r   r   jitr   rh   )r\   r   r   r   unwrap_tensor   s    r   )NNN)N)F)F)?r   ra   rj   rF   r)   r   Ztriton.languager`   r[   r   ZpytestZnumpy.randomr   typingr   r   Ztriton.runtime.jitr   r   r   rD   rE   Zintegral_dtypesrL   Zfloat_dtypes_with_bfloat16ZdtypesZdtypes_with_bfloat16Ztorch_float8_dtypesZtorch_dtypessortedsetZ
tma_dtypesr   r!   r'   r+   r-   r0   r1   r2   r4   r5   r6   r9   rU   Zndarrayrk   r]   r8   r=   r_   rg   rn   ry   rz   markZskipifZrequires_tmarC   r   r   r   r   r   r   r   r   <module>   sT   



