o
    `Zh                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&d d! Z'd"d# Z(d7d$ee fd%d&Z)d8d'ej*d(eeej+f fd)d*Z,d'e-d(ej.fd+d,Z/d(e-fd-d.Z0d/d0 Z1d9d2d3Z2d9d4d5Z3ej4j5e2  e3 d6Z6dS ):    N)_path_to_binary)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8Zint16Zint32Zint64)uint8Zuint16uint32Zuint64)Zfloat16float32Zfloat64bfloat16Zfloat8_e4m3fnZfloat8_e5m2boolr
   c                   C   s   t jdddkS )NZTRITON_INTERPRET01)osenvironget r   r   G/var/www/auris/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r   c                   C   s   t  rd S tjjj S N)r   tritonZruntimeZdriveractiveget_current_targetr   r   r   r   r      s   r   c                  C      t  } | d u r	dS | jdkS )NFcudar   backendtargetr   r   r   is_cuda#      r"   c                   C   s   t  otj d dkS )Nr   	   )r"   torchr   get_device_capabilityr   r   r   r   	is_hopper(   s   r'   c                  C   r   )NFhipr   r    r   r   r   is_hip,   r#   r)   c                  C   s&   t  } | d u s| jdkrdS | jdkS )Nr(   FZgfx90ar   r   archr    r   r   r   is_hip_mi2001      
r,   c                  C   &   t  } | d u s| jdkrdS | jdv S )Nr(   F)Zgfx940Zgfx941Zgfx942r*   r    r   r   r   is_hip_mi3008   r-   r/   c                  C   r.   )Nr(   FZgfx950r*   r    r   r   r   is_hip_mi350?   r-   r0   c                   C   s   t  pt pt S r   )r,   r/   r0   r   r   r   r   is_hip_cdnaF   r   r1   c                  C   r   )NFZxpur   r    r   r   r   is_xpuJ   r#   r2   c                  C   s   t  } | d u r	dS t| jS )N )r   strr+   r    r   r   r   get_archO   r#   r5   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   Zint1Zbool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shapeZ	dtype_strr6   lowhighrC   r9   xr   r   r   numpy_randomT   s,   


*rQ   rP   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicer;   r   r   )r9   namerA   lstriprJ   rD   rB   r   r%   Ztensortlr   )rP   rT   Zdst_typetZsigned_type_nameZx_signedr   r   r   	to_tritonr   s   
rY   c                 C   s   t t|  S r   )rW   Z	str_to_tyr   rP   r   r   r   str_to_triton_dtype   s   r[   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$r:   znot a triton or torch dtype: )r>   r   languager9   rU   r%   rematchr4   group	TypeErrortype)r9   mr   r   r   torch_dtype_name   s   
rc   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )r>   r   basecpunumpyrJ   rD   rB   rc   r9   r%   Tensorr   float
ValueErrorrZ   r   r   r   to_numpy   s   
 rj   Fc                 C   sn   t  rdS t s
dS td\}}| rdnd}ttt|d}t|dks*J |tj	
 d d	ko6||kS )
NTFZptxas)   r   )rk      .   r   r$   )r   r"   r   tuplemapr?   splitlenr%   r   r&   )
byval_only_Zcuda_versionZmin_cuda_versionZcuda_version_tupler   r   r   supports_tma   s   ru   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )rs   r   r   r   tma_skip_msg   s   rv   )reason)NNNr   )F)7r   r]   rf   rB   r%   r   Ztriton.languager\   rW   Ztriton.backends.nvidia.compilerr   ZpytestZnumpy.randomr   typingr   r   Ztriton.runtime.jitr   r   r   r@   rA   Zintegral_dtypesrH   Zfloat_dtypes_with_bfloat16ZdtypesZdtypes_with_bfloat16Ztorch_float8_dtypesZtorch_dtypesr   r   r"   r'   r)   r,   r/   r0   r1   r2   r5   rQ   Zndarrayrg   rY   r4   r9   r[   rc   rj   ru   rv   markZskipifZrequires_tmar   r   r   r   <module>   sN    

 

