a
    kh?                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ ddd	d
ZddddZej	edd Z
ej	eeddd Zej	eedduddZej	edvddZedd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zej	eejd%d&d'd(dwd*d+Zej	eejd,d-d.dxd/d0Zed1d2 Zed3d4 Zed5d6 Zed7d8 Zej	eejd9d&d'd(dyd:d;Zej	eejd<d-d.dzd=d>Zed?d@ ZdddAdBdCZ ej	eejdDdEdFd{ddGdHdDZ!edIdJ Z"ej	eedKd|dLdMZ#edNdO Z$ej	eedPd}dQdRZ%ej	eej&dSdEdFd~ddGdTdSZ'edUdV Z(ej	ee&dWddXdWZ)edddYdZd[Z*eddd\d]Z+eddd^d_d`Z,eddddadbdcZ-eddej.fddddddedfZ/edej.fdddgdhdiZ0eddddjdkdlZ1edej.fdddgdmdnZ2dodp Z3ej	eddqdrZ4edsdt Z5dS )    )annotations   )jit   )core)mathzcore.constexpr)ic                 C  s4   d}t | j}|dkr*|dL }|d7 }qt |S )Nr   r   )r   	constexprvalue)r   log2n r   F/var/www/auris/lib/python3.9/site-packages/triton/language/standard.py_log2
   s    
r   c                 C  s$   | j }t||d @ dko |dkS Nr   r   )r
   r   r	   )r   r   r   r   r   _is_power_of_two   s    r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r   )xdivr   r   r   cdiv   s    r   sigmoidc                 C  s   ddt |    S )Nr   )r   exp)r   r   r   r   r   +   s    softmaxNFc                 C  sJ   |d u rd}n|}| t | ||d }t|}t|||d}t|||S )Nr   	keep_dims)maxr   r   sumZfdiv)r   dimr   Zieee_rounding_dimznumZdenr   r   r   r   2   s    
c                 C  s   t j| | jg|dS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r    r   r   r   ravel@   s    	r#   c                 C  sX   | | | }|| }|| }|| }t || |}|| }|||  }	|| }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jZsize_iZsize_jZsize_gZijZsize_gjZgroup_idZoff_iZnew_iZnew_jr   r   r   	swizzle2dL   s    r'   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zerost   s    
r+   c                 C  s   t | j| jS )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r+   r)   r*   )inputr   r   r   
zeros_like   s    r-   c           	      C  sJ   |r| |ko||k }nd}| |kp$|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtZv_retZi_retr   r   r   _argmax_combine   s    r8   c                 C  s   t | |||dS NTr8   r1   r2   r3   r4   r   r   r   _argmax_combine_tie_break_left   s    r<   c                 C  s   t | |||dS r.   r:   r;   r   r   r   _argmax_combine_tie_break_fast   s    r=   c                 C  s   t | |S N)r   maximumabr   r   r   _elementwise_max   s    rC   r?   return_indicesreturn_indices_tie_break_left)Zreturn_indices_argtie_break_argTc                 C  s   t | } |r8|r$t j| |t|dS t j| |t|dS nft | jjt dk rt | j rn| 	t j
} n| j sJ d| 	t j} t j| |t|dS d S Nr       z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesr<   r=   r	   r*   primitive_bitwidthis_floatingtofloat32is_intint32reducerC   r,   axisrD   rE   r   r   r   r   r      s    
r   zmaximum indexr5   )rF   c                 C  s   t | |d||d\}}|S NT)rD   rE   r   )r   r,   rS   r5   r   _retr   r   r   argmax   s    rX   c           	      C  sJ   |r| |ko||k }nd}| |k p$|}t || |}t |||}||fS r.   r/   )	r1   r2   r3   r4   r5   r6   ltZ	value_retZ	index_retr   r   r   _argmin_combine   s    rZ   c                 C  s   t | |||dS r9   rZ   r;   r   r   r   _argmin_combine_tie_break_left   s    r\   c                 C  s   t | |||dS r.   r[   r;   r   r   r   _argmin_combine_tie_break_fast   s    r]   c                 C  s   t | |S r>   r$   r@   r   r   r   _elementwise_min   s    r^   r%   c                 C  s   t | } |r8|r$t j| |t|dS t j| |t|dS n`t | jjdk rt | j rh| 	t j
} n| j szJ d| 	t j} t j| |t|dS d S rG   )r   rI   rJ   r\   r]   r	   r*   rK   rL   rM   rN   rO   rP   rQ   r^   rR   r   r   r   min   s    
r_   zminimum indexc                 C  s   t | |d||d\}}|S rT   )r_   rU   r   r   r   argmin   s    r`   c                 C  s   | | S r>   r   r@   r   r   r   _sum_combine  s    ra   )in_dtyper*   c                 C  sX   t |}|d ur|S d }|  r8| jdk r2t jnd }n|  rT| jdk rPt jnd }|S )NrH   )r   _unwrap_if_constexprZis_int_signedZint_bitwidthrP   Zis_int_unsignedZuint32)rb   r*   	out_dtyper   r   r   _pick_sum_dtype
  s    
re   r   r*   )Z	dtype_arg)r*   c                 C  s0   t | j|}|d ur| |} tj| |t|dS )Nr   )re   r*   rM   r   rQ   ra   )r,   rS   r   r*   rd   r   r   r   r     s    
c                 C  s   | |A S r>   r   r@   r   r   r   _xor_combine%  s    rf   zxor sumc                 C  s&   t | jj d t j| |t|dS )Nz#xor_sum only supported for integersr   )r   static_asserttypescalarrO   rQ   rf   r,   rS   r   r   r   r   xor_sum-  s    rk   c                 C  s   | |B S r>   r   )r   yr   r   r   _or_combine8  s    rm   Z	reduce_ofc                 C  s&   t | jj d t j| |t|dS )Nz%reduce_of only supported for integersr   )r   rg   rh   ri   rO   rQ   rm   rj   r   r   r   	reduce_or=  s    rn   cumsumc                 C  s8   t | } t| j|}|d ur(| |} t | |t|S r>   )r   rI   re   r*   rM   associative_scanra   )r,   rS   reverser*   rd   r   r   r   ro   H  s
    

c                 C  s   | | S r>   r   r@   r   r   r   _prod_combineZ  s    rr   cumprodc                 C  s   t | } t | |t|S r>   )r   rI   rp   rr   )r,   rS   rq   r   r   r   rs   _  s    
)n_dimsr&   c                 C  s:   t dd}t |dg| | d  dg dg|  }|S )Nr   r   r   )r   Zaranger!   )rt   r&   arr   r   r   
_indicatork  s    *rv   c           
      C  sz   t | j}tj| jjdd}| j|dd}|t||d | dA }|j| jdd}t||}t	| |k||A k|| }	|	S )NTZbitwidthsignedZbitcastr   )
r   r"   r   get_int_dtyper*   rK   rM   rk   rv   r0   )
r   flipr   rt   idtypeixZiyrl   Zis_rightrW   r   r   r   _compare_and_swapr  s    

r~   )stageorderc                 C  sF   |dkrt t| j|}n|}t|D ]}t| ||d | } q(| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   )rv   r   r"   r   static_ranger~   )r   r   r   r{   r   r   r   r   _bitonic_merge_hypercube  s    r   )r   r   rt   c                 C  s6   t | dgt| j }t|||}t || j} | S )Nr   )r   r!   r   r"   r   r)   )r   r   r   rt   hr   r   r   _bitonic_merge  s    r   kr   
descendingc           
      C  s0  |du rt | jd n|}t|t | jd kd t| j| }|du rN|nt|}t| j}t| dg| }td|d D ]}	t||	|	|k rdn|}qt|d |d D ]V}	|rt	|t|jd | dnt
|t|jd | d}t|||	|k rdn|}qt|| jdd d| g } | S )ai  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param k: the number of top elements to select. If none, assume k = x.shape[dim]
    :type k: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   +only minor dimension is currently supportedr   )rS   )lenr)   r   rg   r   r"   r!   r   r   r   r_   )
r   r   r   r   r   Zlog_nZlog_krt   r   r   r   r   r   	sort_impl  s    
8 r   r   r   c                 C  s   t | ||dS )Nr   r   )r   r   r   r   r   r   sort  s    r   )r   r   c                 C  s   t | ||ddS )NTr   r   )r   r   r   r   r   r   topk  s    r   c                 C  sP   |d u rt | jd n|}t|t | jd kd t| jd }t| |||S )Nr   r   r   )r   r)   r   rg   r   r   )r   r   r   r   rt   r   r   r   bitonic_merge  s    r   c                 C  sF   t | } t |}| d u r(t|d } | dk r<| t|7 } t | S r   )r   rc   r   r	   )r   r)   r   r   r   _get_flip_dim  s    

r   c                 C  s   t t| j |ko |t| jk  t|| j}t t| j|  t| j| }t j| jj	dd}t 
| j|dd| jd| dg|  | j|d d  }t |D ]}|t||| dA }qt 
|| jj| jdd} | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along
    :type dim: int
    Trw   ry   Nr   r   )r   rg   r   r)   r   r   r   rz   r*   rK   r!   rM   r   rk   )r   r   r   Zstepsr|   rl   r   r   r   r   r{     s    $<r{   c                 C  sH   t | |}t|jdkr|S t ||jdd d|jd  g S dS )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r)   r!   )rA   rB   cr   r   r   
interleave  s    r   )NFF)F)NFTF)TF)NFTF)TF)NFN)NF)F)r   FN)r   F)N)N)6
__future__r   Zruntime.jitr    r   r   r   r   Z_tensor_member_fnr   Z_add_math_1arg_docstrr   r   r#   r'   r+   r-   r8   r<   r=   rC   Z_add_reduction_docstrr   rX   rZ   r\   r]   r^   r_   r`   ra   re   r   rf   rk   rm   rn   Z_add_scan_docstrro   rr   rs   rv   r~   r   r   ZCONSTEXPR_0r   r   r   r   r   r{   r   r   r   r   r   <module>   s   	


'










	


	(
