o
    cZhj8                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ ddd	d
ZddddZej	edd Z
ej	eeddd Zej	eeddeddZej	ededdZedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zej	eejd$d%d&d'dfd*d+Zej	eejd,d-d.dgd/d0Zed1d2 Zed3d4 Zed5d6 Zed7d8 Zej	eejd9d%d&d'dfd:d;Zej	eejd<d-d.dgd=d>Zed?d@ ZdhdCdDZ ej	eejdEdBdFdidjdGdEZ!edHdI Z"ej	eedJdkdKdLZ#ej	ee$dMdldNdMZ%edOdP Z&ej	ee$dQdldRdQZ'edmdTdUZ(edndXdYZ)ej	ed(ej*fdod\d]Z+d^d_ Z,ej	edpd`daZ-edbdc Z.d(S )q    )annotations   )jit   )core)mathicore.constexprc                 C  s4   d}| j }|dkr|dL }|d7 }|dks	t|S )Nr   r   valuer   	constexpr)r   log2n r   G/var/www/auris/lib/python3.10/site-packages/triton/language/standard.py_log2
   s   
r   c                 C  s$   | j }t||d @ dko|dkS )Nr   r   r
   )r   r   r   r   r   _is_power_of_two   s   r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r   )xdivr   r   r   cdiv   s   r   sigmoidc                 C  s   ddt |    S )Nr   )r   exp)r   r   r   r   r   +   s   softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumZfdiv)r   Zieee_roundingznumZdenr   r   r   r   2   s   

c                 C  s   t j| | jg|dS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r   r   r   r   ravel<   s   	r    c                 C  sX   | | | }|| }|| }|| }t || |}|| }|||  }	|| }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jZsize_iZsize_jZsize_gZijZsize_gjZgroup_idZoff_iZnew_iZnew_jr   r   r   	swizzle2dH   s   r$   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zerosp   s   
r(   c                 C  s   t | j| jS )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r(   r&   r'   )inputr   r   r   
zeros_like}   s   r*   c           	      C  sJ   |r| |ko	||k }nd}| |kp|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtZv_retZi_retr   r   r   _argmax_combine      r5   c                 C     t | |||dS NTr5   r.   r/   r0   r1   r   r   r   _argmax_combine_tie_break_left      r;   c                 C  r7   r+   r9   r:   r   r   r   _argmax_combine_tie_break_fast   r<   r=   c                 C     t | |S N)r   maximumabr   r   r   _elementwise_max      rD   r@   return_indicesreturn_indices_tie_break_left)Zreturn_indices_argtie_break_argNTc                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjt dk rEt | j r6| 	t j
} n| j s?J d| 	t j} t j| |t|dS N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesr;   r=   r   r'   primitive_bitwidthis_floatingtofloat32is_intint32reducerD   r)   axisrF   rG   rK   r   r   r   r      s   
r   zmaximum indexr2   )rH   c                 C     t | |d||d\}}|S NT)rF   rG   rK   )r   r)   rW   r2   rK   _retr   r   r   argmax      r]   c           	      C  sJ   |r| |ko	||k }nd}| |k p|}t || |}t |||}||fS r+   r,   )	r.   r/   r0   r1   r2   r3   ltZ	value_retZ	index_retr   r   r   _argmin_combine   r6   r`   c                 C  r7   r8   r`   r:   r   r   r   _argmin_combine_tie_break_left   r<   rb   c                 C  r7   r+   ra   r:   r   r   r   _argmin_combine_tie_break_fast   r<   rc   c                 C  r>   r?   r!   rA   r   r   r   _elementwise_min   rE   rd   r"   c                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjdk rBt | j r3| 	t j
} n| j s<J d| 	t j} t j| |t|dS rI   )r   rM   rN   rb   rc   r   r'   rO   rP   rQ   rR   rS   rT   rU   rd   rV   r   r   r   min   s   
re   zminimum indexc                 C  rX   rY   )re   rZ   r   r   r   argmin   r^   rf   c                 C  s   | | S r?   r   rA   r   r   r   _sum_combine      rg   in_dtyper'   c                 C  s^   t |}|d ur|S d }|  r| jdk rt j}|S d }|S |  r-| jdk r+t jnd }|S )NrL   )r   _unwrap_if_constexprZis_int_signedZint_bitwidthrT   Zis_int_unsignedZuint32)ri   r'   	out_dtyper   r   r   _pick_sum_dtype  s   
rl   r   )Z	dtype_argc                 C  s0   t | j|}|d ur| |} tj| |t|dS )NrJ   )rl   r'   rQ   r   rU   rg   )r)   rW   rK   r'   rk   r   r   r   r     s   
c                 C  s   | |A S r?   r   rA   r   r   r   _xor_combine!  rh   rm   zxor sumc                 C  s&   t | jj d t j| |t|dS )Nz#xor_sum only supported for integersrJ   )r   static_asserttypeZscalarrS   rU   rm   )r)   rW   rK   r   r   r   xor_sum)  s   rp   cumsumc                 C     t | } t | |t|S r?   )r   rM   associative_scanrg   r)   rW   reverser   r   r   rq   4     
c                 C  s   | | S r?   r   rA   r   r   r   _prod_combine@  rh   rw   cumprodc                 C  rr   r?   )r   rM   rs   rw   rt   r   r   r   rx   E  rv   n_dimsc                 C  s<  | j |? }|d|  dd|| d  g}t| |}tddd d d d f }tt|d|  dd d d d d f ||j}tt|| dd d d d d f ||j}	t|| j}t|	| j}	tj	| jj
dd}
|j|
dd}|	j|
dd}| j|
dd}|t||	k|k||A t|A }|j| jddS )Nr   r   r   TZbitwidthsignedZbitcast)r   r   r   arangebroadcast_tor   rQ   r'   r&   get_int_dtyperO   r-   r*   )r   flipr   ry   n_outerr&   ymaskleftrightidtypeZileftZirightZixr\   r   r   r   _compare_and_swapQ  s   
40"r   stageorderc                 C  s   | j |? }t||k |dkr6|d|d |   dd| g}tttddddddf || j}n|}t|D ]}t| ||||  |} q=| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r   r   rn   r   r~   r}   r&   static_ranger   )r   r   r   ry   r   r&   r   r   r   r   r   _bitonic_mergee  s   
.r   dim
descendingc                 C  sv   |du rt | jd n|}t|t | jd kd t| j| }td|d D ]}t| |||k r4dn||} q)| S )a  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   z+only minor dimension is currently supportedr   )lenr&   r   rn   r   r   r   )r   r   r   Z_dimry   r   r   r   r   sort~  s   r   c                 C  sJ   t | } t |}| d u rt|d } | t|d ks J dt | S )Nr   z2Currently only support flipping the last dimension)r   rj   r   r   )r   r&   r   r   r   _get_flip_dim  s   


r   c           
      C  s<  t t| jt|| j  t t| j t| j}t| jt| jt|| j  }t j| jj	dd}t 
| j|dddg| }t ||}t dddddf dt dd k}t ||D ],}|}t d|d D ]}	|	|kr|	|d krt ||	}qnt|| |d d|jd}qbt 
|| jj| jdd} | S )	z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    Trz   r|   r   r   Nr   )rK   r'   )r   rn   r   r&   r   r   r   r   r'   rO   r   rQ   Zexpand_dimsr}   r   r   )
r   r   Zstepsstartr   r   r   r   Zflip2r#   r   r   r   r     s"   
 (r   c                 C  sD   t | |}t|jdkr|S t ||jdd d|jd  g S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r&   r   )rB   rC   cr   r   r   
interleave  s   &r   )r   r	   )F)NFTF)TF)ri   r	   r'   r	   )NFN)r'   r	   r+   )r   F)r   r	   ry   r	   )r   r	   r   r	   ry   r	   )r   r	   r   r	   r?   )/
__future__r   Zruntime.jitr    r   r   r   r   Z_tensor_member_fnr   Z_add_math_1arg_docstrr   r   r    r$   r(   r*   r5   r;   r=   rD   Z_add_reduction_docstrr   r]   r`   rb   rc   rd   re   rf   rg   rl   r   rm   rp   Z_add_scan_docstrrq   rw   rx   r   r   ZCONSTEXPR_0r   r   r   r   r   r   r   r   <module>   s    

	


'











	
	
		