"""Defines utilities for interacting with scaled_dot_product_attention"""
import math
from typing import Optional, Union

import torch

__all__: list[str] = []


def _input_requires_grad(*tensors: torch.Tensor) -> bool:
    """Returns True if any of the tensors requires grad"""
    return any(t.requires_grad for t in tensors)


def _postprocess_flash_output(inpt_tensor: torch.Tensor, og_size: int) -> torch.Tensor:
    """Handles the unpad of the last dimension"""
    if inpt_tensor.size(-1) != og_size:
        return inpt_tensor[..., :og_size]
    return inpt_tensor


def _calculate_scale(head_dim_size: int, scale: Optional[float]) -> float:
    """
    For FlashAttention we pad the head dimension to be a multiple of 8, so we need
    to scale the output by the original head size and not the padded one.
    """
    if scale is not None:
        return scale
    return 1.0 / math.sqrt(head_dim_size)

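
# A minimal illustration of how the two FlashAttention helpers above compose
# (an editor's sketch, not code from the original module; `padded_out` and the
# sizes are hypothetical):
#
#     scale = _calculate_scale(40, None)  # 1 / sqrt(40): uses the original
#                                         # head dim, not the padded 48
#     out = _postprocess_flash_output(padded_out, og_size=40)  # drop pad cols
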

_SUPPORTED_HEAD_DIMS = [8, 16, 32, 64, 128, 256, 512, 1024]


def _supported_head_dim(n: Union[int, torch.SymInt]) -> bool:
    """Returns true if the head dim is supported by FlexAttention"""
    return n in _SUPPORTED_HEAD_DIMS


def _validate_sdpa_input(
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attn_mask: Optional[torch.Tensor] = None,
    dropout_p=0.0,
    is_causal=False,
    scale=None,
):
    if query.dtype != key.dtype or query.dtype != value.dtype:
        raise ValueError(
            f"Expected query, key, and value to have the same dtype, "
            f"but got query.dtype: {query.dtype}, key.dtype: {key.dtype}, "
            f"and value.dtype: {value.dtype} instead."
        )
    if query.device != key.device or query.device != value.device:
        raise ValueError(
            f"Expected query, key, and value to have the same device type, "
            f"but got query.device: {query.device}, key.device: {key.device}, "
            f"and value.device: {value.device} instead."
        )
    if query.dim() < 2 or key.dim() < 2 or value.dim() < 2:
        raise ValueError(
            f"Expected query, key, and value to all be at least 2 dimensional, "
            f"but got query.dim: {query.dim()}, key.dim: {key.dim()} "
            f"and value.dim: {value.dim()} instead."
        )

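
# A hypothetical usage sketch (added for illustration; not part of the torch
# source): drives the helpers the way a scaled_dot_product_attention frontend
# might, assuming a (batch, num_heads, seq_len, head_dim) layout.
if __name__ == "__main__":
    q = torch.randn(2, 8, 128, 64)
    k = torch.randn(2, 8, 128, 64)
    v = torch.randn(2, 8, 128, 64)

    # Passes silently for well-formed inputs; raises ValueError on any
    # dtype/device/rank mismatch.
    _validate_sdpa_input(q, k, v)

    assert _supported_head_dim(q.size(-1))  # 64 is a supported power of two
    assert _calculate_scale(64, None) == 1.0 / math.sqrt(64)
    assert _calculate_scale(64, 0.5) == 0.5  # an explicit scale wins

    # If a kernel had padded head_dim 40 up to 48, the output would be
    # trimmed back to the original size:
    padded_out = torch.randn(2, 8, 128, 48)
    assert _postprocess_flash_output(padded_out, og_size=40).size(-1) == 40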