
"""Defines utilities for interacting with scaled_dot_product_attention"""

import math
from typing import Optional, Union

import torch


__all__: list[str] = []


def _input_requires_grad(*tensors: torch.Tensor) -> bool:
    """Returns True if any of the tensors requires grad"""
    return any(t.requires_grad for t in tensors)


def _postprocess_flash_output(inpt_tensor: torch.Tensor, og_size: int) -> torch.Tensor:
    """Handles the unpad of the last dimension"""
    if inpt_tensor.size(-1) != og_size:
        return inpt_tensor[..., :og_size]
    return inpt_tensor


def _calculate_scale(head_dim_size: int, scale: Optional[float]) -> float:
    """
    For FlashAttention we pad the head dimension to be a multiple of 8 so we need to scale the output
    by the original head size and not the padded.
    """
    if scale is not None:
        return scale
    return 1.0 / math.sqrt(head_dim_size)


_SUPPORTED_HEAD_DIMS = [2, 4, 8, 16, 32, 64, 128, 256]


def _supported_head_dim(n: Union[int, torch.SymInt]) -> bool:
    """Returns true if the head dim is supported by FlexAttention"""
    return n in _SUPPORTED_HEAD_DIMS


def _validate_sdpa_input(
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attn_mask: Optional[torch.Tensor] = None,
    dropout_p=0.0,
    is_causal=False,
    scale=None,
):
    if query.dtype != key.dtype or query.dtype != value.dtype:
        raise ValueError(
            f"Expected query, key, and value to have the same dtype, "
            f"but got query.dtype: {query.dtype}, key.dtype: {key.dtype}, "
            f"and value.dtype: {value.dtype} instead."
        )
    if query.device != key.device or query.device != value.device:
        raise ValueError(
            f"Expected query, key, and value to have the same device type, "
            f"but got query.device: {query.device}, key.device: {key.device}, "
            f"and value.device: {value.device} instead."
        )
    if query.dim() < 2 or key.dim() < 2 or value.dim() < 2:
        raise ValueError(
            f"Expected query, key, and value to all be at least 2 dimensional, but got query.dim: "
            f"{query.dim()}, key.dim: {key.dim()} and value.dim: {value.dim()} instead."
        )
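

# ---------------------------------------------------------------------------
# Illustrative usage sketch (added for clarity; not part of the upstream
# torch.nn.attention._utils module). The tensor shapes and dtypes below are
# arbitrary example values; running this file directly exercises the helpers
# defined above.
if __name__ == "__main__":
    # With no explicit scale, _calculate_scale falls back to 1/sqrt(head_dim).
    assert _calculate_scale(64, None) == 1.0 / math.sqrt(64)
    assert _calculate_scale(64, 0.5) == 0.5
    # The scale is taken from the original (unpadded) head size: a 96-dim head
    # padded to 128 for the FlashAttention kernel still uses 1/sqrt(96).
    assert _calculate_scale(96, None) == 1.0 / math.sqrt(96)

    # 64 is among the head dims listed as supported above.
    assert _supported_head_dim(64)

    # Mismatched dtypes are rejected by the SDPA input validation.
    q = torch.randn(2, 8, 16, 64)
    k = torch.randn(2, 8, 16, 64, dtype=torch.float16)
    v = torch.randn(2, 8, 16, 64)
    try:
        _validate_sdpa_input(q, k, v)
    except ValueError as e:
        print(f"validation failed as expected: {e}")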