a
    hq4                     @   s   d Z ddlmZmZ ddlmZ ddlmZ ddlZddl	m
  mZ ddlmZmZmZmZ ddlmZ ddlmZmZmZmZ g d	Zeje eje eje eje G d
d deZG dd dejZedddZedddZ dS )zCDefines bias subclasses that work with scaled_dot_product_attention    )autoIntEnum)Optional)warnN)can_use_efficient_attentioncan_use_flash_attentionis_flash_attention_available
SDPAParams)_raise_kernel_warnings)_calculate_scale_input_requires_grad_postprocess_flash_output_validate_sdpa_input)causal_upper_leftcausal_lower_rightCausalVariant
CausalBiasc                   @   s   e Zd ZdZe Ze ZdS )r   a+  
    Enum for causal variants used in attention mechanisms.

    Defines two types of causal biases:

    ``UPPER_LEFT``: Represents upper-left triangular bias for standard causal attention.
    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        torch.tril(torch.ones(size, dtype=torch.bool))

    For instance, with ``shape=(3,4)``, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 0, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 0]]


    ``LOWER_RIGHT``: Represents lower-right triangular bias, the include values are aligned to the lower
    right corner of the matrix.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        diagonal_offset = size[1] - size[0]
        torch.tril(
            torch.ones(size, dtype=torch.bool),
            diagonal=diagonal_offset,
        )

    For instance, with ``shape=(3,4)``, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 1, 0, 0],
         [1, 1, 1, 0],
         [1, 1, 1, 1]]

    Note that these variants are equivalent to each other when the sequence lengths of the query and key/value
    tensors are equal since the triangular matrix is square.

    .. warning:: This enum is a prototype and subject to change.
    N)__name__
__module____qualname____doc__r   
UPPER_LEFTLOWER_RIGHT r   r   E/var/www/auris/lib/python3.9/site-packages/torch/nn/attention/bias.pyr   "   s   0r   c                       s   e Zd ZdZeeedddZejej	dddZ
ejej	ddd	Zdeej ej	dddZedej	ej	ej	d eeee eej	d	ddZed fdd	Zdd Z  ZS )r   aN  
    A bias representing causal attention patterns. For an overview of the bias structure, see the :class:`CausalVariant` enum.

    This class is used for defining causal (triangular) attention biases. For construing the bias, there exist
    two factory functions: :func:`causal_upper_left` and :func:`causal_lower_right`.

    Example:

    .. code-block:: python

        from torch.nn.attention.bias import causal_lower_right

        bsz, num_heads, seqlen_q, seqlen_kv, head_dim = 32, 8, 4, 12, 8

        # Create a lower-right causal bias
        attn_bias = causal_lower_right(seqlen_q, seqlen_kv)

        q = torch.randn(
            bsz, num_heads, seqlen_q, head_dim, device="cuda", dtype=torch.float16
        )
        k = torch.randn(
            bsz, num_heads, seqlen_kv, head_dim, device="cuda", dtype=torch.float16
        )
        v = torch.randn(
            bsz, num_heads, seqlen_kv, head_dim, device="cuda", dtype=torch.float16
        )

        out = F.scaled_dot_product_attention(q, k, v, attn_bias)

    .. warning:: This class is a prototype and subject to change.
    )variant	seq_len_q
seq_len_kvc                 C   s>   t |tsJ || _|| _|| _||kr:|tjkr:td dS )a  
        Initializes the CausalBias instance with a specified variant and sequence lengths.

        Args:
            variant (CausalVariant): The type of causal bias to use (either UPPER_LEFT or LOWER_RIGHT).
            seq_len_q (int): The sequence length of the query tensor.
            seq_len_kv (int): The sequence length of the key/value tensor.

        Raises a warning if the LOWER_RIGHT variant is used with seq_len_q > seq_len_kv, as it may produce NaNs.
        zTLower right causal bias will produce NaNs in the output when seq_len_q > seq_len_kv!N)
isinstancer   r   r   r   r   r   )selfr   r   r   r   r   r   __init__x   s    zCausalBias.__init__)devicereturnc                 C   s   t t j| j| j|t jdS )zUpper left causal biasr!   Zdtype)torchtrilonesr   r   boolr   r!   r   r   r   _upper_left   s    zCausalBias._upper_leftc                 C   s.   | j | j }tjtj| j| j |tjd|dS )zLower right causal biasr#   )Zdiagonal)r   r   r$   r%   r&   r'   )r   r!   Zdiagonal_offsetr   r   r   _lower_right   s    zCausalBias._lower_rightNc                 C   sB   |du rt d}| jtjkr(| |S | jtjkr>| |S dS )a  
        Materializes the causal bias into a tensor form.

        Depending on the variant, this method generates either an upper-left or lower-right
        triangular matrix to represent the causal bias.

        Args:
            device (Optional[torch.device]): The device on which to create the tensor. Defaults to CPU.

        Returns:
            torch.Tensor: The materialized bias tensor.
        Ncpu)r$   r!   r   r   r   r)   r   r*   r(   r   r   r   _materialize   s    

zCausalBias._materialize        F)	querykeyvalue	attn_mask	dropout_p	is_causalscale
enable_gqar"   c                 C   s  |rt d|j|jks$|jtjkr>tj| ||d|d||dS |jtjkrt	| ||d||| t
| ||d|||}t|r:| dd dk}	| d}
t|
|}|	rtjj| dd| dd  f} tjj|dd|dd  f}tjj|dd|dd  f}tjjj| |||dd|d	d }t||
S t|rd}t| ||rZd}tjjj| d
d|d
d|d
dddddd|t|j||ddd d
dS t| tj| |||| j|d||dS nt d|j dS )a8  
        Handles the logic for computing attention with the specified causal bias.

        Args:
            query (Tensor): Query tensor; shape :math:`(N, ..., L, E)`.
            key (Tensor): Key tensor; shape :math:`(N, ..., S, E)`.
            value (Tensor): Value tensor; shape :math:`(N, ..., S, Ev)`.
            attn_mask (CausalBias): The type of causal attention to apply.
                A boolean mask where a value of True indicates that the element *should* take part in attention.
                A float mask of the same type as query, key, value that is added to the attention score.
            dropout_p (float): Dropout probability; if greater than 0.0, dropout is applied
            is_causal (bool): If true, assumes upper left causal attention masking and errors if both attn_mask and is_causal
                are set.
            scale (optional float): Scaling factor applied prior to softmax. If None, the default value is set
                to :math:`\frac{1}{\sqrt{E}}`.
            enable_gqa (optional bool): If set to True, Grouped Query Attention (GQA) is enabled, by default it is set to False.

        Returns:
            output (Tensor): Attention output; shape :math:`(N, ..., L, Ev)`.

        Raises:
            ValueError: If the causal bias variant is not a CausalVariant type.

        z.CausalBias should not be used with causal=TrueNT)r1   r2   r3   r4   r5      r   F)r3   Zreturn_debug_maskr4         )
ZbiasZcu_seqlens_qZcu_seqlens_kZmax_seqlen_qZmax_seqlen_kr2   Zcustom_mask_typecompute_log_sumexpr4   Zseqlen_kz<CausalBias.variant must be a CausalVariant type, but found: )
ValueErrorr   r   r   r   r   Fscaled_dot_product_attentionr   r   r	   r   sizer   r$   nn
functionalpadopsZatenZ#_scaled_dot_product_flash_attentionr   r   r   Z_efficient_attention_forwardZ	transposeintr
   r,   r!   )r.   r/   r0   r1   r2   r3   r4   r5   Zsdpa_paramsZneeds_paddingZog_head_sizeZog_scaleoutr:   r   r   r   	_dispatch   s    #





"""	






zCausalBias._dispatchr   c                    s<   |du ri }|t jjju r*| j|i |S t ||||S )zjDefines the behavior of torch.nn.functional.scaled_dot_product_attention when the attn_bias is an AttnBiasN)r$   r?   r@   r=   rE   super__torch_function__)clsfunctypesargskwargs	__class__r   r   rG      s
    zCausalBias.__torch_function__c                 C   s   |    S )N)r,   __repr__)r   r   r   r   rO   )  s    zCausalBias.__repr__)N)r-   FNF)r   N)r   r   r   r   r   rC   r    r$   r!   Tensorr)   r*   r   r,   staticmethodfloatr'   rE   classmethodrG   rO   __classcell__r   r   rM   r   r   W   s0    
    or   )r"   c                  G   s*   t | dksJ d| \}}ttj||S )a&  
    Creates an upper-left triangular causal bias.

    This function generates a upper-left triangular matrix to represent causal attention bias with a
    diagonal offset set so that the inclusive values are aligned to the upper left corner of the matrix.
    This equivalent to the `is_causal=True` argument in `scaled_dot_product_attention`.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        torch.tril(torch.ones(size, dtype=torch.bool))

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 0, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 0]]

    Args:
        size: The size of the bias matrix.

    Returns:
        CausalBias: The UPPER_LEFT triangular causal bias variant.
    r9   z*causal_upper_left only supports 2D tensors)lenr   r   r   r>   r   r   r   r   r   r   -  s    r   c                  G   s*   t | dksJ d| \}}ttj||S )a:  
    Creates a lower-right triangular causal bias.

    This function generates a lower-right triangular matrix to represent causal attention bias with a
    diagonal offset set so that the inclusive values are aligned to the lower right corner of the matrix.

    The equivalent pytorch code for constructing this bias is:

    .. code-block:: python

        diagonal_offset = size[1] - size[0]
        torch.tril(
            torch.ones(size, dtype=torch.bool),
            diagonal=diagonal_offset,
        )

    For instance, with `shape=(3,4)`, the materialized bias tensor will be:

    .. code-block:: text

        [[1, 1, 0, 0],
         [1, 1, 1, 0],
         [1, 1, 1, 1]]

    Args:
        size: The size of the bias matrix.

    Returns:
        CausalBias: The LOWER_RIGHT triangular causal bias variant.
    r9   z+causal_lower_right only supports 2D tensors)rU   r   r   r   rV   r   r   r   r   N  s    r   )!r   enumr   r   typingr   warningsr   r$   Ztorch.nn.functionalr?   r@   r<   Ztorch.backends.cudar   r   r   r	   Ztorch.nn.attentionr
   Ztorch.nn.attention._utilsr   r   r   r   __all__Z_dynamoZallow_in_graphr   rP   r   r   r   r   r   r   r   <module>   s$   5 W!