a
    hB6                     @   s  U d dl Z d dlZd dlmZmZ d dlZd dlmZ ejdedZi Z	e
ejjef ed< eh dZeeeegef dd	d
Zeddd'dd d dejejejeej eeeejdddZee eedddZejeeejdddZejejeeee eejdddZeeddddZejejeeee ejddd Zed!dd(dd d d dd"dd#ejejejeej eej eej eeeeee eee eejejejejf d$d%d&ZdS ))    N)CallableOptional)_dtype_mappings_T)boundONNX_ATEN_DECOMP_TABLE>         
      )op_typeopset_versionreturnc                    s   t t d fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.)funcr   c                    sP   d }t jjd  d| dd| }| tttt jj |< ||  |S )NZopsetzonnx::. )Zmutates_args)torchlibraryZ	custom_opr   getattropsZonnxZregister_fake)r   overloadZtorch_opr   r   r   B/var/www/auris/lib/python3.9/site-packages/torch/onnx/ops/_impl.py	decorator   s    

z_onnx_op.<locals>.decorator)r   )r   r   r   r   r   r   _onnx_op   s    r   ZRotaryEmbedding   F)interleaved	num_headsrotary_embedding_dim)x	cos_cache	sin_cacheposition_idsr   r   r   r   c                   sP   j d } j d }t j dkrd j d }	t|dk fdd |	| }
||||
g}t | tt j dkdd   j d }
|dkr|
} d	d	d	d	d	d	d	|f } d	d	d	d	d	d	|d	f }|d }|d	ur|| }|| }n|}|}|d	d	d	d	d	|f }|d	d	d	d	d	|f }t|d}t|d}|r|d	d	d	d	d	d	dd	df }|d	d	d	d	d	d	dd	df }ntj|dd
d\}}|| ||  }|| ||  }|r
t|d
}t|d
}tj||fd
d}t||j }ntj||fd
d}tj||fd
d}t j dkrLt| j }|S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   r	         c                      s   d j  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape )shaper   r   r   r   <lambda>>       z%rotary_embedding_23.<locals>.<lambda>   c                   S   s   dS )Nzx should be a 4D tensor by nowr   r   r   r   r   r'   C   r(   NZdim)r%   lenr   _checkZreshapeZ	unsqueezechunkcat)r   r    r!   r"   r   r   r   
batch_sizesequence_lengthhidden_size	head_sizeZ	new_shapeZx_rotateZx_not_rotateZrotary_embedding_dim_halfcossinx1Zx2realimagZx_rotate_concatoutputr   r&   r   rotary_embedding_23+   sn    




  "$r:   )scaler3   r   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)r;   r3   r   r   r   _get_scale_factor   s    r>   )tensorr0   r   r   c                 C   s:   | j d | j d  }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r	   r$   )r%   view	transpose
contiguous)r?   r0   r   r1   r2   r3   r   r   r   _reshape_3d_to_4d   s    rC   )QKcurrent_q_num_headscurrent_kv_num_headsr;   qk_matmul_output_moder   c              	   C   s6   |dkrt | ||||S tt| |ddS dS )z1Get QK output tensor based on the specified mode.r   r*   N)_compute_qk_output_for_mode_0r   Z
zeros_likematmulrA   )rD   rE   rF   rG   r;   rH   r   r   r   _get_qk_output_for_aten_spda   s
    	
rL   )rF   rG   r   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rG   rF   r   r   r'      r(   z-_validate_gqa_configuration.<locals>.<lambda>N)r   r-   )rF   rG   r   rM   r   _validate_gqa_configuration   s    
rN   )rD   rE   rF   rG   r;   r   c                 C   s`   |}||kr"|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r	   r+   r#   rI   r*   )repeat_interleaver>   r%   r<   r=   r   rK   rA   )rD   rE   rF   rG   r;   ZK_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r   rJ      s    	
rJ   Z	Attention        )	is_causalkv_num_headsq_num_headsrH   r;   softcapsoftmax_precision)rD   rE   V	attn_maskpast_key
past_valuerV   rW   rX   rH   r;   rY   rZ   r   c          (      C   s  d\}}}t | j}| jd }t | jdkrvt|dko>|dkdd  | jd }t| ||} t|||}t|||}tt | jdkot |jdkot |jdkdd  | j| }t|
|}
|d	urtj||g|d
n| }|d	urtj||g|d
n| }|| }}| j| }|j| }| j| }|j| }|dkoh|	dkoh|d	u oh|d	u ph|jtj	k}t
|| |rd	}|d	ur|jtj	kr| n|}tjjj| |||d||
t	||kd}t| ||||
|	}n||kr
|| }|j||d
}|j||d
}tj||| j| jd}|rht|d	u dd  ttj||tj	| jd}|| td}|d	ur|jtj	kr|| td}n|| }t|
| jd } t| }!| |! }"||! }#t|"|#dd}$|$}|$| }%|	dkr|%}|dkr|t|%|  }%|	dkr|%}|d	urr|tv rb|%j}&|%tj| }%tj|%dd
}'|'|&}'ntj|%dd
}'ntj|%dd
}'|	dkr|'}t|'|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r	   r$   r#   r   r#   c                   S   s   dS )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r   r'      r(   zattention_23.<locals>.<lambda>r	   r)   c                   S   s   dS )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r   r'      r(   Nr+   rU   )r\   Z	dropout_prV   r;   Z
enable_gqa)dtypedevicec                   S   s   dS )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r   r'   @  r(   z-infrI   r*   r$   )"r,   r%   r   r-   rC   r>   r/   cloner_   boolrN   nnZ
functionalZscaled_dot_product_attentionrL   rO   zerosr`   ZtrilZonesZmasked_fillfloatr<   r=   rK   rA   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ZONNX_DTYPE_TO_TORCH_DTYPEZsoftmaxrB   r@   )(rD   rE   r[   r\   r]   r^   rV   rW   rX   rH   r;   rY   rZ   Znum_head_dimZsequence_dimZhead_dimZinput_shape_lenr0   Zq_sequence_lengthZq_head_sizeZpresent_keyZpresent_valuerF   rG   Zkv_sequence_lengthZcan_use_sdpaZsdpa_attn_maskr9   Z	qk_outputrP   Z	attn_biasZcausal_maskrQ   rR   rS   rT   Zqk_matmul_outputZqk_with_biasZoriginal_dtypeZ
qk_softmaxr   r   r   attention_23   s    



(



















ri   )N)NNN)r<   typingr   r   r   Ztorch.onnx.opsr   TypeVarr   r   dictZ_opsZ
OpOverload__annotations__	frozensetrg   strintr   ZTensorrb   r:   re   r>   rC   rL   rN   rJ   tupleri   r   r   r   r   <module>   s   

 U   