from typing import Callable, Optional, Tuple

import torch
import torch.utils.checkpoint
from torch import nn

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaForQuestionAnswering,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaPreTrainedModel,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from ..mistral.modeling_mistral import MistralModel
from .configuration_qwen2 import Qwen2Config


logger = logging.get_logger(__name__)


class Qwen2MLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Same gated MLP as Llama; Qwen2 uses no biases on any projection.
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class Qwen2Attention(LlamaAttention):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Unlike Llama, Qwen2 puts biases on the Q/K/V projections (but not on the output projection).
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=True)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        sliding_window = None
        if (
            self.config.use_sliding_window
            and getattr(self.config, "sliding_window", None) is not None
            and self.layer_idx >= self.config.max_window_layers
        ):
            sliding_window = self.config.sliding_window
        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=sliding_window,  # main diff with Llama
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class Qwen2DecoderLayer(LlamaDecoderLayer):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__()
        self.self_attn = Qwen2Attention(config=config, layer_idx=layer_idx)
        self.mlp = Qwen2MLP(config)
        if config.sliding_window and config._attn_implementation != "flash_attention_2":
            logger.warning_once(
                f"Sliding Window Attention is enabled but not implemented for `{config._attn_implementation}`; "
                "unexpected results may be encountered."
            )


class Qwen2PreTrainedModel(LlamaPreTrainedModel):
    pass
class Qwen2Model(MistralModel):
    pass


class Qwen2ForCausalLM(LlamaForCausalLM):
    pass


class Qwen2ForSequenceClassification(LlamaForSequenceClassification):
    pass


class Qwen2ForTokenClassification(LlamaForTokenClassification):
    pass


class Qwen2ForQuestionAnswering(LlamaForQuestionAnswering):
    pass


__all__ = [
    "Qwen2PreTrainedModel",
    "Qwen2Model",
    "Qwen2ForCausalLM",
    "Qwen2ForSequenceClassification",
    "Qwen2ForTokenClassification",
    "Qwen2ForQuestionAnswering",
]
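

# ---------------------------------------------------------------------------
# Usage sketch (appended illustration; not part of the original module).
# End users consume the auto-generated modeling code through the public
# transformers API rather than this modular template, so the demo imports the
# generated Qwen2ForCausalLM. The tiny hyperparameters are illustrative
# assumptions chosen only to keep the random-weight model small. Run with:
#   python -m transformers.models.qwen2.modular_qwen2
if __name__ == "__main__":
    from transformers import Qwen2ForCausalLM as GeneratedQwen2ForCausalLM

    demo_config = Qwen2Config(
        vocab_size=1000,
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,  # grouped-query attention: 2 KV heads shared by 4 query heads
    )
    demo_model = GeneratedQwen2ForCausalLM(demo_config)
    demo_input_ids = torch.randint(0, demo_config.vocab_size, (1, 8))
    demo_logits = demo_model(input_ids=demo_input_ids).logits
    print(demo_logits.shape)  # expected: torch.Size([1, 8, 1000])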