
from typing import Callable, Optional, Tuple

import torch
import torch.utils.checkpoint
from torch import nn

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaForQuestionAnswering,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaPreTrainedModel,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from ..mistral.modeling_mistral import MistralModel
from .configuration_qwen2 import Qwen2Config


logger = logging.get_logger(__name__)


class Qwen2MLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Qwen2 uses bias-free MLP projections.
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class Qwen2Attention(LlamaAttention):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Q/K/V projections carry a bias, the output projection does not.
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=True)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        sliding_window = None
        if (
            self.config.use_sliding_window
            and getattr(self.config, "sliding_window", None) is not None
            and self.layer_idx >= self.config.max_window_layers
        ):
            sliding_window = self.config.sliding_window

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to "
                    'eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=sliding_window,  # main diff with Llama
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class Qwen2DecoderLayer(LlamaDecoderLayer):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__()
        self.self_attn = Qwen2Attention(config=config, layer_idx=layer_idx)
        self.mlp = Qwen2MLP(config)
        if config.sliding_window and config._attn_implementation != "flash_attention_2":
            logger.warning_once(
                f"Sliding Window Attention is enabled but not implemented for `{config._attn_implementation}`; "
                "unexpected results may be encountered."
            )


class Qwen2PreTrainedModel(LlamaPreTrainedModel):
    pass


class Qwen2Model(MistralModel):
    pass


class Qwen2ForCausalLM(LlamaForCausalLM):
    pass


class Qwen2ForSequenceClassification(LlamaForSequenceClassification):
    pass


class Qwen2ForTokenClassification(LlamaForTokenClassification):
    pass


class Qwen2ForQuestionAnswering(LlamaForQuestionAnswering):
    pass


__all__ = [
    "Qwen2PreTrainedModel",
    "Qwen2Model",
    "Qwen2ForCausalLM",
    "Qwen2ForSequenceClassification",
    "Qwen2ForTokenClassification",
    "Qwen2ForQuestionAnswering",
]
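

# ---------------------------------------------------------------------------
# Illustrative usage sketch (an addition for clarity, not part of the upstream
# module). It shows how the Qwen2 classes defined above are typically driven
# through a `Qwen2Config`: build a config, instantiate the causal-LM head, and
# run one forward pass. The helper name `_tiny_qwen2_smoke_test` and the tiny
# hyperparameters are assumptions chosen only so the sketch is cheap to run;
# it imports the generated classes from the installed `transformers` package
# rather than instantiating the modular definitions in this file directly.
# Real checkpoints are normally loaded with `Qwen2ForCausalLM.from_pretrained`.
# ---------------------------------------------------------------------------
def _tiny_qwen2_smoke_test():
    """Hypothetical helper: one forward pass through a randomly initialized tiny Qwen2 model."""
    from transformers import Qwen2Config as GeneratedQwen2Config
    from transformers import Qwen2ForCausalLM as GeneratedQwen2ForCausalLM

    config = GeneratedQwen2Config(
        vocab_size=1000,
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,  # grouped-query attention: fewer KV heads than query heads
        max_position_embeddings=128,
    )
    model = GeneratedQwen2ForCausalLM(config).eval()
    input_ids = torch.randint(0, config.vocab_size, (1, 8))
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits  # shape: (batch, seq_len, vocab_size)
    return tuple(logits.shape)  # expected: (1, 8, 1000)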