
    fTh2                        S SK Jr  S SKJrJrJr  S SKrS SKJr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJrJrJrJrJrJrJrJ r J!r!  SSK"J#r#  \RH                  " \%5      r&Sr'Sr( " S S\5      r) " S S\5      r* " S S\RV                  5      r, " S S\5      r- " S S\5      r. " S S\5      r/ " S S\5      r0 " S  S!\5      r1 " S" S#\5      r2/ S$Qr3g)%    )partial)CallableOptionalTupleN   )CacheDynamicCache)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )CLIPMLP)	LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   8  ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\	\R                  \R                  4   S\
\R                     S\
\   S	\
\R                     S
\	\R                  \
\R                     \
\	\R                        4   4S jjrSrU =r$ )PhiAttention$   config	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        U ?[        U R                  UR                  -  5      U l        UR                   U l        U R                   (       ay  [        R"                  " UR                  UR
                  -  UR$                  SS9U l        [        R"                  " UR                  UR
                  -  UR$                  SS9U l        g g )NTbias)epselementwise_affine)super__init__nnLinearhidden_sizenum_attention_headshead_dimq_projnum_key_value_headsk_projv_projdenseo_projintpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r    	__class__s      [/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/phi/modular_phi.pyr'   PhiAttention.__init__%   s`   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0L0L LM"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	     hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionreturnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nU R                  (       a"  U R                  U	5      n	U R                  U
5      n
Uu  pU	SS U R                  24   U	SU R                  S 24   pU
SS U R                  24   U
SU R                  S 24   nn[        UUX5      u  nn[        R                  " X4SS9n	[        R                  " UU4SS9n
Ub$  XUS.nUR                  XU R                  U5      u  p[         nU R"                  R$                  S:w  ad  U R"                  R$                  S:X  a-  UR'                  S	S
5      (       a  [(        R+                  S5        O[,        U R"                  R$                     nU" U U	U
UU4U R.                  (       d  SOU R0                  U R2                  S.UD6u  nnUR4                  " / UQSP76 R7                  5       nU R9                  U5      nUU4$ )Nr   r   .)dim)sincosrE   eagersdpaoutput_attentionsFz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.        )dropoutscaling)shaper,   r-   view	transposer/   r0   r6   r9   r:   r5   r   torchcatupdater    r   r   _attn_implementationgetloggerwarning_oncer   trainingattention_dropoutrQ   reshape
contiguousr1   )r<   rA   rB   rC   rD   rE   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statesrK   rJ   	query_rot
query_passkey_rotkey_passcache_kwargsattention_interfaceattn_outputattn_weightss                         r>   forwardPhiAttention.forward6   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J& 1 1 1112d//112 
 s/d////0sD--//0 
 2)WcO	7 yy)!8bAYY2;
%#&nUL'5'<'<ZW[WeWegs't$J(?;;++w6{{//69fjjI\^c>d>d##L
 '>dkk>^>^&_#$7	%
  $}}C$2H2HLL	%
 	%
!\ "));;;;FFHjj-L((r@   )r1   r:   r/   r9   r-   r6   r5   r0   )NN)__name__
__module____qualname____firstlineno__r   r3   r'   rU   Tensorr   r   r   
LongTensorrn   __static_attributes____classcell__r=   s   @r>   r   r   $   s    y S , +/59A)||A) #5<<#=>A) !.	A)
 !A) !!1!12A) 
u||Xell3XeELL>Q5RR	SA) A)r@   r   c                       \ rS rSrSrg)PhiMLPz    Nrp   rq   rr   rs   rv   r|   r@   r>   rz   rz   z       r@   rz   c                     ^  \ rS rSrS\S\4U 4S jjr       SS\R                  S\	\R                     S\	\R                     S\	\\R                        S	\	\   S
\	\   S\	\R                     S\	\\R                  \R                  4      S\\R                  \	\\R                  \R                  4      4   4S jjrSrU =r$ )PhiDecoderLayer~   r   r    c                   > [         TU ]  5         [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        g )N)r    r$   )r&   r'   r   	self_attnrz   mlpr(   r7   r*   r8   input_layernormDropoutresid_pdropresid_dropoutr;   s      r>   r'   PhiDecoderLayer.__init__   s[    %fB&>!||F,>,>FDYDYZZZ(:(:;r@   rA   rC   position_idsrD   rN   	use_cacherE   rB   rF   c	                     Un
U R                  U5      nU R                  " SUUUUUUUUS.U	D6u  pU R                  U5      nU R                  U R                  U5      5      nX-   U
-   nU4nU(       a  X4-  nU$ )N)rA   rC   r   rD   rN   r   rE   rB   r|   )r   r   r   r   )r<   rA   rC   r   rD   rN   r   rE   rB   r`   residualattn_outputsself_attn_weightsfeed_forward_hidden_statesoutputss                  r>   rn   PhiDecoderLayer.forward   s     !,,]; +/.. 
+
')%)/) 3
+
 
+
' )),7%)%7%78O%P"$AHL "++Gr@   )r   r   r   r   )NNNFFNN)rp   rq   rr   rs   r   r3   r'   rU   rt   r   ru   r   boolFloatTensorrn   rv   rw   rx   s   @r>   r   r   ~   s
   <y <S < 26378<,1$)59KO%||% !.% u//0	%
 !u||!45% $D>% D>% !!1!12% &eELL%,,,F&GH% 
u  (51B1BEDUDU1U+V"WW	X% %r@   r   c                       \ rS rSrSrg)PhiRotaryEmbedding   r|   Nr}   r|   r@   r>   r   r      r~   r@   r   c                       \ rS rSrS rSrg)PhiPreTrainedModel   c                    U R                   R                  n[        U[        R                  5      (       aW  UR
                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         g g [        U[        R                  5      (       ad  UR
                  R                  R                  SUS9  UR                  b2  UR
                  R                  UR                     R                  5         g g [        U[        R                  5      (       aJ  UR
                  R                  R                  S5        UR                  R                  R                  5         g g )NrO   )meanstdg      ?)r   initializer_range
isinstancer(   r)   weightdatanormal_r#   zero_	Embeddingpadding_idxr7   fill_)r<   moduler   s      r>   _init_weights PhiPreTrainedModel._init_weights   s   kk++fbii((MM&&CS&9{{&  &&( '--MM&&CS&9!!-""6#5#56<<> .--MM$$S)KK""$ .r@   r|   N)rp   rq   rr   rs   r   rv   r|   r@   r>   r   r      s    %r@   r   c                     ^  \ rS rSrS\4U 4S jjr         SS\\R                     S\\R                     S\\R                     S\\
   S\\R                     S	\\   S
\\   S\\   S\\R                     S\\   S\4S jjrSrU =r$ )PhiModel   r   c           	      h  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        U ?g s  snf )Nr   )r&   r'   r(   
ModuleListrangenum_hidden_layersr   layersr   
embd_pdropembed_dropoutr7   r*   r8   final_layernormnormr;   s      r>   r'   PhiModel.__init__   s     mmAFvG_G_A`aA`I_V/A`a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B/	input_idsrC   r   past_key_valuesinputs_embedsr   rN   output_hidden_statesrE   flash_attn_kwargsrF   c
                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUS L US L-  (       a  [	        S5      eU R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUc  U R                  U5      nU(       a  Uc
  [        5       nU	cD  Ub  UR                  5       OSn[        R                  " XUR                  S   -   UR                  S9n	Uc  U	R!                  S5      nU R#                  X%XU5      nU R%                  U5      nUnU R'                  X5      nU(       a  SOS nU(       a  SOS nU R(                  S U R                   R*                    H  nU(       a  X4-  nU R
                  (       a?  U R                  (       a.  U R-                  [/        UR0                  40 U
D6UUUUUUU	U5	      nOU" U4UUUUUU	US.U
D6nUS   nU(       d  M~  UUS   4-  nM     U R3                  U5      nU(       a  X4-  n[5        UU(       a  UOS UUS	9$ )
Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fr   r   )devicer|   )rC   r   rD   rN   r   rE   rB   )last_hidden_stater   rA   
attentions)r   rN   r   r   
ValueErrorgradient_checkpointingr\   rZ   r[   embed_tokensr	   get_seq_lengthrU   arangerR   r   	unsqueeze_update_causal_maskr   
rotary_embr   r   _gradient_checkpointing_funcr   __call__r   r   )r<   r   rC   r   r   r   r   rN   r   rE   r   past_seen_tokenscausal_maskrA   rB   all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                      r>   rn   PhiModel.forward   s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	-t";<YZZ&&4==Yj I  --i8M0*nO!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L..>L]
 **=9% #oomJ #7BD0d![[)H4;;+H+HIM#!%55!**t}} $ A AM22H6GH! #%"'
! !.!
!#.!-#2&7'#1(;
! (
! *!,M  =#3"55A JD ,,];  !11&+/8Od+%	
 	
r@   )r   r   r   )	NNNNNNNNN)rp   rq   rr   rs   r   r'   r   rU   ru   rt   r   r   r   r   r
   r   rn   rv   rw   rx   s   @r>   r   r      s    y  151537+/59$(,0/359f
E,,-f
 !.f
 u//0	f

 "%f
   1 12f
 D>f
 $D>f
 'tnf
 !!1!12f
 $$89f
 
!f
 f
r@   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PhiForCausalLMi5  c                    > [         TU ]  U5        [        R                  " UR                  UR
                  SS9U l        g )NTr"   )r&   r'   r(   r)   r*   
vocab_sizelm_head)r<   r   r=   s     r>   r'   PhiForCausalLM.__init__6  s0     yy!3!3V5F5FTRr@   )r   )rp   rq   rr   rs   r'   rv   rw   rx   s   @r>   r   r   5  s    S Sr@   r   c                       \ rS rSrSrg)PhiForSequenceClassificationi;  r|   Nr}   r|   r@   r>   r   r   ;  r~   r@   r   c                       \ rS rSrSrg)PhiForTokenClassificationi?  r|   Nr}   r|   r@   r>   r   r   ?  r~   r@   r   )r   r   r   r   r   )4	functoolsr   typingr   r   r   rU   torch.nnr(   cache_utilsr   r	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   r   configuration_phir   
get_loggerrp   rZ   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr   rz   Moduler   r   r   r   r   r   r   __all__r|   r@   r>   <module>r      s     , ,   . B 6 &  (
 
 
 ) 
		H	%' S)> S)l	W 	-bii -`	- 	%- % p
z p
fS% S	#A 		 ; 	r@   