
"""PyTorch BitNet model."""

from typing import Callable, Optional, Tuple

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaModel,
    LlamaRMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_bitnet import BitNetConfig


logger = logging.get_logger(__name__)


class BitNetRMSNorm(LlamaRMSNorm):
    pass


class BitNetMLP(GemmaMLP):
    def __init__(self, config: BitNetConfig):
        super().__init__(config)
        # BitNet adds an extra RMSNorm (ffn_sub_norm) applied before the down projection.
        self.ffn_sub_norm = BitNetRMSNorm(config.intermediate_size, eps=config.rms_norm_eps)

    def forward(self, x):
        down_proj = self.down_proj(self.ffn_sub_norm(self.act_fn(self.gate_proj(x)) * self.up_proj(x)))
        return down_proj


class BitNetAttention(LlamaAttention):
    def __init__(self, config: BitNetConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        # BitNet adds an extra RMSNorm (attn_sub_norm) applied before the output projection.
        self.attn_sub_norm = BitNetRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.attn_sub_norm(attn_output)  # difference with Llama
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class BitNetDecoderLayer(LlamaDecoderLayer):
    pass


class BitNetModel(LlamaModel):
    pass


class BitNetForCausalLM(LlamaForCausalLM):
    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = None
    _pp_plan = None

    def forward(self, **super_kwargs) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```"""
        return super().forward(**super_kwargs)


__all__ = [
    "BitNetForCausalLM",
    "BitNetModel",
    "BitNetPreTrainedModel",
]