
"""PyTorch BitNet model."""

from typing import Callable, Optional, Tuple

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaModel,
    LlamaRMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_bitnet import BitNetConfig


logger = logging.get_logger(__name__)


class BitNetRMSNorm(LlamaRMSNorm):
    pass


class BitNetMLP(GemmaMLP):
    def __init__(self, config: BitNetConfig):
        super().__init__(config)
        # BitNet adds an extra RMSNorm (ffn_sub_norm) applied before the down projection.
        self.ffn_sub_norm = BitNetRMSNorm(config.intermediate_size, eps=config.rms_norm_eps)

    def forward(self, x):
        down_proj = self.down_proj(self.ffn_sub_norm(self.act_fn(self.gate_proj(x)) * self.up_proj(x)))
        return down_proj


class BitNetAttention(LlamaAttention):
    def __init__(self, config: BitNetConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        # BitNet adds an extra RMSNorm (attn_sub_norm) applied before the output projection.
        self.attn_sub_norm = BitNetRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.attn_sub_norm(attn_output)  # difference with Llama
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class BitNetDecoderLayer(LlamaDecoderLayer):
    pass


class BitNetModel(LlamaModel):
    pass


class BitNetForCausalLM(LlamaForCausalLM):
    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = None
    _pp_plan = None

    def forward(self, **super_kwargs) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```"""
        return super().forward(**super_kwargs)


__all__ = [
    "BitNetForCausalLM",
    "BitNetModel",
    "BitNetPreTrainedModel",
]