"""PyTorch BitNet model."""

from typing import Callable, Optional, Tuple

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaModel,
    LlamaRMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_bitnet import BitNetConfig


logger = logging.get_logger(__name__)


class BitNetRMSNorm(LlamaRMSNorm):
    pass


class BitNetMLP(GemmaMLP):
    def __init__(self, config: BitNetConfig):
        super().__init__(config)
        # Extra sub-layer norm applied to the gated hidden states before the down projection.
        self.ffn_sub_norm = BitNetRMSNorm(config.intermediate_size, eps=config.rms_norm_eps)

    def forward(self, x):
        down_proj = self.down_proj(self.ffn_sub_norm(self.act_fn(self.gate_proj(x)) * self.up_proj(x)))
        return down_proj


class BitNetAttention(LlamaAttention):
    def __init__(self, config: BitNetConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        # Extra sub-layer norm applied to the attention output before the output projection.
        self.attn_sub_norm = BitNetRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.attn_sub_norm(attn_output)  # difference with respect to Llama
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class BitNetDecoderLayer(LlamaDecoderLayer):
    pass


class BitNetModel(LlamaModel):
    pass


class BitNetForCausalLM(LlamaForCausalLM):
    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = None
    _pp_plan = None

    def forward(self, **super_kwargs) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```"""
        return super().forward(**super_kwargs)


__all__ = ["BitNetForCausalLM", "BitNetModel", "BitNetPreTrainedModel"]