from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import LossKwargs, logging
from ..glm.modeling_glm import (
    GlmAttention,
    GlmForCausalLM,
    GlmForSequenceClassification,
    GlmForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm4 import Glm4Config
from .modeling_glm4 import Glm4RMSNorm


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/GLM-4-9B-Chat-0414"


class Glm4MLP(Phi3MLP):
    pass


class Glm4DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Glm4Config, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.self_attn = Glm4Attention(config=config, layer_idx=layer_idx)
        self.mlp = Glm4MLP(config)
        # GLM-4 uses a "sandwich" normalization scheme: in addition to the usual
        # pre-attention and pre-MLP norms, the attention output and the MLP output
        # are each passed through their own RMSNorm before the residual addition.
        self.input_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_self_attn_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_mlp_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = self.post_self_attn_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = self.post_mlp_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)
        if output_attentions:
            outputs += (self_attn_weights,)

        return outputs


class Glm4Attention(GlmAttention):
    pass


class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...


class Glm4ForCausalLM(GlmForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[KwargsForCausalLM],
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-Chat-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-Chat-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Glm4ForSequenceClassification(GlmForSequenceClassification):
    pass


class Glm4ForTokenClassification(GlmForTokenClassification):
    pass


__all__ = [
    "Glm4PreTrainedModel",
    "Glm4Model",
    "Glm4ForCausalLM",
    "Glm4ForSequenceClassification",
    "Glm4ForTokenClassification",
]