from typing import Optional

import torch
import torch.nn as nn
import torch.utils.checkpoint

from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm import GlmConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/glm-4-9b"


class GlmMLP(Phi3MLP):
    pass


def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., 0::2]
    x2 = x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)


def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)

    # Interleave the cos/sin values to match the interleaved rotation in `rotate_half`.
    cos = cos[..., : cos.shape[-1] // 2].repeat_interleave(2, dim=-1)
    sin = sin[..., : sin.shape[-1] // 2].repeat_interleave(2, dim=-1)

    # Split into the rotary part and the pass-through part (partial rotary embedding).
    rotary_dim = cos.shape[-1]
    q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:]
    k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:]

    # Apply the rotation to the rotary part only.
    q_embed = (q_rot * cos) + (rotate_half(q_rot) * sin)
    k_embed = (k_rot * cos) + (rotate_half(k_rot) * sin)

    # Concatenate back to the full head dimension.
    q_embed = torch.cat([q_embed, q_pass], dim=-1)
    k_embed = torch.cat([k_embed, k_pass], dim=-1)
    return q_embed, k_embed


class GlmAttention(LlamaAttention):
    def __init__(self, config: GlmConfig, layer_idx: Optional[int] = None):
        super().__init__(config, layer_idx)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)


class GlmForCausalLM(LlamaForCausalLM):
    pass


class GlmForSequenceClassification(LlamaForSequenceClassification):
    pass


class GlmForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GlmPreTrainedModel",
    "GlmModel",
    "GlmForCausalLM",
    "GlmForSequenceClassification",
    "GlmForTokenClassification",
]