o
    Zh                     @   sP  d dl Z d dlmZ d dlZd dlmZ d dlZddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZmZmZmZ d	d
lmZ eeZG dd dejZG dd deZG dd deZdd Zd$ddZG dd deZ G dd deZ!G dd deZ"G dd de"eZ#G dd de
Z$G dd  d eZ%G d!d" d"eZ&g d#Z'dS )%    N)Optional   )logging   )GemmaForCausalLMGemmaForSequenceClassificationGemmaForTokenClassification)GraniteAttention)LlamaDecoderLayerLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding   )HeliumConfigc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	HeliumRMSNormư>c                    s&   t    tt|| _|| _d S N)super__init__nn	ParametertorchZonesweightvariance_epsilon)selfhidden_sizeeps	__class__ X/var/www/auris/lib/python3.10/site-packages/transformers/models/helium/modular_helium.pyr   "   s   

zHeliumRMSNorm.__init__c                 C   sR   |j }|tj}|djddd}|t|| j  }| jtj| |S )Nr   T)Zkeepdim)	Zdtypetor   Zfloat32powmeanZrsqrtr   r   )r   Zhidden_statesZinput_dtypeZvariancer    r    r!   forward'   s
   zHeliumRMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)tupler   shaper   )r   r    r    r!   
extra_repr.   s   zHeliumRMSNorm.extra_repr)r   )__name__
__module____qualname__r   r&   r)   __classcell__r    r    r   r!   r   !   s    r   c                   @      e Zd ZdS )HeliumRotaryEmbeddingNr*   r+   r,   r    r    r    r!   r/   2       r/   c                   @   r.   )	HeliumMLPNr0   r    r    r    r!   r2   6   r1   r2   c                 C   s>   | ddddf }| ddddf }t j| |fdddS )	z*Rotates half the hidden dims of the input..r   Nr   r   r"   dim)r   stackflatten)xx1Zx2r    r    r!   rotate_half:   s   r:   c                 C   s   | |}| |}|dd|jd d f jddd}|dd|jd d f jddd}| | t| |  }|| t||  }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    .Nr"   r   r3   )Z	unsqueezer(   Zrepeat_interleaver:   )qkcossinZposition_idsZunsqueeze_dimZq_embedZk_embedr    r    r!   apply_rotary_pos_embA   s   

$$r?   c                       ,   e Zd Zddedee f fddZ  ZS )HeliumAttentionNconfig	layer_idxc                    s:   t  || tj|j|jdd| _dt| j | _	d S )NF)Zbiasr   )
r   r   r   ZLinearr   Zo_projmathsqrtZhead_dimZscalingr   rB   rC   r   r    r!   r   c   s   zHeliumAttention.__init__r   r*   r+   r,   r   r   intr   r-   r    r    r   r!   rA   b       $rA   c                       r@   )HeliumDecoderLayerNrB   rC   c                    s<   t    t|| _t|j|jd| _t|j|jd| _d S )Nr   )	r   r   r2   Zmlpr   r   rms_norm_epsZinput_layernormZpost_attention_layernormrF   r   r    r!   r   j   s   

zHeliumDecoderLayer.__init__r   rG   r    r    r   r!   rJ   i   rI   rJ   c                   @   r.   )HeliumPreTrainedModelNr0   r    r    r    r!   rM   r   r1   rM   c                       "   e Zd Zdef fddZ  ZS )HeliumModelrB   c                    sZ   t    t fddt jD | _t j j	d| _
t | _d| _|   d S )Nc                    s   g | ]}t  |qS r    )rJ   ).0rC   rB   r    r!   
<listcomp>z   s    z(HeliumModel.__init__.<locals>.<listcomp>rK   F)r   r   r   Z
ModuleListrangeZnum_hidden_layersZlayersr   r   rL   Znormr/   Z
rotary_embZgradient_checkpointing	post_initr   rB   r   rQ   r!   r   w   s   
zHeliumModel.__init__r*   r+   r,   r   r   r-   r    r    r   r!   rO   v       rO   c                       rN   )HeliumForCausalLMrB   c                    "   t  | t|| _|   d S r   r   r   rO   modelrT   rU   r   r    r!   r         
zHeliumForCausalLM.__init__rV   r    r    r   r!   rX      rW   rX   c                       rN   )HeliumForSequenceClassificationrB   c                    rY   r   rZ   rU   r   r    r!   r      r\   z(HeliumForSequenceClassification.__init__rV   r    r    r   r!   r]      rW   r]   c                       rN   )HeliumForTokenClassificationrB   c                    rY   r   rZ   rU   r   r    r!   r      r\   z%HeliumForTokenClassification.__init__rV   r    r    r   r!   r^      rW   r^   )rM   rO   rX   r]   r^   )Nr   )(rD   typingr   r   Ztorch.nnr   Ztorch.utils.checkpointutilsr   Zgemma.modeling_gemmar   r   r   Zgranite.modeling_graniter	   Zllama.modeling_llamar
   r   r   r   r   Zconfiguration_heliumr   Z
get_loggerr*   loggerModuler   r/   r2   r:   r?   rA   rJ   rM   rO   rX   r]   r^   __all__r    r    r    r!   <module>   s0   

!	