
    fTh                        S SK r S SKJr  S SKrS SKJr  S SKrSSKJr  SSK	J
r
JrJr  SSKJr  SSKJrJrJrJrJr  S	S
KJr  \R.                  " \5      r " S S\R4                  5      r " S S\5      r " S S\5      rS rS"S jr " S S\5      r  " S S\5      r! " S S\5      r" " S S\"\5      r# " S S\
5      r$ " S S\5      r% " S S \5      r&/ S!Qr'g)#    N)Optional   )logging   )GemmaForCausalLMGemmaForSequenceClassificationGemmaForTokenClassification)GraniteAttention)LlamaDecoderLayerLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding   )HeliumConfigc                   8   ^  \ rS rSrSU 4S jjrS rS rSrU =r$ )HeliumRMSNorm!   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g N)super__init__nn	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      a/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/helium/modular_helium.pyr   HeliumRMSNorm.__init__"   s-    ll5::k#:; #    c                 V   UR                   nUR                  [        R                  5      nUR	                  S5      R                  SSS9nU[        R                  " X0R                  -   5      -  nU R                  R                  [        R                  5      U-  R                  U5      $ )Nr   T)keepdim)	dtypetor   float32powmeanrsqrtr   r   )r   hidden_statesinput_dtypevariances       r#   forwardHeliumRMSNorm.forward'   s    #))%((7 $$Q',,R,>%H?T?T4T(UUu}}-=AA+NNr%   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)tupler   shaper   )r   s    r#   
extra_reprHeliumRMSNorm.extra_repr.   s*    ))*+6$2G2G1HIIr%   )r   r   )gư>)	__name__
__module____qualname____firstlineno__r   r2   r7   __static_attributes____classcell__r"   s   @r#   r   r   !   s    $
OJ Jr%   r   c                       \ rS rSrSrg)HeliumRotaryEmbedding2    Nr9   r:   r;   r<   r=   rC   r%   r#   rA   rA   2       r%   rA   c                       \ rS rSrSrg)	HeliumMLP6   rC   NrD   rC   r%   r#   rG   rG   6   rE   r%   rG   c                 x    U SSSS24   nU SSSS24   n[         R                  " U* U4SS9R                  S5      $ )	z*Rotates half the hidden dims of the input..r   Nr   r   r'   dim)r   stackflatten)xx1x2s      r#   rotate_halfrR   :   sJ    	
319B	
319B;;Ryb)11"55r%   c                 4   UR                  U5      nUR                  U5      nUSSUR                  S   S-  24   R                  SSS9nUSSUR                  S   S-  24   R                  SSS9nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXg4$ )a  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    position_ids (`torch.Tensor`, *optional*):
        Deprecated and unused.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
.Nr'   r   rJ   )	unsqueezer6   repeat_interleaverR   )qkcossinposition_idsunsqueeze_dimq_embedk_embeds           r#   apply_rotary_pos_embr^   A   s    ( --
&C
--
&C c'SYYr]a'''
(
:
:1"
:
EC
c'SYYr]a'''
(
:
:1"
:
ECw;q>C/0Gw;q>C/0Gr%   c                   >   ^  \ rS rSrSS\S\\   4U 4S jjjrSrU =r	$ )HeliumAttentionb   config	layer_idxc                    > [         TU ]  X5        [        R                  " UR                  UR                  SS9U l        S[        R                  " U R                  5      -  U l	        g )NF)biasr   )
r   r   r   Linearr    o_projmathsqrthead_dimscalingr   rb   rc   r"   s      r#   r   HeliumAttention.__init__c   sI    +ii 2 2F4F4FUS499T]]33r%   )rg   rk   r   
r9   r:   r;   r<   r   r   intr   r=   r>   r?   s   @r#   r`   r`   b   s    4| 4 4 4r%   r`   c                   >   ^  \ rS rSrSS\S\\   4U 4S jjjrSrU =r	$ )HeliumDecoderLayeri   rb   rc   c                    > [         TU ]  5         [        U5      U l        [	        UR
                  UR                  S9U l        [	        UR
                  UR                  S9U l        g )Nr!   )	r   r   rG   mlpr   r    rms_norm_epsinput_layernormpost_attention_layernormrl   s      r#   r   HeliumDecoderLayer.__init__j   sP    V$,V-?-?VEXEXY(5f6H6HfNaNa(b%r%   )rw   ru   rx   r   rn   r?   s   @r#   rq   rq   i   s#    c| c c cr%   rq   c                       \ rS rSrSrg)HeliumPreTrainedModelr   rC   NrD   rC   r%   r#   r{   r{   r   rE   r%   r{   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )HeliumModelv   rb   c           	      R  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        UR                  UR                  S9U l        [        U5      U l        SU l        U R                  5         g s  snf )Nrt   F)r   r   r   
ModuleListrangenum_hidden_layersrq   layersr   r    rv   normrA   
rotary_embgradient_checkpointing	post_initrl   s      r#   r   HeliumModel.__init__w   s     mmDI&JbJbDcdDcy2Dcd
 "&"4"4&:M:MN	/7&+# 	 es   B$)r   r   r   r   r9   r:   r;   r<   r   r   r=   r>   r?   s   @r#   r~   r~   v   s    
| 
 
r%   r~   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )HeliumForCausalLM   rb   c                 d   > [         TU ]  U5        [        U5      U l        U R	                  5         g r   r   r   r~   modelr   r   rb   r"   s     r#   r   HeliumForCausalLM.__init__   &      (
r%   r   r   r?   s   @r#   r   r          |  r%   r   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )HeliumForSequenceClassification   rb   c                 d   > [         TU ]  U5        [        U5      U l        U R	                  5         g r   r   r   s     r#   r   (HeliumForSequenceClassification.__init__   r   r%   r   r   r?   s   @r#   r   r      r   r%   r   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )HeliumForTokenClassification   rb   c                 d   > [         TU ]  U5        [        U5      U l        U R	                  5         g r   r   r   s     r#   r   %HeliumForTokenClassification.__init__   r   r%   r   r   r?   s   @r#   r   r      r   r%   r   )r{   r~   r   r   r   )Nr   )(rh   typingr   r   torch.nnr   torch.utils.checkpointutilsr   gemma.modeling_gemmar   r   r	   granite.modeling_graniter
   llama.modeling_llamar   r   r   r   r   configuration_heliumr   
get_loggerr9   loggerModuler   rA   rG   rR   r^   r`   rq   r{   r~   r   r   r   __all__rC   r%   r#   <module>r      s           p p 7 v v . 
		H	%JBII J"	0 		 	6B4& 4c* c	0 	' ( &D #> r%   