
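# Modular definition of the Gemma model family (transformers/models/gemma/modular_gemma.py).
# The classes below subclass their Llama counterparts and only spell out the Gemma-specific
# differences; the library's modular converter expands them into the generated
# `configuration_gemma.py`, `tokenization_gemma.py`, and `modeling_gemma.py` modules.
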
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import sentencepiece as spm
import torch
import torch.utils.checkpoint
from torch import nn

from ...cache_utils import Cache, DynamicCache
from ...configuration_utils import PretrainedConfig
from ...modeling_outputs import BaseModelOutputWithPast
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from ...utils import logging
from ..llama.modeling_llama import (
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaModel,
)
from ..llama.tokenization_llama import LlamaTokenizer


if TYPE_CHECKING:
    from ...tokenization_utils_base import TextInput

VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}

SPIECE_UNDERLINE = "▁"

logger = logging.get_logger(__name__)


class GemmaConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`GemmaModel`]. It is used to instantiate a Gemma
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the Gemma-7B.
e.g. [google/gemma-7b](https://huggingface.co/google/gemma-7b)
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
    vocab_size (`int`, *optional*, defaults to 256000):
        Vocabulary size of the Gemma model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`GemmaModel`]
    hidden_size (`int`, *optional*, defaults to 3072):
        Dimension of the hidden representations.
    intermediate_size (`int`, *optional*, defaults to 24576):
        Dimension of the MLP representations.
    num_hidden_layers (`int`, *optional*, defaults to 28):
        Number of hidden layers in the Transformer decoder.
    num_attention_heads (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer decoder.
    num_key_value_heads (`int`, *optional*, defaults to 16):
        This is the number of key_value heads that should be used to implement Grouped Query Attention. If
        `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
        `num_key_value_heads=1`, the model will use Multi Query Attention (MQA); otherwise GQA is used. When
        converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
        by mean-pooling all the original heads within that group. For more details, check out [this
        paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
        `num_attention_heads`.
    head_dim (`int`, *optional*, defaults to 256):
        The attention head dimension.
    hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
        The legacy activation function. It is overwritten by the `hidden_activation`.
    hidden_activation (`str` or `function`, *optional*):
        The non-linear activation function (function or string) in the decoder. Will default to `"gelu_pytorch_tanh"`
        if not specified. `"gelu_pytorch_tanh"` uses an approximation of the `"gelu"` activation function.
    max_position_embeddings (`int`, *optional*, defaults to 8192):
        The maximum sequence length that this model might ever be used with.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    rms_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon used by the rms normalization layers.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models). Only
        relevant if `config.is_decoder=True`.
    pad_token_id (`int`, *optional*, defaults to 0):
        Padding token id.
    eos_token_id (`int`, *optional*, defaults to 1):
        End of stream token id.
    bos_token_id (`int`, *optional*, defaults to 2):
        Beginning of stream token id.
    tie_word_embeddings (`bool`, *optional*, defaults to `True`):
        Whether to tie the input and output word embeddings.
    rope_theta (`float`, *optional*, defaults to 10000.0):
        The base period of the RoPE embeddings.
    attention_bias (`bool`, *optional*, defaults to `False`):
        Whether to use a bias in the query, key, value and output projection layers during self-attention.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the attention probabilities.
```python
>>> from transformers import GemmaModel, GemmaConfig
>>> # Initializing a Gemma gemma-7b style configuration
>>> configuration = GemmaConfig()
>>> # Initializing a model from the gemma-7b style configuration
>>> model = GemmaModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```"""

    model_type = "gemma"
    keys_to_ignore_at_inference = ["past_key_values"]
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    def __init__(
        self,
        vocab_size=256000,
        hidden_size=3072,
        intermediate_size=24576,
        num_hidden_layers=28,
        num_attention_heads=16,
        num_key_value_heads=16,
        head_dim=256,
        hidden_act="gelu_pytorch_tanh",
        hidden_activation=None,
        max_position_embeddings=8192,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=1,
        bos_token_id=2,
        tie_word_embeddings=True,
        rope_theta=10000.0,
        attention_bias=False,
        attention_dropout=0.0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.head_dim = head_dim
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.hidden_activation = hidden_activation
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


class GemmaTokenizer(LlamaTokenizer, PreTrainedTokenizer):
    """
  
Construct a Gemma tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
no padding token in the original model.

Args:
    vocab_file (`str`):
        Path to the vocabulary file.
    unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
        The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
        token instead.
    bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<bos>"`):
        The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
    eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<eos>"`):
        The end of sequence token.
    pad_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<pad>"`):
        A special token used to make arrays of tokens the same size for batching purposes. It will then be ignored by
        attention mechanisms or loss computation.
    sp_model_kwargs (`Dict[str, Any]`, *optional*):
        Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
        SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
        to set:

        - `enable_sampling`: Enable subword regularization.
        - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

          - `nbest_size = {0,1}`: No sampling is performed.
          - `nbest_size > 1`: samples from the nbest_size results.
          - `nbest_size < 0`: assumes that nbest_size is infinite and samples from all hypotheses (lattice)
            using forward-filtering-and-backward-sampling algorithm.

        - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
          BPE-dropout.

    add_bos_token (`bool`, *optional*, defaults to `True`):
        Whether or not to add a `bos_token` at the start of sequences.
    add_eos_token (`bool`, *optional*, defaults to `False`):
        Whether or not to add an `eos_token` at the end of sequences.
    clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
        Whether or not to clean up spaces after decoding; cleanup consists of removing potential artifacts like
        extra spaces.
    use_default_system_prompt (`bool`, *optional*, defaults to `False`):
        Whether or not the default system prompt for Gemma should be used.
    spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
        Whether or not to add spaces between special tokens.
    """

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<bos>",
        eos_token="<eos>",
        pad_token="<pad>",
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        use_default_system_prompt=False,
        spaces_between_special_tokens=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, normalized=False, special=True) if isinstance(pad_token, str) else pad_token

        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.use_default_system_prompt = use_default_system_prompt
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)

        # Call PreTrainedTokenizer.__init__ directly so that LlamaTokenizer's SentencePiece
        # setup (which Gemma does not need) is bypassed.
        PreTrainedTokenizer.__init__(
            self,
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            use_default_system_prompt=use_default_system_prompt,
            spaces_between_special_tokens=spaces_between_special_tokens,
            **kwargs,
        )

    def get_spm_processor(self):
        raise AttributeError("Not needed for Gemma")

    def unk_token_length(self):
        raise AttributeError("Not needed for Gemma")

    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
        """
Args:
    text: TextInput
Simply calls PreTrainedTokenizer's method
        """
        return PreTrainedTokenizer.tokenize(self, text, **kwargs)

    def _tokenize(self, text, **kwargs):
        """
Args:
    text: TextInput
Returns a tokenized string. The Gemma tokenizer never adds a prefix space.
        """
        return self.sp_model.encode(text, out_type=str)

    def _decode(
        self,
        token_ids: List[int],
        skip_special_tokens: bool = False,
        spaces_between_special_tokens: bool = False,
        **kwargs,
    ) -> str:
        sub_texts = []
        current_sub_text = []
        for ids in token_ids:
            if skip_special_tokens and ids in self.all_special_ids:
                continue
            if ids in self._added_tokens_decoder:
                if current_sub_text:
                    sub_texts.append(self.sp_model.decode(current_sub_text))
                sub_texts.append(self._added_tokens_decoder[ids].content)
                current_sub_text = []
            else:
                current_sub_text.append(ids)
        if current_sub_text:
            sub_texts.append(self.sp_model.decode(current_sub_text))

        if spaces_between_special_tokens:
            sub_texts = " ".join(sub_texts)
        else:
            sub_texts = "".join(sub_texts)

        return sub_texts.replace(SPIECE_UNDERLINE, " ")

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) into a single string."""
        current_sub_tokens = []
        out_string = ""
        for token in tokens:
            # special tokens are not decoded with the sentencepiece model
            if token in self._added_tokens_encoder:
                out_string += self.sp_model.decode(current_sub_tokens) + token
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string


class GemmaRMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        # The scale is stored as a zero-initialized offset and applied as (1 + weight) below.
        self.weight = nn.Parameter(torch.zeros(dim))

    def _norm(self, x):
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        output = self._norm(x.float())
        # Gemma normalizes and scales in float32 and only casts back to the input dtype at the end.
        output = output * (1.0 + self.weight.float())
        return output.type_as(x)

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.eps}"


class GemmaMLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Bias-free projections for the gated MLP inherited from LlamaMLP.
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class GemmaModel(LlamaModel):
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs,
    ) -> BaseModelOutputWithPast:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
            )
            use_cache = False

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache()

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(
            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
        )

        hidden_states = inputs_embeds

        # position embeddings are computed once and shared across all decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # Gemma scales the embeddings by sqrt(hidden_size); the factor is materialized in the
        # activation dtype on purpose so that reduced-precision runs match the reference model.
        normalizer = torch.tensor(self.config.hidden_size**0.5, dtype=hidden_states.dtype)
        hidden_states = hidden_states * normalizer

        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_value=past_key_values,
                output_attentions=output_attentions,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
                **kwargs,
            )

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )


class GemmaForCausalLM(LlamaForCausalLM):
    def forward(self, **super_kwargs):
        r"""
Example:

```python
>>> from transformers import AutoTokenizer, GemmaForCausalLM

>>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
>>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

>>> prompt = "What is your favorite condiment?"
>>> inputs = tokenizer(prompt, return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"What is your favorite condiment?"
        ```"""
        return super().forward(**super_kwargs)


class GemmaForSequenceClassification(LlamaForSequenceClassification):
    pass


class GemmaForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GemmaConfig",
    "GemmaTokenizer",
    "GemmaModel",
    "GemmaForCausalLM",
    "GemmaForSequenceClassification",
    "GemmaForTokenClassification",
    "GemmaPreTrainedModel",
]