from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import sentencepiece as spm
import torch
import torch.utils.checkpoint
from torch import nn

from ...cache_utils import Cache, DynamicCache
from ...configuration_utils import PretrainedConfig
from ...modeling_outputs import BaseModelOutputWithPast
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from ...utils import logging
from ..llama.modeling_llama import (
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaModel,
)
from ..llama.tokenization_llama import LlamaTokenizer


if TYPE_CHECKING:
    from ...tokenization_utils_base import TextInput

VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}

# SentencePiece's word-boundary (whitespace) marker.
SPIECE_UNDERLINE = "▁"

logger = logging.get_logger(__name__)


class GemmaConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`GemmaModel`]. It is used to instantiate a Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the Gemma-7B.

    e.g. [google/gemma-7b](https://huggingface.co/google/gemma-7b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the Gemma model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`GemmaModel`]
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 24576):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 16):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        head_dim (`int`, *optional*, defaults to 256):
            The attention head dimension.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
            The legacy activation function. It is overwritten by the `hidden_activation`.
        hidden_activation (`str` or `function`, *optional*):
            The non-linear activation function (function or string) in the decoder. Will default to `"gelu_pytorch_tanh"`
            if not specified. `"gelu_pytorch_tanh"` uses an approximation of the `"gelu"` activation function.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to 0):
            Padding token id.
        eos_token_id (`int`, *optional*, defaults to 1):
            End of stream token id.
        bos_token_id (`int`, *optional*, defaults to 2):
            Beginning of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
    ```python
    >>> from transformers import GemmaModel, GemmaConfig
    >>> # Initializing a Gemma gemma-7b style configuration
    >>> configuration = GemmaConfig()
    >>> # Initializing a model from the gemma-7b style configuration
    >>> model = GemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```Zgemmapast_key_valuesZcolwiseZrowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnorm      `           gelu_pytorch_tanhN    {Gz?ư>Tr      r        @F        c                    s   || _ |
| _|| _|| _|| _|| _|| _|| _|| _|	| _	|| _
|| _|| _|| _|| _|| _t jd||||d| d S )N)pad_token_idbos_token_ideos_token_idtie_word_embeddings )
vocab_sizemax_position_embeddingshidden_sizeintermediate_sizenum_hidden_layersnum_attention_headshead_dimnum_key_value_heads
hidden_acthidden_activationinitializer_rangerms_norm_eps	use_cache
rope_thetaattention_biasattention_dropoutsuper__init__)selfr5   r7   r8   r9   r:   r<   r;   r=   r>   r6   r?   r@   rA   r0   r2   r1   r3   rB   rC   rD   kwargs	__class__r4   V/var/www/auris/lib/python3.10/site-packages/transformers/models/gemma/modular_gemma.pyrF      s0   
zGemmaConfig.__init__)r#   r$   r%   r&   r'   r'   r(   r)   Nr*   r+   r,   Tr   r-   r   Tr.   Fr/   )
__name__
__module____qualname____doc__Z
model_typeZkeys_to_ignore_at_inferenceZbase_model_tp_planZbase_model_pp_planrF   __classcell__r4   r4   rI   rK   r   1   sJ    C


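# A minimal configuration sketch (illustrative hyperparameters, not an official
# checkpoint config): choosing `num_key_value_heads` smaller than
# `num_attention_heads` selects grouped-query attention, as described in the
# class docstring above.
#
#   config = GemmaConfig(
#       num_hidden_layers=4,
#       num_attention_heads=16,
#       num_key_value_heads=4,  # 4 KV groups -> GQA
#   )
#   model = GemmaModel(config)
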
class GemmaTokenizer(LlamaTokenizer, PreTrainedTokenizer):
    """
    Construct a Gemma tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
    no padding token in the original model.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<bos>"`):
            The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<eos>"`):
            The end of sequence token.
        pad_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<pad>"`):
            A special token used to make arrays of tokens the same size for batching purposes. Will then be ignored by
            attention mechanisms or loss computation.
        sp_model_kwargs (`Dict[str, Any]`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from all hypotheses (lattice)
                using forward-filtering-and-backward-sampling algorithm.

            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.

        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add a `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to clean up spaces after decoding; cleanup consists of removing potential artifacts like
            extra spaces.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Gemma should be used.
        spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not to add spaces between special tokens.
    """

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<bos>",
        eos_token="<eos>",
        pad_token="<pad>",
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        use_default_system_prompt=False,
        spaces_between_special_tokens=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, normalized=False, special=True) if isinstance(pad_token, str) else pad_token

        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.use_default_system_prompt = use_default_system_prompt
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)

        PreTrainedTokenizer.__init__(
            self,
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            use_default_system_prompt=use_default_system_prompt,
            spaces_between_special_tokens=spaces_between_special_tokens,
            **kwargs,
        )

    def get_spm_processor(self):
        raise AttributeError("Not needed for Gemma")

    def unk_token_length(self):
        raise AttributeError("Not needed for Gemma")

    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
        """
        Args:
            text: TextInput
        Simply calls PreTrainedTokenizer's method
        """
        return PreTrainedTokenizer.tokenize(self, text, **kwargs)

    def _tokenize(self, text, **kwargs):
        """
        Args:
            text: TextInput
        Returns a tokenized string. The Gemma tokenizer never adds a prefix space.
        """
        return self.sp_model.encode(text, out_type=str)

    def _decode(
        self,
        token_ids: List[int],
        skip_special_tokens: bool = False,
        spaces_between_special_tokens: bool = False,
        **kwargs,
    ) -> str:
        sub_texts = []
        current_sub_text = []
        for ids in token_ids:
            if skip_special_tokens and ids in self.all_special_ids:
                continue
            if ids in self._added_tokens_decoder:
                if current_sub_text:
                    sub_texts.append(self.sp_model.decode(current_sub_text))
                sub_texts.append(self._added_tokens_decoder[ids].content)
                current_sub_text = []
            else:
                current_sub_text.append(ids)
        if current_sub_text:
            sub_texts.append(self.sp_model.decode(current_sub_text))

        if spaces_between_special_tokens:
            sub_texts = " ".join(sub_texts)
        else:
            sub_texts = "".join(sub_texts)

        return sub_texts.replace(SPIECE_UNDERLINE, " ")

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (strings) into a single string."""
        current_sub_tokens = []
        out_string = ""
        for token in tokens:
            # make sure that special tokens are not decoded using the sentencepiece model
            if token in self._added_tokens_encoder:
                out_string += self.sp_model.decode(current_sub_tokens) + token
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

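# A minimal usage sketch (assumes a local SentencePiece file named "tokenizer.model";
# the file name and sample text are illustrative only):
#
#   tokenizer = GemmaTokenizer("tokenizer.model")
#   ids = tokenizer.encode("Hello world")                   # <bos> is prepended by default
#   text = tokenizer.decode(ids, skip_special_tokens=True)  # "Hello world"
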
class GemmaRMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.zeros(dim))

    def _norm(self, x):
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        # Statistics are computed in float32, and the scale is (1 + weight) rather
        # than weight alone, so a zero-initialized weight is an identity scale.
        output = self._norm(x.float())
        output = output * (1.0 + self.weight.float())
        return output.type_as(x)

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.eps}"

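# A quick shape sketch (illustrative values, not part of the original module):
#
#   norm = GemmaRMSNorm(dim=8, eps=1e-6)
#   x = torch.randn(2, 4, 8)
#   y = norm(x)  # same shape and dtype as x, normalized over the last dimension
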
class GemmaMLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class GemmaModel(LlamaModel):
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs,
    ) -> BaseModelOutputWithPast:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
            )
            use_cache = False

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache()

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(
            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
        )

        hidden_states = inputs_embeds

        # position embeddings are created once and shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # Gemma scales the embeddings by sqrt(hidden_size); the scale is materialized
        # in the embedding dtype so the multiplication does not upcast.
        normalizer = torch.tensor(self.config.hidden_size**0.5, dtype=hidden_states.dtype)
        hidden_states = hidden_states * normalizer

        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_value=past_key_values,
                output_attentions=output_attentions,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
            )

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )


class GemmaForCausalLM(LlamaForCausalLM):
    def forward(**super_kwargs):
        r"""
        Example:

        ```python
        >>> from transformers import AutoTokenizer, GemmaForCausalLM

        >>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```"""
        return super().forward(**super_kwargs)


class GemmaForSequenceClassification(LlamaForSequenceClassification):
    pass


class GemmaForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GemmaConfig",
    "GemmaTokenizer",
    "GemmaModel",
    "GemmaForCausalLM",
    "GemmaForSequenceClassification",
    "GemmaForTokenClassification",
    "GemmaPreTrainedModel",  # re-exported from the generated modeling file
]
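# A minimal usage sketch for the classification heads (the checkpoint name and label
# count are illustrative; any compatible Gemma checkpoint would do):
#
#   from transformers import AutoTokenizer, GemmaForSequenceClassification
#
#   tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
#   model = GemmaForSequenceClassification.from_pretrained("google/gemma-2b", num_labels=2)
#   inputs = tokenizer("Gemma is a family of open models.", return_tensors="pt")
#   logits = model(**inputs).logits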