
"""MAMBA2 configuration"""

import math

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class Mamba2Config(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`Mamba2Model`]. It is used to instantiate a MAMBA2
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the MAMBA2
[state-spaces/mamba2-2.8b](https://huggingface.co/state-spaces/mamba2-2.8b) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.


Args:
    num_heads (`int`, *optional*, defaults to 128):
        Number of heads for the evolution matrices of mamba 2.
    head_dim (`int`, *optional*, defaults to 64):
        Dimension of each head.
    vocab_size (`int`, *optional*, defaults to 32768):
        Vocabulary size of the MAMBA2 model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`Mamba2Model`].
    hidden_size (`int`, *optional*, defaults to 4096):
        Dimensionality of the embeddings and hidden states.
    state_size (`int`, *optional*, defaults to 128): Shape of the state space latents.
    num_hidden_layers (`int`, *optional*, defaults to 64):
        Number of hidden layers in the model.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
        The epsilon to use in the layer normalization layers.
    pad_token_id (`int`, *optional*, defaults to 1):
        Padding token id.
    bos_token_id (`int`, *optional*, defaults to 0):
        The id of the beginning of sentence token in the vocabulary.
    eos_token_id (`int`, *optional*, defaults to 2):
        The id of the end of sentence token in the vocabulary.
    expand (`int`, *optional*, defaults to 2): Expanding factor used to determine the intermediate size.
    conv_kernel (`int`, *optional*, defaults to 4): Size of the convolution kernel.
    n_groups (`int`, *optional*, defaults to 8):
        Number of groups for the evolution matrices of mamba 2.
    use_bias (`bool`, *optional*, defaults to `False`):
        Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block.
    use_conv_bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the convolution layer of the mixer block.
    hidden_act (`str`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the decoder.
    initializer_range (`float`, *optional*, defaults to 0.1):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    residual_in_fp32 (`bool`, *optional*, defaults to `True`):
        Whether or not residuals should be in `float32`. If set to `False`, residuals will keep the same `dtype` as the rest of the model.
    time_step_rank (`Union[int,str]`, *optional*, defaults to `"auto"`):
        Rank of the discretization projection matrix. `"auto"` means that it will default to `math.ceil(self.hidden_size / 16)`.
    time_step_min (`float`, *optional*, defaults to 0.001):
        Minimum `time_step` used to bound `dt_proj.bias`.
    time_step_max (`float`, *optional*, defaults to 0.1):
        Maximum `time_step` used to bound `dt_proj.bias`.
    time_step_floor (`float`, *optional*, defaults to 0.0001):
        Minimum clamping value of the `dt_proj.bias` layer initialization.
    time_step_limit (`tuple`, *optional*, defaults to `(0.0, inf)`):
        Accepted range of time step values.
    rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
        Whether or not to rescale `out_proj` weights when initializing.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the cache should be used.
    rms_norm (`bool`, *optional*, defaults to `True`):
        Whether to use RMS norm or not.
    chunk_size (`int`, *optional*, defaults to 256):
        Size of the chunks that will comprise the sequence.
    tie_word_embeddings (`bool`, *optional*, defaults to `False`):
        Whether to tie word embeddings or not.


Example:

```python
>>> from transformers import Mamba2Config, Mamba2Model

>>> # Initializing a Mamba2 configuration
>>> configuration = Mamba2Config()

>>> # Initializing a model (with random weights) from the configuration
>>> model = Mamba2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
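
>>> # time_step_rank="auto" resolves to math.ceil(hidden_size / 16), i.e. 256 for the default hidden_size of 4096
>>> configuration.time_step_rank
256

>>> # Custom sizes must keep hidden_size * expand equal to num_heads * head_dim (here 768 * 2 == 24 * 64)
>>> small_configuration = Mamba2Config(hidden_size=768, num_heads=24, head_dim=64, num_hidden_layers=12)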
    ```"""

    model_type = "mamba2"

    def __init__(
        self,
        num_heads=128,
        head_dim=64,
        vocab_size=32768,
        hidden_size=4096,
        state_size=128,
        num_hidden_layers=64,
        layer_norm_epsilon=1e-5,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        expand=2,
        conv_kernel=4,
        n_groups=8,
        use_bias=False,
        use_conv_bias=True,
        hidden_act="silu",
        initializer_range=0.1,
        residual_in_fp32=True,
        time_step_rank="auto",
        time_step_min=0.001,
        time_step_max=0.1,
        time_step_floor=1e-4,
        time_step_limit=(0.0, float("inf")),
        rescale_prenorm_residual=False,
        use_cache=True,
        rms_norm=True,
        chunk_size=256,
        tie_word_embeddings=False,
        **kwargs,
    ):
        # The intermediate size implied by hidden_size * expand must match the head layout.
        if hidden_size * expand != num_heads * head_dim:
            raise ValueError(
                f"Inconsistent configuration: hidden_size * expand ({hidden_size * expand}) "
                f"must equal num_heads * head_dim ({num_heads * head_dim})."
            )

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.state_size = state_size
        self.num_hidden_layers = num_hidden_layers
        self.layer_norm_epsilon = layer_norm_epsilon
        self.conv_kernel = conv_kernel
        self.expand = expand
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.use_bias = use_bias
        self.use_conv_bias = use_conv_bias
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # "auto" derives the discretization projection rank from the hidden size.
        self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank
        self.time_step_min = time_step_min
        self.time_step_max = time_step_max
        self.time_step_floor = time_step_floor
        self.rescale_prenorm_residual = rescale_prenorm_residual
        self.residual_in_fp32 = residual_in_fp32
        self.use_cache = use_cache
        self.n_groups = n_groups
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.rms_norm = rms_norm
        self.chunk_size = chunk_size
        self.time_step_limit = time_step_limit
        self.tie_word_embeddings = tie_word_embeddings

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


__all__ = ["Mamba2Config"]