
"""MAMBA configuration"""

import math

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class MambaConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`MambaModel`]. It is used to instantiate a MAMBA
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the MAMBA
[state-spaces/mamba-2.8b](https://huggingface.co/state-spaces/mamba-2.8b) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
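
Since [`MambaConfig`] inherits from [`PretrainedConfig`], a configuration can be serialized to disk and reloaded
with the usual `save_pretrained`/`from_pretrained` round trip. A minimal sketch (the local directory name below is
arbitrary):

```python
>>> from transformers import MambaConfig

>>> configuration = MambaConfig(hidden_size=1024)
>>> configuration.save_pretrained("./my-mamba-config")  # writes config.json to the directory
>>> reloaded = MambaConfig.from_pretrained("./my-mamba-config")
>>> reloaded.hidden_size
1024
```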


Args:
    vocab_size (`int`, *optional*, defaults to 50280):
        Vocabulary size of the MAMBA model. Defines the number of different tokens that can be represented by the
        `input_ids` passed when calling [`MambaModel`].
    hidden_size (`int`, *optional*, defaults to 768):
        Dimensionality of the embeddings and hidden states.
    state_size (`int`, *optional*, defaults to 16): Shape of the state space latents.
    num_hidden_layers (`int`, *optional*, defaults to 32):
        Number of hidden layers in the model.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
        The epsilon to use in the layer normalization layers.
    pad_token_id (`int`, *optional*, defaults to 0):
        Padding token id.
    bos_token_id (`int`, *optional*, defaults to 0):
        The id of the beginning of sentence token in the vocabulary.
    eos_token_id (`int`, *optional*, defaults to 0):
        The id of the end of sentence token in the vocabulary.
    expand (`int`, *optional*, defaults to 2): Expanding factor used to determine the intermediate size.
    conv_kernel (`int`, *optional*, defaults to 4): Size of the convolution kernel.
    use_bias (`bool`, *optional*, defaults to `False`):
        Whether or not to use bias in the `in_proj` and `out_proj` layers of the mixer block.
    use_conv_bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the convolution layer of the mixer block.
    hidden_act (`str`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the decoder.
    initializer_range (`float`, *optional*, defaults to 0.1):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    residual_in_fp32 (`bool`, *optional*, defaults to `True`):
        Whether or not residuals should be in `float32`. If set to `False`, residuals will keep the same `dtype` as
        the rest of the model.
    time_step_rank (`Union[int,str]`, *optional*, defaults to `"auto"`):
        Rank of the discretization projection matrix. `"auto"` means that it will default to
        `math.ceil(self.hidden_size / 16)`.
    time_step_scale (`float`, *optional*, defaults to 1.0):
        Scale used to scale `dt_proj.bias`.
    time_step_min (`float`, *optional*, defaults to 0.001):
        Minimum `time_step` used to bound `dt_proj.bias`.
    time_step_max (`float`, *optional*, defaults to 0.1):
        Maximum `time_step` used to bound `dt_proj.bias`.
    time_step_init_scheme (`str`, *optional*, defaults to `"random"`):
        Init scheme used for `dt_proj.weight`. Should be one of `["random", "uniform"]`.
    time_step_floor (`float`, *optional*, defaults to 0.0001):
        Minimum clamping value of the `dt_proj.bias` layer initialization.
    rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
        Whether or not to rescale `out_proj` weights when initializing.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the cache should be used.
    use_mambapy (`bool`, *optional*, defaults to `False`):
        Determines the fallback strategy during training if the CUDA-based official implementation of Mamba is not
        available. If `True`, the mamba.py implementation is used. If `False`, the naive and slower implementation is
        used. Consider switching to the naive version if memory is limited.


Example:

```python
>>> from transformers import MambaConfig, MambaModel

>>> # Initializing a Mamba configuration
>>> configuration = MambaConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = MambaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
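
>>> # A quick sanity check on the derived attributes (assumes the default hidden_size of 768)
>>> configuration.intermediate_size  # int(expand * hidden_size) = 2 * 768
1536
>>> configuration.time_step_rank  # "auto" resolves to math.ceil(768 / 16)
48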
```"""

    model_type = "mamba"

    def __init__(
        self,
        vocab_size=50280,
        hidden_size=768,
        state_size=16,
        num_hidden_layers=32,
        layer_norm_epsilon=1e-5,
        pad_token_id=0,
        bos_token_id=0,
        eos_token_id=0,
        expand=2,
        conv_kernel=4,
        use_bias=False,
        use_conv_bias=True,
        hidden_act="silu",
        initializer_range=0.1,
        residual_in_fp32=True,
        time_step_rank="auto",
        time_step_scale=1.0,
        time_step_min=0.001,
        time_step_max=0.1,
        time_step_init_scheme="random",
        time_step_floor=1e-4,
        rescale_prenorm_residual=False,
        use_cache=True,
        use_mambapy=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.state_size = state_size
        self.num_hidden_layers = num_hidden_layers
        self.layer_norm_epsilon = layer_norm_epsilon
        self.conv_kernel = conv_kernel
        self.expand = expand
        # The intermediate (mixer) width is derived from the expansion factor.
        self.intermediate_size = int(expand * self.hidden_size)
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.use_bias = use_bias
        self.use_conv_bias = use_conv_bias
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # "auto" resolves the discretization rank from the hidden size.
        self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank
        self.time_step_scale = time_step_scale
        self.time_step_min = time_step_min
        self.time_step_max = time_step_max
        self.time_step_init_scheme = time_step_init_scheme
        self.time_step_floor = time_step_floor
        self.rescale_prenorm_residual = rescale_prenorm_residual
        self.residual_in_fp32 = residual_in_fp32
        self.use_cache = use_cache
        self.use_mambapy = use_mambapy

        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, pad_token_id=pad_token_id, **kwargs)


__all__ = ["MambaConfig"]