
    fTh                     `    S r SSKJr  SSKJr  \R
                  " \5      r " S S\5      rS/r	g)zCANINE model configuration   )PretrainedConfig)loggingc                   Z   ^  \ rS rSrSrSr                   SU 4S jjrSrU =r$ )CanineConfig   a  
This is the configuration class to store the configuration of a [`CanineModel`]. It is used to instantiate an
CANINE model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the CANINE
[google/canine-s](https://huggingface.co/google/canine-s) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.


Args:
    hidden_size (`int`, *optional*, defaults to 768):
        Dimension of the encoder layers and the pooler layer.
    num_hidden_layers (`int`, *optional*, defaults to 12):
        Number of hidden layers in the deep Transformer encoder.
    num_attention_heads (`int`, *optional*, defaults to 12):
        Number of attention heads for each attention layer in the Transformer encoders.
    intermediate_size (`int`, *optional*, defaults to 3072):
        Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoders.
    hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"selu"` and `"gelu_new"` are supported.
    hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoders, and pooler.
    attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention probabilities.
    max_position_embeddings (`int`, *optional*, defaults to 16384):
        The maximum sequence length that this model might ever be used with.
    type_vocab_size (`int`, *optional*, defaults to 16):
        The vocabulary size of the `token_type_ids` passed when calling [`CanineModel`].
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    layer_norm_eps (`float`, *optional*, defaults to 1e-12):
        The epsilon used by the layer normalization layers.
    pad_token_id (`int`, *optional*, defaults to 0):
        Padding token id.
    bos_token_id (`int`, *optional*, defaults to 57344):
        Beginning of stream token id.
    eos_token_id (`int`, *optional*, defaults to 57345):
        End of stream token id.
    downsampling_rate (`int`, *optional*, defaults to 4):
        The rate at which to downsample the original character sequence length before applying the deep Transformer
        encoder.
    upsampling_kernel_size (`int`, *optional*, defaults to 4):
        The kernel size (i.e. the number of characters in each window) of the convolutional projection layer when
        projecting back from `hidden_size`*2 to `hidden_size`.
    num_hash_functions (`int`, *optional*, defaults to 8):
        The number of hash functions to use. Each hash function has its own embedding matrix.
    num_hash_buckets (`int`, *optional*, defaults to 16384):
        The number of hash buckets to use.
    local_transformer_stride (`int`, *optional*, defaults to 128):
        The stride of the local attention of the first shallow Transformer encoder. Defaults to 128 for good
        TPU/XLA memory alignment.

Example:

```python
>>> from transformers import CanineConfig, CanineModel

>>> # Initializing a CANINE google/canine-s style configuration
>>> configuration = CanineConfig()

>>> # Initializing a model (with random weights) from the google/canine-s style configuration
>>> model = CanineModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```caninec                    > [         TU ]  " SXUS.UD6  Xl        Xl        X l        X0l        X@l        XPl        X`l        Xpl	        Xl
        Xl        Xl        Xl        UU l        UU l        UU l        UU l        g )N)pad_token_idbos_token_ideos_token_id )super__init__max_position_embeddingshidden_sizenum_hidden_layersnum_attention_headsintermediate_size
hidden_acthidden_dropout_probattention_probs_dropout_probinitializer_rangetype_vocab_sizelayer_norm_epsdownsampling_rateupsampling_kernel_sizenum_hash_functionsnum_hash_bucketslocal_transformer_stride)selfr   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   kwargs	__class__s                        g/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/canine/configuration_canine.pyr   CanineConfig.__init__`   s    . 	sl\hslrs'>$&!2#6 !2$#6 ,H)!2., "3&<#"4 0(@%    )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )i      r&   i   gelu皙?r(    @     g{Gz?g-q=    i   i     r,      r)      )	__name__
__module____qualname____firstlineno____doc__
model_typer   __static_attributes____classcell__)r"   s   @r#   r   r      sX    CJ J %( % !$)*A *Ar%   r   N)
r3   configuration_utilsr   utilsr   
get_loggerr/   loggerr   __all__r   r%   r#   <module>r<      s>    ! 3  
		H	%rA# rAj 
r%   