
"""XLM configuration"""

from collections import OrderedDict
from typing import Mapping

from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class XLMConfig(PretrainedConfig):
    """
This is the configuration class to store the configuration of an [`XLMModel`] or a [`TFXLMModel`]. It is used to
instantiate an XLM model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the
[FacebookAI/xlm-mlm-en-2048](https://huggingface.co/FacebookAI/xlm-mlm-en-2048) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
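
A configuration can be serialized to disk and reloaded through the standard [`PretrainedConfig`] API. A minimal
sketch (the local directory path is illustrative):

```python
>>> from transformers import XLMConfig

>>> config = XLMConfig()
>>> config.save_pretrained("./my-xlm-config")  # writes config.json to the (illustrative) directory
>>> config = XLMConfig.from_pretrained("./my-xlm-config")
```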

Args:
    vocab_size (`int`, *optional*, defaults to 30145):
        Vocabulary size of the XLM model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`XLMModel`] or [`TFXLMModel`].
    emb_dim (`int`, *optional*, defaults to 2048):
        Dimensionality of the encoder layers and the pooler layer.
    n_layer (`int`, *optional*, defaults to 12):
        Number of hidden layers in the Transformer encoder.
    n_head (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    dropout (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    attention_dropout (`float`, *optional*, defaults to 0.1):
        The dropout probability for the attention mechanism.
    gelu_activation (`bool`, *optional*, defaults to `True`):
        Whether or not to use *gelu* for the activations instead of *relu*.
    sinusoidal_embeddings (`bool`, *optional*, defaults to `False`):
        Whether or not to use sinusoidal positional embeddings instead of absolute positional embeddings.
    causal (`bool`, *optional*, defaults to `False`):
        Whether or not the model should behave in a causal manner. Causal models use a triangular attention mask in
        order to only attend to the left-side context instead of a bidirectional context.
    asm (`bool`, *optional*, defaults to `False`):
        Whether or not to use an adaptive log softmax projection layer instead of a linear layer for the prediction
        layer.
    n_langs (`int`, *optional*, defaults to 1):
        The number of languages the model handles. Set to 1 for monolingual models.
    use_lang_emb (`bool`, *optional*, defaults to `True`):
        Whether to use language embeddings. Some models use additional language embeddings; see [the multilingual
        models page](http://huggingface.co/transformers/multilingual.html#xlm-language-embeddings) for information
        on how to use them. A multilingual setup is sketched in the example below.
    max_position_embeddings (`int`, *optional*, defaults to 512):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    embed_init_std (`float`, *optional*, defaults to 2048^-0.5):
        The standard deviation of the truncated_normal_initializer for initializing the embedding matrices.
    init_std (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices except the
        embedding matrices.
    layer_norm_eps (`float`, *optional*, defaults to 1e-12):
        The epsilon used by the layer normalization layers.
    bos_index (`int`, *optional*, defaults to 0):
        The index of the beginning of sentence token in the vocabulary.
    eos_index (`int`, *optional*, defaults to 1):
        The index of the end of sentence token in the vocabulary.
    pad_index (`int`, *optional*, defaults to 2):
        The index of the padding token in the vocabulary.
    unk_index (`int`, *optional*, defaults to 3):
        The index of the unknown token in the vocabulary.
    mask_index (`int`, *optional*, defaults to 5):
        The index of the masking token in the vocabulary.
    is_encoder (`bool`, *optional*, defaults to `True`):
        Whether or not the initialized model should be a transformer encoder or decoder as seen in Vaswani et al.
    summary_type (`str`, *optional*, defaults to `"first"`):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.

        Has to be one of the following options:

            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.

        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.

        Pass `"tanh"` for a tanh activation to the output; any other value will result in no activation.
    summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
        Used in the sequence classification and multiple choice models.

        Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
    summary_first_dropout (`float`, *optional*, defaults to 0.1):
        Used in the sequence classification and multiple choice models.

        The dropout ratio to be used after the projection and activation.
    start_n_top (`int`, *optional*, defaults to 5):
        Used in the SQuAD evaluation script.
    end_n_top (`int`, *optional*, defaults to 5):
        Used in the SQuAD evaluation script.
    mask_token_id (`int`, *optional*, defaults to 0):
        Model-agnostic parameter to identify masked tokens when generating text in an MLM context.
    lang_id (`int`, *optional*, defaults to 0):
        The ID of the language used by the model. This parameter is used when generating text in a given language.

Examples:

```python
>>> from transformers import XLMConfig, XLMModel

>>> # Initializing an XLM configuration
>>> configuration = XLMConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = XLMModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
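
>>> # A hedged sketch of a non-default setup (the values are illustrative, not
>>> # from the original docs): a multilingual config with language embeddings
>>> # and a "mean" sequence summary
>>> multilingual_config = XLMConfig(n_langs=15, use_lang_emb=True, summary_type="mean")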
```"""

    model_type = "xlm"
    attribute_map = {
        "hidden_size": "emb_dim",
        "num_attention_heads": "n_heads",
        "num_hidden_layers": "n_layers",
        "n_words": "vocab_size",  # legacy name kept for backward compatibility
    }

    def __init__(
        self,
        vocab_size=30145,
        emb_dim=2048,
        n_layers=12,
        n_heads=16,
        dropout=0.1,
        attention_dropout=0.1,
        gelu_activation=True,
        sinusoidal_embeddings=False,
        causal=False,
        asm=False,
        n_langs=1,
        use_lang_emb=True,
        max_position_embeddings=512,
        embed_init_std=2048**-0.5,
        layer_norm_eps=1e-12,
        init_std=0.02,
        bos_index=0,
        eos_index=1,
        pad_index=2,
        unk_index=3,
        mask_index=5,
        is_encoder=True,
        summary_type="first",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        start_n_top=5,
        end_n_top=5,
        mask_token_id=0,
        lang_id=0,
        pad_token_id=2,
        bos_token_id=0,
        **kwargs,
    ):
        """Constructs XLMConfig."""
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.gelu_activation = gelu_activation
        self.sinusoidal_embeddings = sinusoidal_embeddings
        self.causal = causal
        self.asm = asm
        self.n_langs = n_langs
        self.use_lang_emb = use_lang_emb
        self.layer_norm_eps = layer_norm_eps
        self.bos_index = bos_index
        self.eos_index = eos_index
        self.pad_index = pad_index
        self.unk_index = unk_index
        self.mask_index = mask_index
        self.is_encoder = is_encoder
        self.max_position_embeddings = max_position_embeddings
        self.embed_init_std = embed_init_std
        self.init_std = init_std
        self.summary_type = summary_type
        self.summary_use_proj = summary_use_proj
        self.summary_activation = summary_activation
        self.summary_proj_to_labels = summary_proj_to_labels
        self.summary_first_dropout = summary_first_dropout
        self.start_n_top = start_n_top
        self.end_n_top = end_n_top
        self.mask_token_id = mask_token_id
        self.lang_id = lang_id

        if "n_words" in kwargs:
            self.n_words = kwargs["n_words"]

        super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
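
# The `attribute_map` above aliases the generic `PretrainedConfig` attribute names
# onto XLM's own parameter names. A minimal sketch of what that buys you (assumed
# usage, not part of the original module):
#
#     config = XLMConfig(emb_dim=1024, n_layers=6)
#     assert config.hidden_size == 1024  # resolves to config.emb_dim
#     assert config.num_hidden_layers == 6  # resolves to config.n_layers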


class XLMOnnxConfig(OnnxConfig):
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        # Multiple-choice inputs carry an extra "choice" axis between the batch
        # and sequence dimensions; all other tasks use 2-D (batch, sequence) inputs.
        if self.task == "multiple-choice":
            dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
        else:
            dynamic_axis = {0: "batch", 1: "sequence"}
        return OrderedDict(
            [
                ("input_ids", dynamic_axis),
                ("attention_mask", dynamic_axis),
                ("token_type_ids", dynamic_axis),
            ]
        )


__all__ = ["XLMConfig", "XLMOnnxConfig"]
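
# A quick sketch of how `XLMOnnxConfig.inputs` behaves (assumed usage, not part
# of the original module):
#
#     onnx_config = XLMOnnxConfig(XLMConfig(), task="multiple-choice")
#     onnx_config.inputs["input_ids"]  # {0: "batch", 1: "choice", 2: "sequence"}
#
#     onnx_config = XLMOnnxConfig(XLMConfig())  # task defaults to "default"
#     onnx_config.inputs["input_ids"]  # {0: "batch", 1: "sequence"}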