
    fTh;                         S r SSKJr  SSKJr  \R
                  " \5      r " S S\5      r " S S\5      r	 " S S	\5      r
S	/rg
)zIdefics model configuration   )PretrainedConfig)loggingc                   T   ^  \ rS rSrSrSrSS0r            SU 4S jjrSrU =r	$ )	IdeficsVisionConfig   a  
This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Idefics-9B.

e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    embed_dim (`int`, *optional*, defaults to 768):
        Dimensionality of the encoder layers and the pooler layer. (elsewhere referred to as `hidden_size`)
    image_size (`int`, *optional*, defaults to 224):
        The size (resolution) of each image.
    intermediate_size (`int`, *optional*, defaults to 5120):
        Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
    patch_size (`int`, *optional*, defaults to 14):
        The size (resolution) of each patch.
    num_hidden_layers (`int`, *optional*, defaults to 32):
        Number of hidden layers in the Transformer encoder.
    num_attention_heads (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    num_channels (`int`, *optional*, defaults to 3):
        Number of image channels.
    hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported.
    layer_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon used by the layer normalization layers.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the attention probabilities.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    initializer_factor (`float`, *optional*, defaults to 1.0):
        A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization
        testing).
idefics_visionhidden_size	embed_dimc                    > Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        Xl	        Xl
        Xl        [        TU ]4  " S0 UD6  g N )r
   
image_sizeintermediate_size
patch_sizenum_hidden_layersnum_attention_headsnum_channelslayer_norm_epsattention_dropoutinitializer_rangeinitializer_factor
hidden_actsuper__init__)selfr
   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                 i/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/idefics/configuration_idefics.pyr   IdeficsVisionConfig.__init__J   sY      #$!2$!2#6 (,!2!2"4$"6"    )r   r
   r   r   r   r   r   r   r   r   r   r   )i      i             r   gelugh㈵>        {Gz?g      ?)
__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr   __static_attributes____classcell__r   s   @r   r   r      sL    %N "J{M # #r    r   c                   @   ^  \ rS rSrSrSr      SU 4S jjrSrU =r$ )IdeficsPerceiverConfigj   aw  
This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Idefics-9B.

e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    use_resampler (`bool`, *optional*, defaults to `False`):
        Whether or not to use the resampler
    resampler_n_latents (`int`, *optional*, defaults to 64):
        Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
    resampler_depth (`int`, *optional*, defaults to 6):
        Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3).
    resampler_n_heads (`int`, *optional*, defaults to 16):
        Number of heads in each Transformer block (for multi-headed self-attention).
    resampler_head_dim (`int`, *optional*, defaults to 96):
        Dimensionality of each head projection in the Transformer block.
    qk_layer_norms_perceiver (`bool`, *optional*, defaults to `False`):
        Whether or not to use qk layer norms in perceiver
idefics_percieverc                 n   > Xl         X l        X0l        X@l        XPl        X`l        [        TU ]  " S0 UD6  g r   )use_resamplerresampler_n_latentsresampler_depthresampler_n_headsresampler_head_dimqk_layer_norms_perceiverr   r   )	r   r7   r8   r9   r:   r;   r<   r   r   s	           r   r   IdeficsPerceiverConfig.__init__   s9     +#6 .!2"4(@%"6"r    )r<   r9   r;   r:   r8   r7   )F@      r$   `   F)	r(   r)   r*   r+   r,   r-   r   r/   r0   r1   s   @r   r3   r3   j   s-    2 %J !&# #r    r3   c                   v   ^  \ rS rSrSrSr\\S.rSSSSS	S	S
SSSS
SSSSSSSSSS/ SS/ SSS4U 4S jjr	Sr
U =r$ )IdeficsConfig   a  
This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Idefics-9B.

e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    additional_vocab_size (`int`, *optional*, defaults to 0):
        Additional vocabulary size of the model, typically for the special "<img>" token. Additional vocab tokens
        are always trainable whereas regular vocab tokens can be frozen or not.
    vocab_size (`int`, *optional*, defaults to 32000):
        Vocabulary size of the Idefics model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`~IdeficsModel`]
    hidden_size (`int`, *optional*, defaults to 4096):
        Dimension of the hidden representations.
    intermediate_size (`int`, *optional*, defaults to 11008):
        Dimension of the MLP representations.
    num_hidden_layers (`int`, *optional*, defaults to 32):
        Number of hidden layers in the Transformer encoder.
    num_attention_heads (`int`, *optional*, defaults to 32):
        Number of attention heads for each attention layer in the Transformer encoder.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
        The non-linear activation function (function or string) in the decoder.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    alpha_initializer (`str`, *optional*, defaults to `"zeros"`):
        Initialization type for the alphas.
    alphas_initializer_range (`float`, *optional*, defaults to 0.0):
        The standard deviation of the truncated_normal_initializer for initializing the alphas in the Gated Cross
        Attention.
    alpha_type (`str`, *optional*, defaults to `"float"`):
        Whether the gating alphas should be vectors or single floats.
    rms_norm_eps (`float`, *optional*, defaults to 1e-6):
        The epsilon used by the rms normalization layers.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models). Only
        relevant if `config.is_decoder=True`.
    pad_token_id (`int`, *optional*, defaults to 0)
        Padding token id.
    bos_token_id (`int`, *optional*, defaults to 1)
        Beginning of stream token id.
    eos_token_id (`int`, *optional*, defaults to 2)
        End of stream token id.
    tie_word_embeddings(`bool`, *optional*, defaults to `False`):
        Whether to tie weight embeddings
    cross_layer_interval (`int`, *optional*, default to 1)
        Interval for cross attention (from text to image) layers.
    qk_layer_norms (`bool`, *optional*, defaults to `False`): Whether to add layer norm after q and k
    freeze_text_layers (`bool`, *optional*, defaults to `True`): Whether to freeze text layers
    freeze_text_module_exceptions (`bool`, *optional*, defaults to `[]`):
        Exceptions to freezing text layers when `freeze_text_layers` is `True`
    freeze_lm_head (`bool`, *optional*, defaults to `False`): Whether to freeze lm head
    freeze_vision_layers (`bool`, *optional*, defaults to `True`):  Whether to freeze vision layers
    freeze_vision_module_exceptions (`bool`, *optional*, defaults to `[]`):
        Exceptions to freezing vision layers when `freeze_vision_layers` is `True`
    use_resampler (`bool`, *optional*, defaults to `False`): Whether to use the Resampler
    vision_config (`IdeficsVisionConfig`,  *optional*): Custom vision config or dict
    perceiver_config (`IdeficsPerceiverConfig`,  *optional*): Custom perceiver config or dict

Example:

```python
>>> from transformers import IdeficsModel, IdeficsConfig

>>> # Initializing a Idefics idefics-9b style configuration
>>> configuration = IdeficsConfig()

>>> # Initializing a model from the idefics-9b style configuration
>>> model = IdeficsModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```idefics)perceiver_configvision_configi }      i   i +  r#   r&   silur'   zerosfloatgư>T      FNc                   > Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        Xl	        Xl
        Xl        Xl        Xl        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        Uc  [-        5       U l        OB[1        U[2        5      (       a  [-        S0 UD6U l        O[1        U[,        5      (       a  UU l        Uc  [5        5       U l        OB[1        U[2        5      (       a  [5        S0 UD6U l        O[1        U[4        5      (       a  UU l        [8        TU ]t  " SUUUUS.UD6  g )N)pad_token_idbos_token_ideos_token_idtie_word_embeddingsr   )
vocab_sizeadditional_vocab_sizer	   r   r   r   dropoutr   r   alpha_initializeralphas_initializer_range
alpha_typerms_norm_eps	use_cachecross_layer_intervalqk_layer_normsfreeze_vision_layersfreeze_text_layersfreeze_text_module_exceptionsfreeze_vision_module_exceptionsfreeze_lm_headr7   r3   rE   
isinstancedictr   rF   r   r   )r   rR   rS   r	   r   r   r   rT   r   r   rU   rV   rW   rX   rY   rN   rO   rP   rQ   rZ   r[   r]   r^   r`   r\   r_   r7   rF   rE   r   r   s                                 r   r   IdeficsConfig.__init__   sH   @ %%:"&!2!2#6 $!2!2(@%$("$8!,$8!"4-J*/N,,*#$:$<D!($//$:$N=M$ND!(*@AA$4D! !4!6Dt,,!4!E}!ED':;;!.D 	
%%% 3		

 	
r    )rS   rU   rW   rV   rZ   rT   r`   r]   r^   r\   r_   r   r	   r   r   r   r   rE   r[   rX   rY   r7   rF   rR   )r(   r)   r*   r+   r,   r-   r3   r   sub_configsr   r/   r0   r1   s   @r   rB   rB      s~    N` J'=PcdK !!$!&(!(*;N
 N
r    rB   N)r,   configuration_utilsr   utilsr   
get_loggerr(   loggerr   r3   rB   __all__r   r    r   <module>rj      sZ   ( " 3  
		H	%J#* J#Z-#- -#`b
$ b
V 
r    