o
    Zh                     @   sP   d Z ddlmZ ddlmZ ddlmZmZ ee	Z
G dd deZdgZdS )	zLlava model configuration   )PretrainedConfig)logging   )CONFIG_MAPPING
AutoConfigc                       sH   e Zd ZdZdZddiZeedZ							
		d fdd	Z  Z	S )LlavaConfigaI
  
    This is the configuration class to store the configuration of a [`LlavaForConditionalGeneration`]. It is used to instantiate an
    Llava model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Llava-9B.

    e.g. [llava-hf/llava-9b](https://huggingface.co/llava-hf/llava-9b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (`Union[AutoConfig, dict]`,  *optional*, defaults to `CLIPVisionConfig`):
            The config object or dictionary of the vision backbone.
        text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `LlamaConfig`):
            The config object or dictionary of the text backbone.
        image_token_index (`int`, *optional*, defaults to 32000):
            The image token index to encode the image prompt.
        projector_hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The activation function used by the multimodal projector.
        vision_feature_select_strategy (`str`, *optional*, defaults to `"default"`):
            The feature selection strategy used to select the vision feature from the vision backbone.
            Can be one of `"default"` or `"full"`.
        vision_feature_layer (`Union[int, List[int]]`, *optional*, defaults to -2):
            The index of the layer to select the vision feature. If multiple indices are provided,
            the vision feature of the corresponding indices will be concatenated to form the
            vision features.
        image_seq_length (`int`, *optional*, defaults to 576):
            Sequence length of one image embedding.
        multimodal_projector_bias (`bool`, *optional*, defaults to `True`):
            Whether to use bias in the multimodal projector.

    Example:

    ```python
    >>> from transformers import LlavaForConditionalGeneration, LlavaConfig, CLIPVisionConfig, LlamaConfig

    >>> # Initializing a CLIP-vision config
    >>> vision_config = CLIPVisionConfig()

    >>> # Initializing a Llama config
    >>> text_config = LlamaConfig()

    >>> # Initializing a Llava llava-1.5-7b style configuration
    >>> configuration = LlavaConfig(vision_config, text_config)

    >>> # Initializing a model from the llava-1.5-7b style configuration
    >>> model = LlavaForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```ZllavaZimage_token_idimage_token_index)text_configvision_configN }  geludefault@  Tc	           
   
      s  || _ || _|| _|dvrtd| || _|| _t|tr7d|v r'|d nd|d< t|d  di |}n|d u rItd ddddd	d
ddd}|| _	t|trid|v rY|d nd|d< t|d  di |}n	|d u rrtd  }|| _
|| _t jdi |	 d S )N)r   fullzGvision_feature_select_strategy should be one of 'default', 'full'.Got: 
model_typeZclip_vision_modeli   i      iP        r   i   )Zintermediate_sizeZhidden_sizeZ
patch_sizeZ
image_sizeZnum_hidden_layersZnum_attention_headsZ
vocab_sizeZprojection_dimllama )r   projector_hidden_actimage_seq_length
ValueErrorvision_feature_select_strategyvision_feature_layer
isinstancedictr   r
   r	   multimodal_projector_biassuper__init__)
selfr
   r	   r   r   r   r   r   r   kwargs	__class__r   \/var/www/auris/lib/python3.10/site-packages/transformers/models/llava/configuration_llava.pyr    S   sF   


zLlavaConfig.__init__)NNr   r   r   r   r   T)
__name__
__module____qualname____doc__r   Zattribute_mapr   Zsub_configsr    __classcell__r   r   r#   r%   r      s    4
r   N)r)   Zconfiguration_utilsr   utilsr   autor   r   Z
get_loggerr&   loggerr   __all__r   r   r   r%   <module>   s   

s