"""UDOP model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class UdopConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`UdopForConditionalGeneration`]. It is used to
    instantiate a UDOP model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the UDOP
    [microsoft/udop-large](https://huggingface.co/microsoft/udop-large) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Arguments:
        vocab_size (`int`, *optional*, defaults to 33201):
            Vocabulary size of the UDOP model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`UdopForConditionalGeneration`].
        d_model (`int`, *optional*, defaults to 1024):
            Size of the encoder and decoder layers (the hidden size of the model).
        d_kv (`int`, *optional*, defaults to 64):
            Size of the key, query, value projections per attention head. The `inner_dim` of the projection layer will
            be defined as `num_heads * d_kv`.
        d_ff (`int`, *optional*, defaults to 4096):
            Size of the intermediate feed forward layer in each `UdopBlock`.
        num_layers (`int`, *optional*, defaults to 24):
            Number of hidden layers in the Transformer encoder and decoder.
        num_decoder_layers (`int`, *optional*):
            Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set.
        num_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder and decoder.
        relative_attention_num_buckets (`int`, *optional*, defaults to 32):
            The number of buckets to use for each attention layer.
        relative_attention_max_distance (`int`, *optional*, defaults to 128):
            The maximum distance of the longer sequences for the bucket separation.
        relative_bias_args (`List[dict]`, *optional*, defaults to `[{'type': '1d'}, {'type': 'horizontal'}, {'type': 'vertical'}]`):
            A list of dictionaries containing the arguments for the relative bias layers.
        dropout_rate (`float`, *optional*, defaults to 0.1):
            The ratio for all dropout layers.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        initializer_factor (`float`, *optional*, defaults to 1.0):
            A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
            testing).
        feed_forward_proj (`string`, *optional*, defaults to `"relu"`):
            Type of feed-forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`. UDOP v1.1 uses the
            `"gated-gelu"` feed-forward projection. The original UDOP uses `"relu"`.
        is_encoder_decoder (`bool`, *optional*, defaults to `True`):
            Whether the model should behave as an encoder/decoder or not.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        pad_token_id (`int`, *optional*, defaults to 0):
            The id of the padding token in the vocabulary.
        eos_token_id (`int`, *optional*, defaults to 1):
            The id of the end-of-sequence token in the vocabulary.
        max_2d_position_embeddings (`int`, *optional*, defaults to 1024):
            The maximum absolute position embeddings for relative position encoding.
        image_size (`int`, *optional*, defaults to 224):
            The size of the input images.
        patch_size (`int`, *optional*, defaults to 16):
            The patch size used by the vision encoder.
        num_channels (`int`, *optional*, defaults to 3):
            The number of channels in the input images.
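
    Example (a minimal usage sketch; `UdopForConditionalGeneration` is the model class referenced above, and the
    default configuration mirrors the microsoft/udop-large architecture):

    ```python
    >>> from transformers import UdopConfig, UdopForConditionalGeneration

    >>> # Initializing a UDOP microsoft/udop-large style configuration
    >>> configuration = UdopConfig()

    >>> # Initializing a model (with random weights) from that configuration
    >>> model = UdopForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```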
    """

    model_type = "udop"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {"hidden_size": "d_model", "num_attention_heads": "num_heads", "num_hidden_layers": "num_layers"}

    def __init__(
        self,
        vocab_size=33201,
        d_model=1024,
        d_kv=64,
        d_ff=4096,
        num_layers=24,
        num_decoder_layers=None,
        num_heads=16,
        relative_attention_num_buckets=32,
        relative_attention_max_distance=128,
        relative_bias_args=[{"type": "1d"}, {"type": "horizontal"}, {"type": "vertical"}],
        dropout_rate=0.1,
        layer_norm_epsilon=1e-6,
        initializer_factor=1.0,
        feed_forward_proj="relu",
        is_encoder_decoder=True,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=1,
        max_2d_position_embeddings=1024,
        image_size=224,
        patch_size=16,
        num_channels=3,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.d_kv = d_kv
        self.d_ff = d_ff
        self.num_layers = num_layers
        # Default to a symmetric encoder/decoder if num_decoder_layers is not given.
        self.num_decoder_layers = num_decoder_layers if num_decoder_layers is not None else self.num_layers
        self.num_heads = num_heads
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.dropout_rate = dropout_rate
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_factor = initializer_factor
        self.feed_forward_proj = feed_forward_proj
        self.use_cache = use_cache

        self.max_2d_position_embeddings = max_2d_position_embeddings
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels

        if not isinstance(relative_bias_args, list):
            raise TypeError("`relative_bias_args` should be a list of dictionaries.")
        self.relative_bias_args = relative_bias_args

        # `feed_forward_proj` is either an activation name or "gated-<activation>", e.g. "relu" or "gated-gelu".
        act_info = self.feed_forward_proj.split("-")
        self.dense_act_fn = act_info[-1]
        self.is_gated_act = act_info[0] == "gated"

        if len(act_info) > 1 and act_info[0] != "gated" or len(act_info) > 2:
            raise ValueError(
                f"`feed_forward_proj`: {feed_forward_proj} is not a valid activation function of the dense layer. "
                "Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. "
                "'gated-gelu' or 'relu'"
            )

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            is_encoder_decoder=is_encoder_decoder,
            **kwargs,
        )


__all__ = ["UdopConfig"]