"""DPT model configuration"""

import copy

from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ...utils.backbone_utils import verify_backbone_config_arguments
from ..auto.configuration_auto import CONFIG_MAPPING
from ..bit import BitConfig


logger = logging.get_logger(__name__)


class DPTConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`DPTModel`]. It is used to instantiate a DPT
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the DPT
    [Intel/dpt-large](https://huggingface.co/Intel/dpt-large) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 384):
            The size (resolution) of each image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        is_hybrid (`bool`, *optional*, defaults to `False`):
            Whether to use a hybrid backbone. Useful in the context of loading DPT-Hybrid models; see the sketch
            below the argument list.
        qkv_bias (`bool`, *optional*, defaults to `True`):
            Whether to add a bias to the queries, keys and values.
        backbone_out_indices (`List[int]`, *optional*, defaults to `[2, 5, 8, 11]`):
            Indices of the intermediate hidden states to use from the backbone.
        readout_type (`str`, *optional*, defaults to `"project"`):
            The readout type to use when processing the readout token (CLS token) of the intermediate hidden states of
            the ViT backbone. Can be one of [`"ignore"`, `"add"`, `"project"`].

            - "ignore" simply ignores the CLS token.
            - "add" passes the information from the CLS token to all other tokens by adding the representations.
            - "project" passes information to the other tokens by concatenating the readout to all other tokens before
              projecting the
            representation to the original feature dimension D using a linear layer followed by a GELU non-linearity.
        reassemble_factors (`List[float]`, *optional*, defaults to `[4, 2, 1, 0.5]`):
            The up/downsampling factors of the reassemble layers.
        neck_hidden_sizes (`List[int]`, *optional*, defaults to `[96, 192, 384, 768]`):
            The hidden sizes to project to for the feature maps of the backbone.
        fusion_hidden_size (`int`, *optional*, defaults to 256):
            The number of channels before fusion.
        head_in_index (`int`, *optional*, defaults to -1):
            The index of the features to use in the heads.
        use_batch_norm_in_fusion_residual (`bool`, *optional*, defaults to `False`):
            Whether to use batch normalization in the pre-activate residual units of the fusion blocks.
        use_bias_in_fusion_residual (`bool`, *optional*, defaults to `True`):
            Whether to use bias in the pre-activate residual units of the fusion blocks.
        add_projection (`bool`, *optional*, defaults to `False`):
            Whether to add a projection layer before the depth estimation head.
        use_auxiliary_head (`bool`, *optional*, defaults to `True`):
            Whether to use an auxiliary head during training.
        auxiliary_loss_weight (`float`, *optional*, defaults to 0.4):
            Weight of the cross-entropy loss of the auxiliary head.
        semantic_loss_ignore_index (`int`, *optional*, defaults to 255):
            The index that is ignored by the loss function of the semantic segmentation model.
        semantic_classifier_dropout (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the semantic classification head.
        backbone_featmap_shape (`List[int]`, *optional*, defaults to `[1, 1024, 24, 24]`):
            Used only for the `hybrid` embedding type. The shape of the feature maps of the backbone.
        neck_ignore_stages (`List[int]`, *optional*, defaults to `[0, 1]`):
            Used only for the `hybrid` embedding type. The stages of the readout layers to ignore.
        backbone_config (`Union[Dict[str, Any], PretrainedConfig]`, *optional*):
            The configuration of the backbone model. Only used in case `is_hybrid` is `True` or in case you want to
            leverage the [`AutoBackbone`] API; see the sketch below the argument list.
        backbone (`str`, *optional*):
            Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this
            will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone`
            is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights.
        use_pretrained_backbone (`bool`, *optional*, defaults to `False`):
            Whether to use pretrained weights for the backbone.
        use_timm_backbone (`bool`, *optional*, defaults to `False`):
            Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers
            library.
        backbone_kwargs (`dict`, *optional*):
            Keyword arguments to be passed to AutoBackbone when loading from a checkpoint,
            e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set.
        pooler_output_size (`int`, *optional*):
            Dimensionality of the pooler layer. If None, defaults to `hidden_size`.
        pooler_act (`str`, *optional*, defaults to `"tanh"`):
            The activation function to be used by the pooler. Keys of ACT2FN are supported for Flax and PyTorch,
            and elements of https://www.tensorflow.org/api_docs/python/tf/keras/activations are supported for
            TensorFlow.
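
    The `is_hybrid`, `backbone`/`backbone_config` and `readout_type` arguments interact; the sketch below shows a
    minimal configuration for each backbone path (`"resnet50"` is only an illustrative backbone name, and the
    variable names are not part of the API):

    ```python
    >>> from transformers import DPTConfig

    >>> # 1. Plain ViT encoder (the default): no backbone arguments are needed.
    >>> vit_configuration = DPTConfig()

    >>> # 2. DPT-Hybrid: `is_hybrid=True` with no `backbone_config` falls back to the default BiT backbone layout.
    >>> hybrid_configuration = DPTConfig(is_hybrid=True)

    >>> # 3. AutoBackbone: name a backbone instead; here it is initialized with random weights.
    >>> autobackbone_configuration = DPTConfig(backbone="resnet50", use_pretrained_backbone=False)

    >>> # The readout modes apply to the ViT paths, e.g. dropping the CLS token instead of projecting it.
    >>> ignore_readout_configuration = DPTConfig(readout_type="ignore")
    ```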

    Example:

    ```python
    >>> from transformers import DPTModel, DPTConfig

    >>> # Initializing a DPT dpt-large style configuration
    >>> configuration = DPTConfig()

    >>> # Initializing a model from the dpt-large style configuration
    >>> model = DPTModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
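
    >>> # Round-tripping through a plain dictionary (a sketch: the overridden
    >>> # `to_dict` inlines the backbone config whenever one is set)
    >>> config_dict = configuration.to_dict()
    >>> restored_configuration = DPTConfig.from_dict(config_dict)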
    ```Zdpt      i   Zgelug        g{Gz?g-q=     r   FT)r            project)   r      g      ?)`      r   r
      Ng?   g?)r   i      r   r   r   tanhc%           )         s  t  jdi |% || _|| _d}&| jrY|d u r$ddg dg ddd}t|tr6td tdi |}nt|t	r>|}n	t
d	|j d
|| _|| _|| _|dkrXt
dn9|d usa|d urd}&t|trv|d}'t|' }(|(|}|| _d | _g | _t|!| |||"d n	d | _d | _g | _|| _| | _|!| _|"| _|| _|| _|| _|| _|| _|	| _|
| _|| _|| _|| _ |&| _!|&rd n|| _"|dvrt
d|| _#|| _$|| _%|| _&|| _'|| _(|| _)|| _*|| _+|| _,|| _-|| _.|| _/|| _0|#r|#n|| _1|$| _2d S )NFZsameZ
bottleneck)r   r   	   )Zstage1Zstage2Zstage3T)Zglobal_paddingZ
layer_typeZdepthsZout_featuresZembedding_dynamic_paddingz.Initializing the config with a `BiT` backbone.zBbackbone_config must be a dictionary or a `PretrainedConfig`, got .r   z<Readout type must be 'project' when using `DPT-hybrid` mode.
model_type)use_timm_backboneuse_pretrained_backbonebackbonebackbone_configbackbone_kwargs)ignoreaddr   z8Readout_type must be one of ['ignore', 'add', 'project'] )3super__init__hidden_size	is_hybrid
isinstancedictloggerinfor   r   
ValueError	__class__r!   backbone_featmap_shapeneck_ignore_stagesgetr   	from_dictr   r    r   r   r"   num_hidden_layersnum_attention_headsintermediate_sizehidden_dropout_probattention_probs_dropout_problayer_norm_eps
image_size
patch_sizenum_channelsqkv_biasuse_autobackbonebackbone_out_indices
hidden_actinitializer_rangereadout_typereassemble_factorsneck_hidden_sizesfusion_hidden_sizehead_in_index!use_batch_norm_in_fusion_residualuse_bias_in_fusion_residualadd_projectionuse_auxiliary_headauxiliary_loss_weightsemantic_loss_ignore_indexsemantic_classifier_dropoutpooler_output_size
pooler_act))selfr(   r4   r5   r6   r@   r7   r8   rA   r9   r:   r;   r<   r)   r=   r?   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   r0   r1   r!   r    r   r   r"   rN   rO   kwargsr>   Zbackbone_model_typeZconfig_classr/   r%   X/var/www/auris/lib/python3.10/site-packages/transformers/models/dpt/configuration_dpt.pyr'      s   (






zDPTConfig.__init__c                 C   s6   t | j}|d dur| j |d< | jj|d< |S )z
        Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns:
            `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance,
        r!   Nr   )copydeepcopy__dict__r!   to_dictr/   r   )rP   outputr%   r%   rS   rW     s
   zDPTConfig.to_dictc                 C   s   | j d urdt| j iS i S )Nr!   )r!   type)rP   r%   r%   rS   sub_configs'  s   zDPTConfig.sub_configs)
__name__
__module____qualname____doc__r   r'   rW   propertyrZ   __classcell__r%   r%   rR   rS   r	      sX    r 	r	   )r^   rT   Zconfiguration_utilsr   utilsr   Zutils.backbone_utilsr   Zauto.configuration_autor   bitr   Z
get_loggerr[   r,   r	   __all__r%   r%   r%   rS   <module>   s   
  
