
    fTh                         S r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
  SSKJr  \R                  " \5      r " S	 S
\5      r " S S\
5      rS
S/rg)zMobileViTV2 model configuration    OrderedDict)Mapping)version   )PretrainedConfig)
OnnxConfig)loggingc                   h   ^  \ rS rSrSrSrSSSSSSS	S
SSS/ SQS
S/ SQ/ SQSSSS4U 4S jjrSrU =r$ )MobileViTV2Config   a  
This is the configuration class to store the configuration of a [`MobileViTV2Model`]. It is used to instantiate a
MobileViTV2 model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the MobileViTV2
[apple/mobilevitv2-1.0](https://huggingface.co/apple/mobilevitv2-1.0) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    num_channels (`int`, *optional*, defaults to 3):
        The number of input channels.
    image_size (`int`, *optional*, defaults to 256):
        The size (resolution) of each image.
    patch_size (`int`, *optional*, defaults to 2):
        The size (resolution) of each patch.
    expand_ratio (`float`, *optional*, defaults to 2.0):
        Expansion factor for the MobileNetv2 layers.
    hidden_act (`str` or `function`, *optional*, defaults to `"swish"`):
        The non-linear activation function (function or string) in the Transformer encoder and convolution layers.
    conv_kernel_size (`int`, *optional*, defaults to 3):
        The size of the convolutional kernel in the MobileViTV2 layer.
    output_stride (`int`, *optional*, defaults to 32):
        The ratio of the spatial resolution of the output to the resolution of the input image.
    classifier_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for attached classifiers.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    layer_norm_eps (`float`, *optional*, defaults to 1e-05):
        The epsilon used by the layer normalization layers.
    aspp_out_channels (`int`, *optional*, defaults to 512):
        Number of output channels used in the ASPP layer for semantic segmentation.
    atrous_rates (`List[int]`, *optional*, defaults to `[6, 12, 18]`):
        Dilation (atrous) factors used in the ASPP layer for semantic segmentation.
    aspp_dropout_prob (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the ASPP layer for semantic segmentation.
    semantic_loss_ignore_index (`int`, *optional*, defaults to 255):
        The index that is ignored by the loss function of the semantic segmentation model.
    n_attn_blocks (`List[int]`, *optional*, defaults to `[2, 4, 3]`):
        The number of attention blocks in each MobileViTV2Layer
    base_attn_unit_dims (`List[int]`, *optional*, defaults to `[128, 192, 256]`):
        The base multiplier for dimensions of attention blocks in each MobileViTV2Layer
    width_multiplier (`float`, *optional*, defaults to 1.0):
        The width multiplier for MobileViTV2.
    ffn_multiplier (`int`, *optional*, defaults to 2):
        The FFN multiplier for MobileViTV2.
    attn_dropout (`float`, *optional*, defaults to 0.0):
        The dropout in the attention layer.
    ffn_dropout (`float`, *optional*, defaults to 0.0):
        The dropout between FFN layers.

Example:

```python
>>> from transformers import MobileViTV2Config, MobileViTV2Model

>>> # Initializing a mobilevitv2-small style configuration
>>> configuration = MobileViTV2Config()

>>> # Initializing a model from the mobilevitv2-small style configuration
>>> model = MobileViTV2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```mobilevitv2r         g       @swish    g?g{Gz?gh㈵>i   )            )r      r   )      r   g      ?g        c                    > [         TU ]  " S0 UD6  Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl	        Xl
        Xl        UU l        UU l        UU l        UU l        UU l        Xl        Xl        Xl        Xl        Xl        g )N )super__init__num_channels
image_size
patch_sizeexpand_ratio
hidden_actconv_kernel_sizeoutput_strideinitializer_rangelayer_norm_epsn_attn_blocksbase_attn_unit_dimswidth_multiplierffn_multiplierffn_dropoutattn_dropoutclassifier_dropout_probaspp_out_channelsatrous_ratesaspp_dropout_probsemantic_loss_ignore_index)selfr   r   r    r!   r"   r#   r$   r-   r%   r&   r.   r/   r0   r1   r'   r(   r)   r*   r,   r+   kwargs	__class__s                         q/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/mobilevitv2/configuration_mobilevitv2.pyr   MobileViTV2Config.__init__c   s    0 	"6"($$($ 0*!2,*#6  0,&('>$ "3(!2*D'    )r0   r.   r/   r,   r(   r-   r#   r!   r+   r*   r"   r   r%   r&   r'   r   r$   r    r1   r)   )	__name__
__module____qualname____firstlineno____doc__
model_typer   __static_attributes____classcell__)r4   s   @r5   r   r      s[    @D J  # #&++/E /Er7   r   c                       \ rS rSr\R
                  " S5      r\S\\	\\
\	4   4   4S j5       r\S\\	\\
\	4   4   4S j5       r\S\4S j5       rSrg)	MobileViTV2OnnxConfig   z1.11returnc                 (    [        SSSSSS.4/5      $ )Npixel_valuesbatchr   heightwidth)r      r   r   r   r2   s    r5   inputsMobileViTV2OnnxConfig.inputs   s     ^^PX]d-efghhr7   c                 l    U R                   S:X  a  [        SSS04/5      $ [        SSS04SSS04/5      $ )Nzimage-classificationlogitsr   rF   last_hidden_statepooler_output)taskr   rJ   s    r5   outputsMobileViTV2OnnxConfig.outputs   sL    99..Aw< 89::!4q'l CoXY[bWcEdeffr7   c                     g)Ng-C6?r   rJ   s    r5   atol_for_validation)MobileViTV2OnnxConfig.atol_for_validation   s    r7   r   N)r8   r9   r:   r;   r   parsetorch_onnx_minimum_versionpropertyr   strintrK   rR   floatrU   r>   r   r7   r5   rA   rA      s    !(v!6iWS#X%6 67 i i ggc3h&7!78 g g U  r7   rA   N)r<   collectionsr   typingr   	packagingr   configuration_utilsr   onnxr	   utilsr
   
get_loggerr8   loggerr   rA   __all__r   r7   r5   <module>rf      sY    & #   3   
		H	%tE( tEnJ &  7
8r7   