
"""LeViT model configuration"""

from collections import OrderedDict
from typing import Mapping

from packaging import version

from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class LevitConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`LevitModel`]. It is used to instantiate a LeViT
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the LeViT
[facebook/levit-128S](https://huggingface.co/facebook/levit-128S) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    image_size (`int`, *optional*, defaults to 224):
        The size of the input image.
    num_channels (`int`, *optional*, defaults to 3):
        Number of channels in the input image.
    kernel_size (`int`, *optional*, defaults to 3):
        The kernel size for the initial convolution layers of patch embedding.
    stride (`int`, *optional*, defaults to 2):
        The stride size for the initial convolution layers of patch embedding.
    padding (`int`, *optional*, defaults to 1):
        The padding size for the initial convolution layers of patch embedding.
    patch_size (`int`, *optional*, defaults to 16):
        The patch size for embeddings.
    hidden_sizes (`List[int]`, *optional*, defaults to `[128, 256, 384]`):
        Dimension of each of the encoder blocks.
    num_attention_heads (`List[int]`, *optional*, defaults to `[4, 8, 12]`):
        Number of attention heads for each attention layer in each block of the Transformer encoder.
    depths (`List[int]`, *optional*, defaults to `[4, 4, 4]`):
        The number of layers in each encoder block.
    key_dim (`List[int]`, *optional*, defaults to `[16, 16, 16]`):
        The size of key in each of the encoder blocks.
    drop_path_rate (`int`, *optional*, defaults to 0):
        The dropout probability for stochastic depth, used in the blocks of the Transformer encoder.
    mlp_ratio (`List[int]`, *optional*, defaults to `[2, 2, 2]`):
        Ratio of the size of the hidden layer compared to the size of the input layer of the Mix FFNs in the
        encoder blocks.
    attention_ratio (`List[int]`, *optional*, defaults to `[2, 2, 2]`):
        Ratio of the size of the output dimension compared to input dimension of attention layers.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

Example:

```python
>>> from transformers import LevitConfig, LevitModel

>>> # Initializing a LeViT levit-128S style configuration
>>> configuration = LevitConfig()

>>> # Initializing a model (with random weights) from the levit-128S style configuration
>>> model = LevitModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
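
>>> # Overriding a few of the per-stage arguments (illustrative values, not a released checkpoint)
>>> custom_configuration = LevitConfig(hidden_sizes=[64, 128, 192], num_attention_heads=[2, 4, 6])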
```"""

    model_type = "levit"

    def __init__(
        self,
        image_size=224,
        num_channels=3,
        kernel_size=3,
        stride=2,
        padding=1,
        patch_size=16,
        hidden_sizes=[128, 256, 384],
        num_attention_heads=[4, 8, 12],
        depths=[4, 4, 4],
        key_dim=[16, 16, 16],
        drop_path_rate=0,
        mlp_ratio=[2, 2, 2],
        attention_ratio=[2, 2, 2],
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.image_size = image_size
        self.num_channels = num_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.hidden_sizes = hidden_sizes
        self.num_attention_heads = num_attention_heads
        self.depths = depths
        self.key_dim = key_dim
        self.drop_path_rate = drop_path_rate
        self.patch_size = patch_size
        self.attention_ratio = attention_ratio
        self.mlp_ratio = mlp_ratio
        self.initializer_range = initializer_range
        self.down_ops = [
            ["Subsample", key_dim[0], hidden_sizes[0] // key_dim[0], 4, 2, 2],
            ["Subsample", key_dim[0], hidden_sizes[1] // key_dim[0], 4, 2, 2],
        ]


class LevitOnnxConfig(OnnxConfig):
    torch_onnx_minimum_version = version.parse("1.11")

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        return OrderedDict(
            [
                ("pixel_values", {0: "batch", 1: "num_channels", 2: "height", 3: "width"}),
            ]
        )

    @property
    def atol_for_validation(self) -> float:
        return 1e-4


__all__ = ["LevitConfig", "LevitOnnxConfig"]
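

# A minimal usage sketch for the ONNX config (assuming the generic `transformers.onnx` export flow;
# the export entry point itself lives outside this file):
#
#     from transformers import LevitConfig
#
#     config = LevitConfig()
#     onnx_config = LevitOnnxConfig(config)
#     list(onnx_config.inputs)         # -> ["pixel_values"]
#     onnx_config.atol_for_validation  # -> 1e-04, tolerance used when validating exported outputs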