
"""LayoutLMv3 model configuration"""

from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional

from packaging import version

from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...onnx.utils import compute_effective_axis_dimension
from ...utils import logging


if TYPE_CHECKING:
    from ...processing_utils import ProcessorMixin
    from ...utils import TensorType


logger = logging.get_logger(__name__)


class LayoutLMv3Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`LayoutLMv3Model`]. It is used to instantiate a
    LayoutLMv3 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the LayoutLMv3
    [microsoft/layoutlmv3-base](https://huggingface.co/microsoft/layoutlmv3-base) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 50265):
            Vocabulary size of the LayoutLMv3 model. Defines the number of different tokens that can be represented by
            the `input_ids` passed when calling [`LayoutLMv3Model`].
        hidden_size (`int`, *optional*, defaults to 768):
            Dimension of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            The vocabulary size of the `token_type_ids` passed when calling [`LayoutLMv3Model`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        max_2d_position_embeddings (`int`, *optional*, defaults to 1024):
            The maximum value that the 2D position embedding might ever be used with. Typically set this to something
            large just in case (e.g., 1024).
        coordinate_size (`int`, *optional*, defaults to 128):
            Dimension of the coordinate embeddings.
        shape_size (`int`, *optional*, defaults to 128):
            Dimension of the width and height embeddings.
        has_relative_attention_bias (`bool`, *optional*, defaults to `True`):
            Whether or not to use a relative attention bias in the self-attention mechanism.
        rel_pos_bins (`int`, *optional*, defaults to 32):
            The number of relative position bins to be used in the self-attention mechanism.
        max_rel_pos (`int`, *optional*, defaults to 128):
            The maximum number of relative positions to be used in the self-attention mechanism.
        max_rel_2d_pos (`int`, *optional*, defaults to 256):
            The maximum number of relative 2D positions in the self-attention mechanism.
        rel_2d_pos_bins (`int`, *optional*, defaults to 64):
            The number of 2D relative position bins in the self-attention mechanism.
        has_spatial_attention_bias (`bool`, *optional*, defaults to `True`):
            Whether or not to use a spatial attention bias in the self-attention mechanism.
        visual_embed (`bool`, *optional*, defaults to `True`):
            Whether or not to add patch embeddings.
        input_size (`int`, *optional*, defaults to 224):
            The size (resolution) of the images.
        num_channels (`int`, *optional*, defaults to 3):
            The number of channels of the images.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of the patches.
        classifier_dropout (`float`, *optional*):
            The dropout ratio for the classification head.

    Example:

    ```python
    >>> from transformers import LayoutLMv3Config, LayoutLMv3Model

    >>> # Initializing a LayoutLMv3 microsoft/layoutlmv3-base style configuration
    >>> configuration = LayoutLMv3Config()

    >>> # Initializing a model (with random weights) from the microsoft/layoutlmv3-base style configuration
    >>> model = LayoutLMv3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
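
    A minimal sketch of overriding individual defaults (the argument value below is hypothetical, chosen only to
    illustrate the pattern; any of the keyword arguments documented above can be passed the same way):

    ```python
    >>> # e.g. a hypothetical text-only variant that skips the visual patch embeddings
    >>> text_only_configuration = LayoutLMv3Config(visual_embed=False)
    ```"""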

    model_type = "layoutlmv3"

    def __init__(
        self,
        vocab_size=50265,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-5,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        max_2d_position_embeddings=1024,
        coordinate_size=128,
        shape_size=128,
        has_relative_attention_bias=True,
        rel_pos_bins=32,
        max_rel_pos=128,
        rel_2d_pos_bins=64,
        max_rel_2d_pos=256,
        has_spatial_attention_bias=True,
        text_embed=True,
        visual_embed=True,
        input_size=224,
        num_channels=3,
        patch_size=16,
        classifier_dropout=None,
        **kwargs,
    ):
        super().__init__(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            initializer_range=initializer_range,
            layer_norm_eps=layer_norm_eps,
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
        self.max_2d_position_embeddings = max_2d_position_embeddings
        self.coordinate_size = coordinate_size
        self.shape_size = shape_size
        self.has_relative_attention_bias = has_relative_attention_bias
        self.rel_pos_bins = rel_pos_bins
        self.max_rel_pos = max_rel_pos
        self.has_spatial_attention_bias = has_spatial_attention_bias
        self.rel_2d_pos_bins = rel_2d_pos_bins
        self.max_rel_2d_pos = max_rel_2d_pos
        self.text_embed = text_embed
        self.visual_embed = visual_embed
        self.input_size = input_size
        self.num_channels = num_channels
        self.patch_size = patch_size
        self.classifier_dropout = classifier_dropout


class LayoutLMv3OnnxConfig(OnnxConfig):
    torch_onnx_minimum_version = version.parse("1.12")

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        # The order of inputs differs for the question answering and sequence classification tasks
        if self.task in ["question-answering", "sequence-classification"]:
            return OrderedDict(
                [
                    ("input_ids", {0: "batch", 1: "sequence"}),
                    ("attention_mask", {0: "batch", 1: "sequence"}),
                    ("bbox", {0: "batch", 1: "sequence"}),
                    ("pixel_values", {0: "batch", 1: "num_channels", 2: "height", 3: "width"}),
                ]
            )
        else:
            return OrderedDict(
                [
                    ("input_ids", {0: "batch", 1: "sequence"}),
                    ("bbox", {0: "batch", 1: "sequence"}),
                    ("attention_mask", {0: "batch", 1: "sequence"}),
                    ("pixel_values", {0: "batch", 1: "num_channels"}),
                ]
            )

    @property
    def atol_for_validation(self) -> float:
        return 1e-5

    @property
    def default_onnx_opset(self) -> int:
        return 12

    def generate_dummy_inputs(
        self,
        processor: "ProcessorMixin",
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
        framework: Optional["TensorType"] = None,
        num_channels: int = 3,
        image_width: int = 40,
        image_height: int = 40,
    ) -> Mapping[str, Any]:
        """
        Generate inputs to provide to the ONNX exporter for the specific framework

        Args:
            processor ([`ProcessorMixin`]):
                The processor associated with this model configuration.
            batch_size (`int`, *optional*, defaults to -1):
                The batch size to export the model for (-1 means dynamic axis).
            seq_length (`int`, *optional*, defaults to -1):
                The sequence length to export the model for (-1 means dynamic axis).
            is_pair (`bool`, *optional*, defaults to `False`):
                Indicate if the input is a pair (sentence 1, sentence 2).
            framework (`TensorType`, *optional*, defaults to `None`):
                The framework (PyTorch or TensorFlow) that the processor will generate tensors for.
            num_channels (`int`, *optional*, defaults to 3):
                The number of channels of the generated images.
            image_width (`int`, *optional*, defaults to 40):
                The width of the generated images.
            image_height (`int`, *optional*, defaults to 40):
                The height of the generated images.

        Returns:
            Mapping[str, Any]: holding the kwargs to provide to the model's forward function
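
        Example (a minimal sketch; `microsoft/layoutlmv3-base` is assumed here only as an illustrative checkpoint):

        ```python
        >>> from transformers import LayoutLMv3Config, LayoutLMv3Processor
        >>> from transformers.utils import TensorType

        >>> # sketch: build the ONNX config from a default model config, then request PyTorch dummy tensors
        >>> processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
        >>> onnx_config = LayoutLMv3OnnxConfig(LayoutLMv3Config())
        >>> dummy_inputs = onnx_config.generate_dummy_inputs(processor, framework=TensorType.PYTORCH)
        ```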
        """
        # A dummy document image is used, so OCR should not be applied by the processor
        setattr(processor.image_processor, "apply_ocr", False)

        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
        batch_size = compute_effective_axis_dimension(
            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
        )
        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
        token_to_add = processor.tokenizer.num_special_tokens_to_add(is_pair)
        seq_length = compute_effective_axis_dimension(
            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
        )
        # Generate dummy text according to the computed batch and sequence lengths
        dummy_text = [[" ".join([processor.tokenizer.unk_token]) * seq_length]] * batch_size

        # Generate dummy bounding boxes
        dummy_bboxes = [[[48, 84, 73, 128]]] * batch_size

        # Generate dummy images of shape (batch_size, num_channels, image_height, image_width)
        dummy_image = self._generate_dummy_images(batch_size, num_channels, image_height, image_width)

        inputs = dict(
            processor(
                dummy_image,
                text=dummy_text,
                boxes=dummy_bboxes,
                return_tensors=framework,
            )
        )

        return inputs


__all__ = ["LayoutLMv3Config", "LayoutLMv3OnnxConfig"]