
"""Funnel Transformer model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class FunnelConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`FunnelModel`] or a [`TFFunnelModel`]. It is used to
instantiate a Funnel Transformer model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the Funnel
Transformer [funnel-transformer/small](https://huggingface.co/funnel-transformer/small) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 30522):
        Vocabulary size of the Funnel transformer. Defines the number of different tokens that can be represented
        by the `input_ids` passed when calling [`FunnelModel`] or [`TFFunnelModel`].
    block_sizes (`List[int]`, *optional*, defaults to `[4, 4, 4]`):
        The sizes of the blocks used in the model; their sum sets the depth of the encoder (see the sketch after
        this argument list).
    block_repeats (`List[int]`, *optional*):
        If passed along, each layer of each block is repeated the number of times indicated.
    num_decoder_layers (`int`, *optional*, defaults to 2):
        The number of layers in the decoder (when not using the base model).
    d_model (`int`, *optional*, defaults to 768):
        Dimensionality of the model's hidden states.
    n_head (`int`, *optional*, defaults to 12):
        Number of attention heads for each attention layer in the Transformer encoder.
    d_head (`int`, *optional*, defaults to 64):
        Dimensionality of each attention head.
    d_inner (`int`, *optional*, defaults to 3072):
        Inner dimension in the feed-forward blocks.
    hidden_act (`str` or `callable`, *optional*, defaults to `"gelu_new"`):
        The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
        `"relu"`, `"silu"` and `"gelu_new"` are supported.
    hidden_dropout (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    attention_dropout (`float`, *optional*, defaults to 0.1):
        The dropout probability for the attention probabilities.
    activation_dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability used between the two layers of the feed-forward blocks.
    initializer_range (`float`, *optional*, defaults to 0.1):
        The upper bound of the *uniform initializer* for initializing all weight matrices in attention layers.
    initializer_std (`float`, *optional*):
        The standard deviation of the *normal initializer* for initializing the embedding matrix and the weight of
        linear layers. Will default to 1 for the embedding matrix and the value given by Xavier initialization for
        linear layers.
    layer_norm_eps (`float`, *optional*, defaults to 1e-09):
        The epsilon used by the layer normalization layers.
    pooling_type (`str`, *optional*, defaults to `"mean"`):
        Possible values are `"mean"` or `"max"`. The way pooling is performed at the beginning of each block.
    attention_type (`str`, *optional*, defaults to `"relative_shift"`):
        Possible values are `"relative_shift"` or `"factorized"`. The former is faster on CPU/GPU while the latter
        is faster on TPU.
    separate_cls (`bool`, *optional*, defaults to `True`):
        Whether or not to separate the cls token when applying pooling.
    truncate_seq (`bool`, *optional*, defaults to `True`):
        When using `separate_cls`, whether or not to truncate the last token when pooling, to avoid getting a
        sequence length that is not a multiple of 2.
    pool_q_only (`bool`, *optional*, defaults to `True`):
        Whether or not to apply the pooling only to the query or to query, key and values for the attention layers.
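
Since `num_hidden_layers` (the sum of `block_sizes`) and `num_blocks` (their length) are derived properties, they
cannot be set directly. A minimal sketch of that interaction, using only names defined on this class:

```python
>>> config = FunnelConfig(block_sizes=[2, 2, 2], block_repeats=[1, 2, 2])
>>> config.num_blocks
3
>>> config.num_hidden_layers  # sum of `block_sizes`; `block_repeats` does not change this count
6
```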
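
Example (a minimal usage sketch, mirroring the doctest style of other `transformers` configuration classes):

```python
>>> from transformers import FunnelConfig, FunnelModel

>>> # Initializing a Funnel funnel-transformer/small style configuration
>>> configuration = FunnelConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = FunnelModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```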
funneld_modeln_head)hidden_sizenum_attention_headsi:w  )   r   r   N   i      @   i   gelu_newg?g        g&.>meanrelative_shiftTc                   > Xl         X l        Uc  S/[        U5      -  OUU l        [        U5      [        U R                  5      :X  d   S5       eX@l        XPl        X`l        Xpl        Xl        Xl	        Xl
        Xl        Xl        Xl        Xl        Xl        US;   d   SU S35       eUU l        US;   d   SU S35       eUU l        UU l        UU l        UU l        [*        TU ]X  " S0 UD6  g )	N   z>`block_sizes` and `block_repeats` should have the same length.)r   maxzGot z< for `pooling_type` but only 'mean' and 'max' are supported.)r   
factorizedzO for `attention_type` but only 'relative_shift' and 'factorized' are supported. )
vocab_sizeblock_sizeslenblock_repeatsnum_decoder_layersr	   r
   d_headd_inner
hidden_acthidden_dropoutattention_dropoutactivation_dropoutinitializer_rangeinitializer_stdlayer_norm_epspooling_typeattention_typeseparate_clstruncate_seqpool_q_onlysuper__init__)selfr   r   r   r   r	   r
   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   kwargs	__class__s                         g/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/funnel/configuration_funnel.pyr-   FunnelConfig.__init__Y   s0   0 %&7D7LaS3{#33R_;3t'9'9#:: 	
L	
: #5$,!2"4!2.,  
 
 	] ,[\	] 
 ) "
 
 	r .!!pq	r 
 -((&"6"    c                 ,    [        U R                  5      $ N)sumr   r.   s    r1   num_hidden_layersFunnelConfig.num_hidden_layers       4##$$r3   c                     [        S5      e)NzYThis model does not support the setting of `num_hidden_layers`. Please set `block_sizes`.NotImplementedErrorr.   values     r1   r8   r9      s    !g
 	
r3   c                 ,    [        U R                  5      $ r5   )r   r   r7   s    r1   
num_blocksFunnelConfig.num_blocks   r:   r3   c                     [        S5      e)NzRThis model does not support the setting of `num_blocks`. Please set `block_sizes`.r<   r>   s     r1   rA   rB      s    !"vwwr3   )r#   r"   r(   r   r   r   r   r	   r    r!   r$   r%   r&   r
   r   r+   r'   r)   r*   r   )__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr-   propertyr8   setterrA   __static_attributes____classcell__)r0   s   @r1   r   r      s    8t J 'M '+8#t % % 
 

 % % x xr3   r   N)
rH   configuration_utilsr   utilsr   
get_loggerrD   loggerr   __all__r   r3   r1   <module>rT      s>    - 3  
		H	%Kx# Kx\ 
r3   