
    fTh"                         S r SSKJr  SSKJrJrJrJr  SSKJ	r	J
r
Jr  SSKJr  SSKJrJr  SSKJr  \R&                  " \5      r " S	 S
\5      r " S S\5      rS
S/rg)zGPT-J model configuration    )OrderedDict)AnyListMappingOptional   )PreTrainedTokenizer
TensorTypeis_torch_available)PretrainedConfig)OnnxConfigWithPastPatchingSpec)loggingc                   d   ^  \ rS rSrSrSrSSSSS.r                 SU 4S	 jjrS
rU =r	$ )
GPTJConfig   a  
This is the configuration class to store the configuration of a [`GPTJModel`]. It is used to instantiate a GPT-J
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the GPT-J
[EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B) architecture. Configuration objects inherit from
[`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`]
for more information.

Args:
    vocab_size (`int`, *optional*, defaults to 50400):
        Vocabulary size of the GPT-J model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`GPTJModel`].
    n_positions (`int`, *optional*, defaults to 2048):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    n_embd (`int`, *optional*, defaults to 4096):
        Dimensionality of the embeddings and hidden states.
    n_layer (`int`, *optional*, defaults to 28):
        Number of hidden layers in the Transformer encoder.
    n_head (`int`, *optional*, defaults to 16):
        Number of attention heads for each attention layer in the Transformer encoder.
    rotary_dim (`int`, *optional*, defaults to 64):
        Number of dimensions in the embedding that Rotary Position Embedding is applied to.
    n_inner (`int`, *optional*, defaults to None):
        Dimensionality of the inner feed-forward layers. `None` will set it to 4 times n_embd
    activation_function (`str`, *optional*, defaults to `"gelu_new"`):
        Activation function, to be selected in the list `["relu", "silu", "gelu", "tanh", "gelu_new"]`.
    resid_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
    embd_pdrop (`int`, *optional*, defaults to 0.1):
        The dropout ratio for the embeddings.
    attn_pdrop (`float`, *optional*, defaults to 0.1):
        The dropout ratio for the attention.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
        The epsilon to use in the layer normalization layers.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models).

Example:

```python
>>> from transformers import GPTJModel, GPTJConfig

>>> # Initializing a GPT-J 6B configuration
>>> configuration = GPTJConfig()

>>> # Initializing a model from the configuration
>>> model = GPTJModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```gptjn_positionsn_embdn_headn_layer)max_position_embeddingshidden_sizenum_attention_headsnum_hidden_layersc                    > Xl         X l        X0l        X@l        XPl        Xpl        X`l        Xl        Xl        Xl	        Xl
        Xl        Xl        Xl        Xl        UU l        [         TU ]D  " SUUUS.UD6  g )N)bos_token_ideos_token_idtie_word_embeddings )
vocab_sizer   r   r   r   n_inner
rotary_dimactivation_functionresid_pdrop
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_range	use_cacher   r   super__init__)selfr!   r   r   r   r   r#   r"   r$   r%   r&   r'   r(   r)   r*   r   r   r   kwargs	__class__s                      c/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/gptj/configuration_gptj.pyr,   GPTJConfig.__init__]   s    * %&$#6 &$$"4!2"(( 	
%LVi	
ms	
    )r$   r'   r   r&   r   r)   r(   r   r   r"   r   r   r%   r#   r*   r!   )i  i   i         @   Ngelu_new        r7   r7   gh㈵>g{Gz?TP  r8   F)
__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr,   __static_attributes____classcell__r/   s   @r0   r   r      sb    5n J#0'&	M &!%)
 )
r2   r   c                   
  ^  \ rS rSr   SS\S\S\\\      S\	4U 4S jjjr
\S\\\\\4   4   4S j5       r\S\4S	 j5       r\S\4S
 j5       r    SS\S\S\S\	S\\   S\\\4   4U 4S jjjr\S\4S j5       rSrU =r$ )GPTJOnnxConfig   configtaskpatching_specsuse_pastc                 ~   > [         TU ]  XX4S9  [        U R                  SS 5      (       d  SU R                  l        g g )N)rG   rH   rI   pad_token_idr   )r+   r,   getattr_configrK   )r-   rF   rG   rH   rI   r/   s        r0   r,   GPTJOnnxConfig.__init__   s;     	>]t||^T::()DLL% ;r2   returnc                     [        SSSS.05      nU R                  (       a  U R                  USS9  SSS.US'   U$ SSS.US'   U$ )	N	input_idsbatchsequence)r      inputs)	directionzpast_sequence + sequenceattention_mask)r   rI   fill_with_past_key_values_)r-   common_inputss     r0   rU   GPTJOnnxConfig.inputs   sa    #[g*2M$NO==++MX+N29>X.YM*+  3:j.IM*+r2   c                 .    U R                   R                  $ N)rM   r   r-   s    r0   
num_layersGPTJOnnxConfig.num_layers   s    ||###r2   c                 .    U R                   R                  $ r\   )rM   r   r]   s    r0   r   "GPTJOnnxConfig.num_attention_heads   s    ||"""r2   	tokenizer
batch_size
seq_lengthis_pair	frameworkc           	      r  > [         [        U ]  XX4US9n[        SUS   05      nU R                  (       a  [        5       (       d  [        S5      eSS KnUS   R                  u  pU
S-   nU	U R                  UU R                  R                  U R                  -  4n[        U R                  5       Vs/ s H$  oR                  U5      UR                  U5      4PM&     snUS'   US   US'   U R                  (       a6  US   R                  nWR!                  US   UR#                  W	WUS9/S	S
9US'   U$ s  snf )N)rc   rd   re   rf   rQ   zACannot generate dummy past_keys inputs without PyTorch installed.r      past_key_valuesrW   )dtyperT   )dim)r+   r   generate_dummy_inputsr   rI   r   
ValueErrortorchshaper   rM   r   ranger^   zerosrj   catones)r-   rb   rc   rd   re   rf   rY   ordered_inputsrn   rR   seqlenpast_key_values_length
past_shape_
mask_dtyper/   s                  r0   rl   $GPTJOnnxConfig.generate_dummy_inputs   s^    0$M`i N 

 %k=3M%NO ==%'' !dee -k : @ @)/!&,,*LL,,0H0HH	
 QVVZVeVePf5Pf1[[,ekk*.EFPf501 ,99I+J'(=='(89??J/4yy 015::eE[cm:3nouv 09 0N+, 5s   2+D4c                     g)N   r    r]   s    r0   default_onnx_opset!GPTJOnnxConfig.default_onnx_opset   s    r2   r    )defaultNF)r   FN)r9   r:   r;   r<   r   strr   r   r   boolr,   propertyr   intrU   r^   r   r	   r
   r   rl   r}   r@   rA   rB   s   @r0   rD   rD      s(    7;
* 
* 
* !l!34	
*
 
* 
* WS#X%6 67   $C $ $ #S # # *.*&* * 	*
 * J'* 
c	* *X C  r2   rD   N)r=   collectionsr   typingr   r   r   r    r	   r
   r   configuration_utilsr   onnxr   r   utilsr   
get_loggerr9   loggerr   rD   __all__r    r2   r0   <module>r      sa      # / / C C 3 4  
		H	%i
! i
ZN' Nb )
*r2   