from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import List, Optional, Tuple

import tensorflow as tf

from .utils import ModelOutput


@dataclass
class TFBaseModelOutput(ModelOutput):
    """
Base class for model's outputs, with potential hidden states and attentions.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
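
Example (an illustrative sketch, assuming the `distilbert-base-uncased` checkpoint, whose base TF model
returns this class):

```python
>>> from transformers import AutoTokenizer, TFDistilBertModel

>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
>>> model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
>>> inputs = tokenizer("Hello, world!", return_tensors="tf")
>>> outputs = model(**inputs, output_hidden_states=True)
>>> outputs.last_hidden_state.shape  # (batch_size, sequence_length, hidden_size)
>>> len(outputs.hidden_states)  # one entry for the embeddings plus one per layer
```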
NOptional[tf.Tensor]last_hidden_stateTuple[tf.Tensor] | Nonehidden_states
attentions )
__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   __static_attributes__r       X/var/www/auris/envauris/lib/python3.13/site-packages/transformers/modeling_tf_outputs.pyr   r      s+    & .2*1-1M*1*.J'.r   r   c                  6    \ rS rSr% SrSrS\S'   SrS\S'   Srg)	 TFBaseModelOutputWithNoAttention4   a  
Base class for model's outputs, with potential hidden states.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each layer) of shape `(batch_size, num_channels, height, width)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    """

    last_hidden_state: Optional[tf.Tensor] = None
    hidden_states: Optional[Tuple[tf.Tensor, ...]] = None


@dataclass
class TFBaseModelOutputWithPooling(ModelOutput):
    """
Base class for model's outputs that also contains a pooling of the last hidden states.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    pooler_output (`tf.Tensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state of the first token of the sequence (classification token) further processed by a
        Linear layer and a Tanh activation function. The Linear layer weights are trained from the next sentence
        prediction (classification) objective during pretraining.

        This output is usually *not* a good summary of the semantic content of the input; you're often better off
        averaging or pooling the sequence of hidden-states over the whole input sequence.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
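
Example (an illustrative sketch, assuming `model` is a TF model that returns this class and `inputs`
is the dict of tensors produced by its tokenizer or processor):

```python
>>> outputs = model(**inputs)
>>> pooled = outputs.pooler_output  # (batch_size, hidden_size)
>>> # per the note above, a mean over tokens is often a better sentence summary
>>> mean_pooled = tf.reduce_mean(outputs.last_hidden_state, axis=1)
```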
    """

    last_hidden_state: Optional[tf.Tensor] = None
    pooler_output: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFBaseModelOutputWithPoolingAndNoAttention(ModelOutput):
    """
Base class for model's outputs that also contains a pooling of the last hidden states.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
        Sequence of hidden-states at the output of the last layer of the model.
    pooler_output (`tf.Tensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state after a pooling operation on the spatial dimensions.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each layer) of shape `(batch_size, num_channels, height, width)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    """

    last_hidden_state: Optional[tf.Tensor] = None
    pooler_output: Optional[tf.Tensor] = None
    hidden_states: Optional[Tuple[tf.Tensor, ...]] = None


@dataclass
class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
    """
Base class for model's outputs that also contains a pooling of the last hidden states.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    pooler_output (`tf.Tensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state of the first token of the sequence (classification token) further processed by a
        Linear layer and a Tanh activation function. The Linear layer weights are trained from the next sentence
        prediction (classification) objective during pretraining.

        This output is usually *not* a good summary of the semantic content of the input; you're often better off
        averaging or pooling the sequence of hidden-states over the whole input sequence.
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    """

    last_hidden_state: Optional[tf.Tensor] = None
    pooler_output: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFBaseModelOutputWithPast(ModelOutput):
    """
Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding).

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.

        If `past_key_values` is used only the last hidden-state of the sequences of shape `(batch_size, 1,
        hidden_size)` is output.
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
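
Example (an illustrative sketch of cached decoding, assuming `model` is a TF decoder that returns this
class and `input_ids`/`next_token_ids` are `tf.Tensor`s of token ids):

```python
>>> out = model(input_ids, use_cache=True)
>>> cache = out.past_key_values
>>> # feed only the newest token; the cache supplies keys/values for earlier positions
>>> out = model(next_token_ids, past_key_values=cache, use_cache=True)
```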
    """

    last_hidden_state: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFBaseModelOutputWithCrossAttentions(ModelOutput):
    """
Base class for model's outputs, with potential hidden states and attentions.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    """

    last_hidden_state: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput):
    """
Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding).

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.

        If `past_key_values` is used only the last hidden-state of the sequences of shape `(batch_size, 1,
        hidden_size)` is output.
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    """

    last_hidden_state: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSeq2SeqModelOutput(ModelOutput):
    """
Base class for model encoder's outputs that also contains pre-computed hidden states that can speed up sequential
decoding.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the decoder of the model.

        If `past_key_values` is used only the last hidden-state of the sequences of shape `(batch_size, 1,
        hidden_size)` is output.
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
        used (see `past_key_values` input) to speed up sequential decoding.
    decoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
    decoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    encoder_last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Sequence of hidden-states at the output of the last layer of the encoder of the model.
    encoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
    encoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
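
Example (an illustrative sketch, assuming `model` is a TF encoder-decoder model such as `TFT5Model`
and `input_ids`/`decoder_input_ids` are already tokenized):

```python
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> outputs.last_hidden_state.shape          # decoder states: (batch_size, target_length, hidden_size)
>>> outputs.encoder_last_hidden_state.shape  # encoder states: (batch_size, source_length, hidden_size)
```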
    """

    last_hidden_state: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    decoder_hidden_states: Tuple[tf.Tensor] | None = None
    decoder_attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None
    encoder_last_hidden_state: tf.Tensor | None = None
    encoder_hidden_states: Tuple[tf.Tensor] | None = None
    encoder_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFCausalLMOutput(ModelOutput):
    """
Base class for causal language model (or autoregressive) outputs.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFCausalLMOutputWithPast(ModelOutput):
    """
Base class for causal language model (or autoregressive) outputs.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
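
Example (an illustrative sketch of one greedy decoding step, assuming the `gpt2` checkpoint, whose TF
LM head model returns this class):

```python
>>> import tensorflow as tf
>>> from transformers import AutoTokenizer, TFGPT2LMHeadModel

>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = TFGPT2LMHeadModel.from_pretrained("gpt2")
>>> inputs = tokenizer("The capital of France is", return_tensors="tf")
>>> outputs = model(**inputs, use_cache=True)
>>> next_token = tf.argmax(outputs.logits[:, -1, :], axis=-1)  # greedy next-token pick
>>> cache = outputs.past_key_values  # pass back in to skip recomputing earlier positions
```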
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFCausalLMOutputWithCrossAttentions(ModelOutput):
    """
Base class for causal language model (or autoregressive) outputs.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFMaskedLMOutput(ModelOutput):
    """
Base class for masked language models outputs.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `labels` is provided):
        Masked language modeling (MLM) loss.
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
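
Example (an illustrative sketch, assuming the `bert-base-uncased` checkpoint, whose TF masked-LM model
returns this class):

```python
>>> import tensorflow as tf
>>> from transformers import AutoTokenizer, TFBertForMaskedLM

>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
>>> model = TFBertForMaskedLM.from_pretrained("bert-base-uncased")
>>> inputs = tokenizer("The capital of France is [MASK].", return_tensors="tf")
>>> logits = model(**inputs).logits  # (batch_size, sequence_length, vocab_size)
>>> mask_index = int(tf.where(inputs["input_ids"][0] == tokenizer.mask_token_id)[0, 0])
>>> predicted_id = int(tf.argmax(logits[0, mask_index]))
>>> tokenizer.decode([predicted_id])
```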
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSeq2SeqLMOutput(ModelOutput):
    """
Base class for sequence-to-sequence language models outputs.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `labels` is provided):
        Language modeling loss.
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
        used (see `past_key_values` input) to speed up sequential decoding.
    decoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
    decoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
        weighted average in the cross-attention heads.
    encoder_last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Sequence of hidden-states at the output of the last layer of the encoder of the model.
    encoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
    encoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    decoder_hidden_states: Tuple[tf.Tensor] | None = None
    decoder_attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None
    encoder_last_hidden_state: tf.Tensor | None = None
    encoder_hidden_states: Tuple[tf.Tensor] | None = None
    encoder_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFNextSentencePredictorOutput(ModelOutput):
    """
Base class for outputs of models predicting if two sentences are consecutive or not.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of non-masked labels, returned when `next_sentence_label` is provided):
        Next sentence prediction loss.
    logits (`tf.Tensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSequenceClassifierOutput(ModelOutput):
    """
Base class for outputs of sentence classification models.

Args:
    loss (`tf.Tensor` of shape `(batch_size, )`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
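
Example (an illustrative sketch, assuming `model` is a TF sequence classification model that returns
this class and `inputs` comes from its tokenizer):

```python
>>> outputs = model(**inputs)
>>> probs = tf.nn.softmax(outputs.logits, axis=-1)  # (batch_size, num_labels)
>>> predicted_class = int(tf.argmax(outputs.logits, axis=-1)[0])
>>> model.config.id2label[predicted_class]
```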
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSeq2SeqSequenceClassifierOutput(ModelOutput):
    """
Base class for outputs of sequence-to-sequence sentence classification models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `label` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
        used (see `past_key_values` input) to speed up sequential decoding.
    decoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
    decoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    cross_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.
    encoder_last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Sequence of hidden-states at the output of the last layer of the encoder of the model.
    encoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
    encoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    decoder_hidden_states: Tuple[tf.Tensor] | None = None
    decoder_attentions: Tuple[tf.Tensor] | None = None
    cross_attentions: Tuple[tf.Tensor] | None = None
    encoder_last_hidden_state: tf.Tensor | None = None
    encoder_hidden_states: Tuple[tf.Tensor] | None = None
    encoder_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSemanticSegmenterOutput(ModelOutput):
    """
Base class for outputs of semantic segmentation models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels, logits_height, logits_width)`):
        Classification scores for each pixel.

        <Tip warning={true}>

        The logits returned do not necessarily have the same size as the `pixel_values` passed as inputs. This is
        to avoid doing two interpolations and lose some quality when a user needs to resize the logits to the
        original image size as post-processing. You should always check your logits shape and resize as needed.

        </Tip>

    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each layer) of shape `(batch_size, patch_size, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size, sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
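
Example (an illustrative sketch of the resizing mentioned in the tip above, assuming `outputs` comes
from a TF semantic segmentation model and `image_height`/`image_width` are the original pixel sizes):

```python
>>> logits = tf.transpose(outputs.logits, [0, 2, 3, 1])  # NCHW -> NHWC for tf.image
>>> upsampled = tf.image.resize(logits, [image_height, image_width], method="bilinear")
>>> segmentation = tf.argmax(upsampled, axis=-1)  # per-pixel class ids, (batch, height, width)
```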
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSemanticSegmenterOutputWithNoAttention(ModelOutput):
    """
Base class for outputs of semantic segmentation models that do not output attention scores.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels, logits_height, logits_width)`):
        Classification scores for each pixel.

        <Tip warning={true}>

        The logits returned do not necessarily have the same size as the `pixel_values` passed as inputs. This is
        to avoid doing two interpolations and lose some quality when a user needs to resize the logits to the
        original image size as post-processing. You should always check your logits shape and resize as needed.

        </Tip>

    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each layer) of shape `(batch_size, patch_size, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Optional[Tuple[tf.Tensor, ...]] = None


@dataclass
class TFImageClassifierOutput(ModelOutput):
    """
Base class for outputs of image classification models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each stage) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states (also called
        feature maps) of the model at the output of each stage.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size, sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFMultipleChoiceModelOutput(ModelOutput):
    """
Base class for outputs of multiple choice models.

Args:
    loss (`tf.Tensor` of shape *(batch_size, )*, *optional*, returned when `labels` is provided):
        Classification loss.
    logits (`tf.Tensor` of shape `(batch_size, num_choices)`):
        *num_choices* is the second dimension of the input tensors. (see *input_ids* above).

        Classification scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFTokenClassifierOutput(ModelOutput):
    """
Base class for outputs of token classification models.

Args:
    loss (`tf.Tensor` of shape `(n,)`, *optional*, where n is the number of unmasked labels, returned when `labels` is provided):
        Classification loss.
    logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.num_labels)`):
        Classification scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFQuestionAnsweringModelOutput(ModelOutput):
    """
Base class for outputs of question answering models.

Args:
    loss (`tf.Tensor` of shape `(batch_size, )`, *optional*, returned when `start_positions` and `end_positions` are provided):
        Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
    start_logits (`tf.Tensor` of shape `(batch_size, sequence_length)`):
        Span-start scores (before SoftMax).
    end_logits (`tf.Tensor` of shape `(batch_size, sequence_length)`):
        Span-end scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
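
Example (an illustrative sketch of decoding a span, assuming `model` is a TF question answering model
that returns this class and `inputs` holds a tokenized question/context pair):

```python
>>> outputs = model(**inputs)
>>> start = int(tf.argmax(outputs.start_logits, axis=-1)[0])
>>> end = int(tf.argmax(outputs.end_logits, axis=-1)[0])
>>> answer = tokenizer.decode(inputs["input_ids"][0, start : end + 1])
```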
    """

    loss: tf.Tensor | None = None
    start_logits: Optional[tf.Tensor] = None
    end_logits: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
    """
Base class for outputs of sequence-to-sequence question answering models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
    start_logits (`tf.Tensor` of shape `(batch_size, sequence_length)`):
        Span-start scores (before SoftMax).
    end_logits (`tf.Tensor` of shape `(batch_size, sequence_length)`):
        Span-end scores (before SoftMax).
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
        used (see `past_key_values` input) to speed up sequential decoding.
    decoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
    decoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    encoder_last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Sequence of hidden-states at the output of the last layer of the encoder of the model.
    encoder_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
    encoder_attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
        self-attention heads.
    """

    loss: tf.Tensor | None = None
    start_logits: Optional[tf.Tensor] = None
    end_logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    decoder_hidden_states: Tuple[tf.Tensor] | None = None
    decoder_attentions: Tuple[tf.Tensor] | None = None
    encoder_last_hidden_state: tf.Tensor | None = None
    encoder_hidden_states: Tuple[tf.Tensor] | None = None
    encoder_attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFSequenceClassifierOutputWithPast(ModelOutput):
    """
Base class for outputs of sentence classification models.

Args:
    loss (`tf.Tensor` of shape `(batch_size, )`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    past_key_values (`List[tf.Tensor]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        List of `tf.Tensor` of length `config.n_layers`, with each tensor of shape `(2, batch_size, num_heads,
        sequence_length, embed_size_per_head)`).

        Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    past_key_values: List[tf.Tensor] | None = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None


@dataclass
class TFImageClassifierOutputWithNoAttention(ModelOutput):
    """
Base class for outputs of image classification models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each stage) of shape `(batch_size, num_channels, height, width)`. Hidden-states (also called
        feature maps) of the model at the output of each stage.
    """

    loss: tf.Tensor | None = None
    logits: Optional[tf.Tensor] = None
    hidden_states: Optional[Tuple[tf.Tensor, ...]] = None


@dataclass
class TFMaskedImageModelingOutput(ModelOutput):
    """
Base class for outputs of masked image completion / in-painting models.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `bool_masked_pos` is provided):
        Reconstruction loss.
    reconstruction (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
       Reconstructed / completed images.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when
    `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, + one for
        the output of each stage) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states (also called
        feature maps) of the model at the output of each stage.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when
    `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size, sequence_length)`.
        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
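
Example (an illustrative sketch, assuming `outputs` comes from a TF masked image modeling model that
returns this class):

```python
>>> reconstructed = outputs.reconstruction  # (batch_size, num_channels, height, width)
>>> # `outputs.logits` still resolves to the same tensor but raises a FutureWarning
```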
    """

    loss: tf.Tensor | None = None
    reconstruction: Optional[tf.Tensor] = None
    hidden_states: Tuple[tf.Tensor] | None = None
    attentions: Tuple[tf.Tensor] | None = None

    @property
    def logits(self):
        warnings.warn(
            "logits attribute is deprecated and will be removed in version 5 of Transformers."
            " Please use the reconstruction attribute to retrieve the final output instead.",
            FutureWarning,
        )
        return self.reconstruction