"""PyTorch Bros model."""

import math
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import ModelOutput, auto_docstring, logging
from .configuration_bros import BrosConfig


logger = logging.get_logger(__name__)


@dataclass
class BrosSpadeOutput(ModelOutput):
    """
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[torch.FloatTensor] = None
    initial_token_logits: Optional[torch.FloatTensor] = None
    subsequent_token_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


class BrosPositionalEmbedding1D(nn.Module):
    def __init__(self, config):
        super(BrosPositionalEmbedding1D, self).__init__()

        self.dim_bbox_sinusoid_emb_1d = config.dim_bbox_sinusoid_emb_1d

        inv_freq = 1 / (
            10000 ** (torch.arange(0.0, self.dim_bbox_sinusoid_emb_1d, 2.0) / self.dim_bbox_sinusoid_emb_1d)
        )
        self.register_buffer("inv_freq", inv_freq)

    def forward(self, pos_seq: torch.Tensor) -> torch.Tensor:
        seq_size = pos_seq.size()
        b1, b2, b3 = seq_size
        sinusoid_inp = pos_seq.view(b1, b2, b3, 1) * self.inv_freq.view(1, 1, 1, self.dim_bbox_sinusoid_emb_1d // 2)
        pos_emb = torch.cat([sinusoid_inp.sin(), sinusoid_inp.cos()], dim=-1)
        return pos_emb


class BrosPositionalEmbedding2D(nn.Module):
    def __init__(self, config):
        super(BrosPositionalEmbedding2D, self).__init__()

        self.dim_bbox = config.dim_bbox
        self.x_pos_emb = BrosPositionalEmbedding1D(config)
        self.y_pos_emb = BrosPositionalEmbedding1D(config)

    def forward(self, bbox: torch.Tensor) -> torch.Tensor:
        # even bbox coordinates are x positions, odd ones are y positions
        stack = []
        for i in range(self.dim_bbox):
            if i % 2 == 0:
                stack.append(self.x_pos_emb(bbox[..., i]))
            else:
                stack.append(self.y_pos_emb(bbox[..., i]))
        bbox_pos_emb = torch.cat(stack, dim=-1)
        return bbox_pos_emb


class BrosBboxEmbeddings(nn.Module):
    def __init__(self, config):
        super(BrosBboxEmbeddings, self).__init__()
        self.bbox_sinusoid_emb = BrosPositionalEmbedding2D(config)
        self.bbox_projection = nn.Linear(config.dim_bbox_sinusoid_emb_2d, config.dim_bbox_projection, bias=False)

    def forward(self, bbox: torch.Tensor):
        # encode the pairwise relative positions between bounding boxes
        bbox_t = bbox.transpose(0, 1)
        bbox_pos = bbox_t[None, :, :, :] - bbox_t[:, None, :, :]
        bbox_pos_emb = self.bbox_sinusoid_emb(bbox_pos)
        bbox_pos_emb = self.bbox_projection(bbox_pos_emb)
        return bbox_pos_emb


class BrosTextEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()

        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
        self.register_buffer(
            "token_type_ids",
            torch.zeros(self.position_ids.size(), dtype=torch.long, device=self.position_ids.device),
            persistent=False,
        )

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        past_key_values_length: int = 0,
    ) -> torch.Tensor:
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]

        if token_type_ids is None:
            if hasattr(self, "token_type_ids"):
                buffered_token_type_ids = self.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = inputs_embeds + token_type_embeddings
        if self.position_embedding_type == "absolute":
            position_embeddings = self.position_embeddings(position_ids)
            embeddings += position_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class BrosSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention heads "
                f"({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            self.max_position_embeddings = config.max_position_embeddings
            self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, self.attention_head_size)

        self.is_decoder = config.is_decoder

    def transpose_for_scores(self, x: torch.Tensor):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        mixed_query_layer = self.query(hidden_states)

        # If this is instantiated as a cross-attention module, the keys and values come from an encoder; the
        # attention mask needs to be such that the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention and past_key_value is not None:
            # reuse k, v from the cross-attention cache
            key_layer = past_key_value[0]
            value_layer = past_key_value[1]
            attention_mask = encoder_attention_mask
        elif is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        query_layer = self.transpose_for_scores(mixed_query_layer)

        if self.is_decoder:
            past_key_value = (key_layer, value_layer)

        # raw attention scores between "query" and "key"
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            seq_length = hidden_states.size()[1]
            position_ids_l = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(-1, 1)
            position_ids_r = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(1, -1)
            distance = position_ids_l - position_ids_r
            positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1)
            positional_embedding = positional_embedding.to(dtype=query_layer.dtype)  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores
            elif self.position_embedding_type == "relative_key_query":
                relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key

        # bbox positional scores: attend over the pairwise relative-position embeddings of the bounding boxes
        batch_size, n_head, seq_length, d_head = query_layer.shape
        bbox_pos_emb = bbox_pos_emb.view(seq_length, seq_length, batch_size, d_head)
        bbox_pos_emb = bbox_pos_emb.permute([2, 0, 1, 3])
        bbox_pos_scores = torch.einsum("bnid,bijd->bnij", (query_layer, bbox_pos_emb))

        attention_scores = attention_scores + bbox_pos_scores

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # the attention mask is precomputed for all layers in the BrosModel forward() function
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.Softmax(dim=-1)(attention_scores)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        if self.is_decoder:
            outputs = outputs + (past_key_value,)

        return outputs


class BrosSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class BrosAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = BrosSelfAttention(config)
        self.output = BrosSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        self_outputs = self.self(
            hidden_states=hidden_states,
            bbox_pos_emb=bbox_pos_emb,
            attention_mask=attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
        )
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class BrosIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class BrosOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class BrosLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = BrosAttention(config)
        self.is_decoder = config.is_decoder
        self.add_cross_attention = config.add_cross_attention
        if self.add_cross_attention:
            if not self.is_decoder:
                raise Exception(f"{self} should be used as a decoder model if cross attention is added")
            self.crossattention = BrosAttention(config)
        self.intermediate = BrosIntermediate(config)
        self.output = BrosOutput(config)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        # decoder uni-directional self-attention cached key/values tuple is at positions 1, 2
        self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
        self_attention_outputs = self.attention(
            hidden_states,
            bbox_pos_emb=bbox_pos_emb,
            attention_mask=attention_mask,
            head_mask=head_mask,
            output_attentions=output_attentions,
            past_key_value=self_attn_past_key_value,
        )
        attention_output = self_attention_outputs[0]

        # if decoder, the last output is a tuple of self-attn cache
        if self.is_decoder:
            outputs = self_attention_outputs[1:-1]
            present_key_value = self_attention_outputs[-1]
        else:
            outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        cross_attn_present_key_value = None
        if self.is_decoder and encoder_hidden_states is not None:
            if not hasattr(self, "crossattention"):
                raise Exception(
                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention"
                    " layers by setting `config.add_cross_attention=True`"
                )

            # cross-attn cached key/values tuple is at positions 3, 4 of past_key_value tuple
            cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
            cross_attention_outputs = self.crossattention(
                attention_output,
                bbox_pos_emb,
                attention_mask,
                head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                cross_attn_past_key_value,
                output_attentions,
            )
            attention_output = cross_attention_outputs[0]
            outputs = outputs + cross_attention_outputs[1:-1]  # add cross attentions if we output attention weights

            # add cross-attn cache to positions 3, 4 of present_key_value tuple
            cross_attn_present_key_value = cross_attention_outputs[-1]
            present_key_value = present_key_value + cross_attn_present_key_value

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output
        )
        outputs = (layer_output,) + outputs

        # if decoder, return the attn key/values as the last output
        if self.is_decoder:
            outputs = outputs + (present_key_value,)

        return outputs

    def feed_forward_chunk(self, attention_output):
        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(intermediate_output, attention_output)
        return layer_output


class BrosEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([BrosLayer(config) for _ in range(config.num_hidden_layers)])

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None

        next_decoder_cache = () if use_cache else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            if getattr(self.config, "gradient_checkpointing", False) and self.training:
                if use_cache:
                    logger.warning(
                        "`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting "
                        "`use_cache=False`..."
                    )
                    use_cache = False
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    bbox_pos_emb,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states=hidden_states,
                    bbox_pos_emb=bbox_pos_emb,
                    attention_mask=attention_mask,
                    head_mask=layer_head_mask,
                    encoder_hidden_states=encoder_hidden_states,
                    encoder_attention_mask=encoder_attention_mask,
                    past_key_value=past_key_value,
                    output_attentions=output_attentions,
                )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                if self.config.add_cross_attention:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )


class BrosPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # "pool" the model by simply taking the hidden state corresponding to the first token
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class BrosRelationExtractor(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size
        self.head_hidden_size = config.hidden_size
        self.classifier_dropout_prob = config.classifier_dropout_prob

        self.drop = nn.Dropout(self.classifier_dropout_prob)
        self.query = nn.Linear(self.backbone_hidden_size, self.n_relations * self.head_hidden_size)
        self.key = nn.Linear(self.backbone_hidden_size, self.n_relations * self.head_hidden_size)
        # extra "dummy node" appended to the keys so that a token can point to "no relation"
        self.dummy_node = nn.Parameter(torch.zeros(1, self.backbone_hidden_size))

    def forward(self, query_layer: torch.Tensor, key_layer: torch.Tensor):
        query_layer = self.query(self.drop(query_layer))

        dummy_vec = self.dummy_node.unsqueeze(0).repeat(1, key_layer.size(1), 1)
        key_layer = torch.cat([key_layer, dummy_vec], axis=0)
        key_layer = self.key(self.drop(key_layer))

        query_layer = query_layer.view(
            query_layer.size(0), query_layer.size(1), self.n_relations, self.head_hidden_size
        )
        key_layer = key_layer.view(key_layer.size(0), key_layer.size(1), self.n_relations, self.head_hidden_size)

        relation_score = torch.matmul(
            query_layer.permute(2, 1, 0, 3), key_layer.permute(2, 1, 3, 0)
        )  # shape: (n_relations, batch_size, seq_length, seq_length + 1)

        return relation_score


@auto_docstring
class BrosPreTrainedModel(PreTrainedModel):
    config_class = BrosConfig
    base_model_prefix = "bros"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


@auto_docstring
class BrosModel(BrosPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        self.embeddings = BrosTextEmbeddings(config)
        self.bbox_embeddings = BrosBboxEmbeddings(config)
        self.encoder = BrosEncoder(config)

        self.pooler = BrosPooler(config) if add_pooling_layer else None

        self.init_weights()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if self.config.is_decoder:
            use_cache = use_cache if use_cache is not None else self.config.use_cache
        else:
            use_cache = False

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if bbox is None:
            raise ValueError("You have to specify bbox")

        batch_size, seq_length = input_shape
        device = input_ids.device if input_ids is not None else inputs_embeds.device

        # past_key_values_length
        past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        if token_type_ids is None:
            if hasattr(self.embeddings, "token_type_ids"):
                buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We can provide a self-attention mask of dimensions
        # [batch_size, from_seq_length, to_seq_length] ourselves, in which case we
        # just need to make it broadcastable to all heads.
        extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, device)

        # If a 2D or 3D attention mask is provided for the cross-attention,
        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length].
        if self.config.is_decoder and encoder_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
        else:
            encoder_extended_attention_mask = None

        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            past_key_values_length=past_key_values_length,
        )

        # if bbox has 2 points (4 float tensors) per token, convert it to 4 points (8 float tensors) per token
        if bbox.shape[-1] == 4:
            bbox = bbox[:, :, [0, 1, 2, 1, 2, 3, 0, 3]]
        scaled_bbox = bbox * self.config.bbox_scale
        bbox_position_embeddings = self.bbox_embeddings(scaled_bbox)

        encoder_outputs = self.encoder(
            embedding_output,
            bbox_pos_emb=bbox_position_embeddings,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            past_key_values=encoder_outputs.past_key_values,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            cross_attentions=encoder_outputs.cross_attentions,
        )


@auto_docstring
class BrosForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bros = BrosModel(config)
        classifier_dropout = (
            config.classifier_dropout if hasattr(config, "classifier_dropout") else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            if bbox_first_token_mask is not None:
                bbox_first_token_mask = bbox_first_token_mask.view(-1)
                loss = loss_fct(
                    logits.view(-1, self.num_labels)[bbox_first_token_mask], labels.view(-1)[bbox_first_token_mask]
                )
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    """
)
class BrosSpadeEEForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size

        self.bros = BrosModel(config)
        classifier_dropout = (
            config.classifier_dropout if hasattr(config, "classifier_dropout") else config.hidden_dropout_prob
        )

        # Initial token classification for Entity Extraction (NER)
        self.initial_token_classifier = nn.Sequential(
            nn.Dropout(classifier_dropout),
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.Dropout(classifier_dropout),
            nn.Linear(config.hidden_size, config.num_labels),
        )

        # Subsequent token classification for Entity Extraction (NER)
        self.subsequent_token_classifier = BrosRelationExtractor(config)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        initial_token_labels: Optional[torch.Tensor] = None,
        subsequent_token_labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], BrosSpadeOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        initial_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the initial token classification.
        subsequent_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the subsequent token classification.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_states = outputs[0]
        last_hidden_states = last_hidden_states.transpose(0, 1).contiguous()
        initial_token_logits = self.initial_token_classifier(last_hidden_states).transpose(0, 1).contiguous()
        subsequent_token_logits = self.subsequent_token_classifier(last_hidden_states, last_hidden_states).squeeze(0)

        # make subsequent token (sequence token classification) mask
        inv_attention_mask = 1 - attention_mask
        batch_size, max_seq_length = inv_attention_mask.shape
        device = inv_attention_mask.device
        invalid_token_mask = torch.cat([inv_attention_mask, torch.zeros([batch_size, 1]).to(device)], axis=1).bool()
        subsequent_token_logits = subsequent_token_logits.masked_fill(
            invalid_token_mask[:, None, :], torch.finfo(subsequent_token_logits.dtype).min
        )
        # a token may not point to itself as its own subsequent token
        self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()
        subsequent_token_logits = subsequent_token_logits.masked_fill(
            self_token_mask[None, :, :], torch.finfo(subsequent_token_logits.dtype).min
        )
        subsequent_token_mask = attention_mask.view(-1).bool()

        loss = None
        if initial_token_labels is not None and subsequent_token_labels is not None:
            loss_fct = CrossEntropyLoss()

            # get initial token loss
            initial_token_labels = initial_token_labels.view(-1)
            if bbox_first_token_mask is not None:
                bbox_first_token_mask = bbox_first_token_mask.view(-1)
                initial_token_loss = loss_fct(
                    initial_token_logits.view(-1, self.num_labels)[bbox_first_token_mask],
                    initial_token_labels[bbox_first_token_mask],
                )
            else:
                initial_token_loss = loss_fct(initial_token_logits.view(-1, self.num_labels), initial_token_labels)

            subsequent_token_labels = subsequent_token_labels.view(-1)
            subsequent_token_loss = loss_fct(
                subsequent_token_logits.view(-1, max_seq_length + 1)[subsequent_token_mask],
                subsequent_token_labels[subsequent_token_mask],
            )

            loss = initial_token_loss + subsequent_token_loss

        if not return_dict:
            output = (initial_token_logits, subsequent_token_logits) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return BrosSpadeOutput(
            loss=loss,
            initial_token_logits=initial_token_logits,
            subsequent_token_logits=subsequent_token_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    """
)
class BrosSpadeELForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size

        self.bros = BrosModel(config)
        self.entity_linker = BrosRelationExtractor(config)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_states = outputs[0]
        last_hidden_states = last_hidden_states.transpose(0, 1).contiguous()

        logits = self.entity_linker(last_hidden_states, last_hidden_states).squeeze(0)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()

            batch_size, max_seq_length = attention_mask.shape
            device = attention_mask.device

            # an entity may not link to itself
            self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()

            mask = bbox_first_token_mask.view(-1)
            bbox_first_token_mask = torch.cat(
                [
                    ~bbox_first_token_mask,
                    torch.zeros([batch_size, 1], dtype=torch.bool).to(device),
                ],
                axis=1,
            )
            logits = logits.masked_fill(bbox_first_token_mask[:, None, :], torch.finfo(logits.dtype).min)
            logits = logits.masked_fill(self_token_mask[None, :, :], torch.finfo(logits.dtype).min)

            loss = loss_fct(logits.view(-1, max_seq_length + 1)[mask], labels.view(-1)[mask])

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "BrosPreTrainedModel",
    "BrosModel",
    "BrosForTokenClassification",
    "BrosSpadeEEForTokenClassification",
    "BrosSpadeELForTokenClassification",
]