"""PyTorch Bros model."""

import math
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import ModelOutput, auto_docstring, logging
from .configuration_bros import BrosConfig


logger = logging.get_logger(__name__)


@dataclass
class BrosSpadeOutput(ModelOutput):
    """
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[torch.FloatTensor] = None
    initial_token_logits: Optional[torch.FloatTensor] = None
    subsequent_token_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


class BrosPositionalEmbedding1D(nn.Module):
    def __init__(self, config):
        super(BrosPositionalEmbedding1D, self).__init__()

        self.dim_bbox_sinusoid_emb_1d = config.dim_bbox_sinusoid_emb_1d

        inv_freq = 1 / (
            10000 ** (torch.arange(0.0, self.dim_bbox_sinusoid_emb_1d, 2.0) / self.dim_bbox_sinusoid_emb_1d)
        )
        self.register_buffer("inv_freq", inv_freq)

    def forward(self, pos_seq: torch.Tensor) -> torch.Tensor:
        seq_size = pos_seq.size()
        b1, b2, b3 = seq_size
        sinusoid_inp = pos_seq.view(b1, b2, b3, 1) * self.inv_freq.view(1, 1, 1, self.dim_bbox_sinusoid_emb_1d // 2)
        pos_emb = torch.cat([sinusoid_inp.sin(), sinusoid_inp.cos()], dim=-1)
        return pos_emb


class BrosPositionalEmbedding2D(nn.Module):
    def __init__(self, config):
        super(BrosPositionalEmbedding2D, self).__init__()

        self.dim_bbox = config.dim_bbox
        self.x_pos_emb = BrosPositionalEmbedding1D(config)
        self.y_pos_emb = BrosPositionalEmbedding1D(config)

    def forward(self, bbox: torch.Tensor) -> torch.Tensor:
        # even bbox coordinates are x positions, odd ones are y positions
        stack = []
        for i in range(self.dim_bbox):
            if i % 2 == 0:
                stack.append(self.x_pos_emb(bbox[..., i]))
            else:
                stack.append(self.y_pos_emb(bbox[..., i]))
        bbox_pos_emb = torch.cat(stack, dim=-1)
        return bbox_pos_emb


class BrosBboxEmbeddings(nn.Module):
    def __init__(self, config):
        super(BrosBboxEmbeddings, self).__init__()
        self.bbox_sinusoid_emb = BrosPositionalEmbedding2D(config)
        self.bbox_projection = nn.Linear(config.dim_bbox_sinusoid_emb_2d, config.dim_bbox_projection, bias=False)

    def forward(self, bbox: torch.Tensor):
        # encode the pairwise relative positions between bounding boxes
        bbox_t = bbox.transpose(0, 1)
        bbox_pos = bbox_t[None, :, :, :] - bbox_t[:, None, :, :]
        bbox_pos_emb = self.bbox_sinusoid_emb(bbox_pos)
        bbox_pos_emb = self.bbox_projection(bbox_pos_emb)
        return bbox_pos_emb


class BrosTextEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()

        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
        self.register_buffer(
            "token_type_ids",
            torch.zeros(self.position_ids.size(), dtype=torch.long, device=self.position_ids.device),
            persistent=False,
        )

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        past_key_values_length: int = 0,
    ) -> torch.Tensor:
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]

        if token_type_ids is None:
            if hasattr(self, "token_type_ids"):
                buffered_token_type_ids = self.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = inputs_embeds + token_type_embeddings
        if self.position_embedding_type == "absolute":
            position_embeddings = self.position_embeddings(position_ids)
            embeddings += position_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class BrosSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention heads "
                f"({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            self.max_position_embeddings = config.max_position_embeddings
            self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, self.attention_head_size)

        self.is_decoder = config.is_decoder

    def transpose_for_scores(self, x: torch.Tensor):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        mixed_query_layer = self.query(hidden_states)

        # If this is instantiated as a cross-attention module, the keys and values come from an encoder; the
        # attention mask needs to be such that the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention and past_key_value is not None:
            # reuse k, v from the cross-attention cache
            key_layer = past_key_value[0]
            value_layer = past_key_value[1]
            attention_mask = encoder_attention_mask
        elif is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        query_layer = self.transpose_for_scores(mixed_query_layer)

        if self.is_decoder:
            past_key_value = (key_layer, value_layer)

        # raw attention scores between "query" and "key"
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            seq_length = hidden_states.size()[1]
            position_ids_l = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(-1, 1)
            position_ids_r = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(1, -1)
            distance = position_ids_l - position_ids_r
            positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1)
            positional_embedding = positional_embedding.to(dtype=query_layer.dtype)  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores
            elif self.position_embedding_type == "relative_key_query":
                relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key

        # bbox positional scores: attend over the pairwise relative-position embeddings of the bounding boxes
        batch_size, n_head, seq_length, d_head = query_layer.shape
        bbox_pos_emb = bbox_pos_emb.view(seq_length, seq_length, batch_size, d_head)
        bbox_pos_emb = bbox_pos_emb.permute([2, 0, 1, 3])
        bbox_pos_scores = torch.einsum("bnid,bijd->bnij", (query_layer, bbox_pos_emb))

        attention_scores = attention_scores + bbox_pos_scores

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # the attention mask is precomputed for all layers in the BrosModel forward() function
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.Softmax(dim=-1)(attention_scores)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        if self.is_decoder:
            outputs = outputs + (past_key_value,)

        return outputs


class BrosSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class BrosAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = BrosSelfAttention(config)
        self.output = BrosSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        self_outputs = self.self(
            hidden_states=hidden_states,
            bbox_pos_emb=bbox_pos_emb,
            attention_mask=attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
        )
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class BrosIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class BrosOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class BrosLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = BrosAttention(config)
        self.is_decoder = config.is_decoder
        self.add_cross_attention = config.add_cross_attention
        if self.add_cross_attention:
            if not self.is_decoder:
                raise Exception(f"{self} should be used as a decoder model if cross attention is added")
            self.crossattention = BrosAttention(config)
        self.intermediate = BrosIntermediate(config)
        self.output = BrosOutput(config)

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        # decoder uni-directional self-attention cached key/values tuple is at positions 1, 2
        self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
        self_attention_outputs = self.attention(
            hidden_states,
            bbox_pos_emb=bbox_pos_emb,
            attention_mask=attention_mask,
            head_mask=head_mask,
            output_attentions=output_attentions,
            past_key_value=self_attn_past_key_value,
        )
        attention_output = self_attention_outputs[0]

        # if decoder, the last output is a tuple of self-attn cache
        if self.is_decoder:
            outputs = self_attention_outputs[1:-1]
            present_key_value = self_attention_outputs[-1]
        else:
            outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        cross_attn_present_key_value = None
        if self.is_decoder and encoder_hidden_states is not None:
            if not hasattr(self, "crossattention"):
                raise Exception(
                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention"
                    " layers by setting `config.add_cross_attention=True`"
                )

            # cross-attn cached key/values tuple is at positions 3, 4 of past_key_value tuple
            cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
            cross_attention_outputs = self.crossattention(
                attention_output,
                bbox_pos_emb,
                attention_mask,
                head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                cross_attn_past_key_value,
                output_attentions,
            )
            attention_output = cross_attention_outputs[0]
            outputs = outputs + cross_attention_outputs[1:-1]  # add cross attentions if we output attention weights

            # add cross-attn cache to positions 3, 4 of present_key_value tuple
            cross_attn_present_key_value = cross_attention_outputs[-1]
            present_key_value = present_key_value + cross_attn_present_key_value

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output
        )
        outputs = (layer_output,) + outputs

        # if decoder, return the attn key/values as the last output
        if self.is_decoder:
            outputs = outputs + (present_key_value,)

        return outputs

    def feed_forward_chunk(self, attention_output):
        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(intermediate_output, attention_output)
        return layer_output


class BrosEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([BrosLayer(config) for _ in range(config.num_hidden_layers)])

    def forward(
        self,
        hidden_states: torch.Tensor,
        bbox_pos_emb: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None

        next_decoder_cache = () if use_cache else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            if getattr(self.config, "gradient_checkpointing", False) and self.training:
                if use_cache:
                    logger.warning(
                        "`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting "
                        "`use_cache=False`..."
                    )
                    use_cache = False
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    bbox_pos_emb,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states=hidden_states,
                    bbox_pos_emb=bbox_pos_emb,
                    attention_mask=attention_mask,
                    head_mask=layer_head_mask,
                    encoder_hidden_states=encoder_hidden_states,
                    encoder_attention_mask=encoder_attention_mask,
                    past_key_value=past_key_value,
                    output_attentions=output_attentions,
                )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                if self.config.add_cross_attention:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )


class BrosPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # "pool" the model by simply taking the hidden state corresponding to the first token
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class BrosRelationExtractor(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size
        self.head_hidden_size = config.hidden_size
        self.classifier_dropout_prob = config.classifier_dropout_prob

        self.drop = nn.Dropout(self.classifier_dropout_prob)
        self.query = nn.Linear(self.backbone_hidden_size, self.n_relations * self.head_hidden_size)
        self.key = nn.Linear(self.backbone_hidden_size, self.n_relations * self.head_hidden_size)
        # extra "dummy node" appended to the keys so that a token can point to "no relation"
        self.dummy_node = nn.Parameter(torch.zeros(1, self.backbone_hidden_size))

    def forward(self, query_layer: torch.Tensor, key_layer: torch.Tensor):
        query_layer = self.query(self.drop(query_layer))

        dummy_vec = self.dummy_node.unsqueeze(0).repeat(1, key_layer.size(1), 1)
        key_layer = torch.cat([key_layer, dummy_vec], axis=0)
        key_layer = self.key(self.drop(key_layer))

        query_layer = query_layer.view(
            query_layer.size(0), query_layer.size(1), self.n_relations, self.head_hidden_size
        )
        key_layer = key_layer.view(key_layer.size(0), key_layer.size(1), self.n_relations, self.head_hidden_size)

        relation_score = torch.matmul(
            query_layer.permute(2, 1, 0, 3), key_layer.permute(2, 1, 3, 0)
        )  # shape: (n_relations, batch_size, seq_length, seq_length + 1)

        return relation_score


@auto_docstring
class BrosPreTrainedModel(PreTrainedModel):
    config_class = BrosConfig
    base_model_prefix = "bros"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


@auto_docstring
class BrosModel(BrosPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        self.embeddings = BrosTextEmbeddings(config)
        self.bbox_embeddings = BrosBboxEmbeddings(config)
        self.encoder = BrosEncoder(config)

        self.pooler = BrosPooler(config) if add_pooling_layer else None

        self.init_weights()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if self.config.is_decoder:
            use_cache = use_cache if use_cache is not None else self.config.use_cache
        else:
            use_cache = False

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if bbox is None:
            raise ValueError("You have to specify bbox")

        batch_size, seq_length = input_shape
        device = input_ids.device if input_ids is not None else inputs_embeds.device

        # past_key_values_length
        past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        if token_type_ids is None:
            if hasattr(self.embeddings, "token_type_ids"):
                buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We can provide a self-attention mask of dimensions
        # [batch_size, from_seq_length, to_seq_length] ourselves, in which case we
        # just need to make it broadcastable to all heads.
        extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, device)

        # If a 2D or 3D attention mask is provided for the cross-attention,
        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length].
        if self.config.is_decoder and encoder_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
        else:
            encoder_extended_attention_mask = None

        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            past_key_values_length=past_key_values_length,
        )

        # if bbox has 2 points (4 float tensors) per token, convert it to 4 points (8 float tensors) per token
        if bbox.shape[-1] == 4:
            bbox = bbox[:, :, [0, 1, 2, 1, 2, 3, 0, 3]]
        scaled_bbox = bbox * self.config.bbox_scale
        bbox_position_embeddings = self.bbox_embeddings(scaled_bbox)

        encoder_outputs = self.encoder(
            embedding_output,
            bbox_pos_emb=bbox_position_embeddings,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            past_key_values=encoder_outputs.past_key_values,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            cross_attentions=encoder_outputs.cross_attentions,
        )


@auto_docstring
class BrosForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bros = BrosModel(config)
        classifier_dropout = (
            config.classifier_dropout if hasattr(config, "classifier_dropout") else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            if bbox_first_token_mask is not None:
                bbox_first_token_mask = bbox_first_token_mask.view(-1)
                loss = loss_fct(
                    logits.view(-1, self.num_labels)[bbox_first_token_mask], labels.view(-1)[bbox_first_token_mask]
                )
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    """
)
class BrosSpadeEEForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size

        self.bros = BrosModel(config)
        classifier_dropout = (
            config.classifier_dropout if hasattr(config, "classifier_dropout") else config.hidden_dropout_prob
        )

        # Initial token classification for Entity Extraction (NER)
        self.initial_token_classifier = nn.Sequential(
            nn.Dropout(classifier_dropout),
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.Dropout(classifier_dropout),
            nn.Linear(config.hidden_size, config.num_labels),
        )

        # Subsequent token classification for Entity Extraction (NER)
        self.subsequent_token_classifier = BrosRelationExtractor(config)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        initial_token_labels: Optional[torch.Tensor] = None,
        subsequent_token_labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], BrosSpadeOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        initial_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the initial token classification.
        subsequent_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the subsequent token classification.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_states = outputs[0]
        last_hidden_states = last_hidden_states.transpose(0, 1).contiguous()
        initial_token_logits = self.initial_token_classifier(last_hidden_states).transpose(0, 1).contiguous()
        subsequent_token_logits = self.subsequent_token_classifier(last_hidden_states, last_hidden_states).squeeze(0)

        # make subsequent token (sequence token classification) mask
        inv_attention_mask = 1 - attention_mask
        batch_size, max_seq_length = inv_attention_mask.shape
        device = inv_attention_mask.device
        invalid_token_mask = torch.cat([inv_attention_mask, torch.zeros([batch_size, 1]).to(device)], axis=1).bool()
        subsequent_token_logits = subsequent_token_logits.masked_fill(
            invalid_token_mask[:, None, :], torch.finfo(subsequent_token_logits.dtype).min
        )
        # a token may not point to itself as its own subsequent token
        self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()
        subsequent_token_logits = subsequent_token_logits.masked_fill(
            self_token_mask[None, :, :], torch.finfo(subsequent_token_logits.dtype).min
        )
        subsequent_token_mask = attention_mask.view(-1).bool()

        loss = None
        if initial_token_labels is not None and subsequent_token_labels is not None:
            loss_fct = CrossEntropyLoss()

            # get initial token loss
            initial_token_labels = initial_token_labels.view(-1)
            if bbox_first_token_mask is not None:
                bbox_first_token_mask = bbox_first_token_mask.view(-1)
                initial_token_loss = loss_fct(
                    initial_token_logits.view(-1, self.num_labels)[bbox_first_token_mask],
                    initial_token_labels[bbox_first_token_mask],
                )
            else:
                initial_token_loss = loss_fct(initial_token_logits.view(-1, self.num_labels), initial_token_labels)

            subsequent_token_labels = subsequent_token_labels.view(-1)
            subsequent_token_loss = loss_fct(
                subsequent_token_logits.view(-1, max_seq_length + 1)[subsequent_token_mask],
                subsequent_token_labels[subsequent_token_mask],
            )

            loss = initial_token_loss + subsequent_token_loss

        if not return_dict:
            output = (initial_token_logits, subsequent_token_logits) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return BrosSpadeOutput(
            loss=loss,
            initial_token_logits=initial_token_logits,
            subsequent_token_logits=subsequent_token_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    """
)
class BrosSpadeELForTokenClassification(BrosPreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.n_relations = config.n_relations
        self.backbone_hidden_size = config.hidden_size

        self.bros = BrosModel(config)
        self.entity_linker = BrosRelationExtractor(config)

        self.init_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        bbox: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        bbox_first_token_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bros(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_states = outputs[0]
        last_hidden_states = last_hidden_states.transpose(0, 1).contiguous()

        logits = self.entity_linker(last_hidden_states, last_hidden_states).squeeze(0)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()

            batch_size, max_seq_length = attention_mask.shape
            device = attention_mask.device

            # an entity may not link to itself
            self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()

            mask = bbox_first_token_mask.view(-1)
            bbox_first_token_mask = torch.cat(
                [
                    ~bbox_first_token_mask,
                    torch.zeros([batch_size, 1], dtype=torch.bool).to(device),
                ],
                axis=1,
            )
            logits = logits.masked_fill(bbox_first_token_mask[:, None, :], torch.finfo(logits.dtype).min)
            logits = logits.masked_fill(self_token_mask[None, :, :], torch.finfo(logits.dtype).min)

            loss = loss_fct(logits.view(-1, max_seq_length + 1)[mask], labels.view(-1)[mask])

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "BrosPreTrainedModel",
    "BrosModel",
    "BrosForTokenClassification",
    "BrosSpadeEEForTokenClassification",
    "BrosSpadeELForTokenClassification",
]