o
    Zh                     @   s  d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlm Z  e!e"Z#eG dd deZ$G dd dej%Z&G dd dej%Z'G dd dej%Z(G dd dej%Z)G dd dej%Z*G dd dej%Z+G dd dej%Z,G dd dej%Z-eG d d! d!e$Z.G d"d# d#e$Z/G d$d% d%ej%Z0ed&d'G d(d) d)e$Z1eG d*d+ d+e$Z2eG d,d- d-e$Z3G d.d/ d/ej%Z4eG d0d1 d1e$Z5d2d3 Z6g d4Z7dS )5zPyTorch MPNet model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MPNetConfigc                   @   s   e Zd ZeZdZdd ZdS )MPNetPreTrainedModelmpnetc                 C   s   t |tjr |jjjd| jjd |jdur|jj	  dS dS t |tj
rC|jjjd| jjd |jdurA|jj|j 	  dS dS t |tjrX|jj	  |jjd dS t |tre|jj	  dS dS )zInitialize the weightsg        )meanZstdNg      ?)
isinstancer   LinearweightdataZnormal_configZinitializer_rangebiasZzero_	Embeddingpadding_idx	LayerNormZfill_MPNetLMHead)selfmodule r)   W/var/www/auris/lib/python3.10/site-packages/transformers/models/mpnet/modeling_mpnet.py_init_weights1   s    


z"MPNetPreTrainedModel._init_weightsN)__name__
__module____qualname__r   Zconfig_classZbase_model_prefixr+   r)   r)   r)   r*   r   ,   s    r   c                       s.   e Zd Z fddZdddZdd Z  ZS )	MPNetEmbeddingsc                    s   t    d| _tj|j|j| jd| _tj|j|j| jd| _	tj
|j|jd| _
t|j| _| jdt|jddd d S )Nr   )r$   Zepsposition_ids)r   F)
persistent)super__init__r$   r   r#   
vocab_sizehidden_sizeword_embeddingsZmax_position_embeddingsposition_embeddingsr%   layer_norm_epsDropouthidden_dropout_probdropoutZregister_buffertorcharangeexpandr'   r!   	__class__r)   r*   r5   E   s   

zMPNetEmbeddings.__init__Nc           	      K   s   |d u r|d urt || j}n| |}|d ur| }n| d d }|d }|d u r8| jd d d |f }|d u rA| |}| |}|| }| |}| |}|S )Nr2   r   )	"create_position_ids_from_input_idsr$   &create_position_ids_from_inputs_embedssizer1   r8   r9   r%   r=   )	r'   	input_idsr1   inputs_embedskwargsinput_shapeZ
seq_lengthr9   
embeddingsr)   r)   r*   forwardS   s"   





zMPNetEmbeddings.forwardc                 C   sN   |  dd }|d }tj| jd || j d tj|jd}|d|S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr2   r   )dtypedevicer   )rF   r>   r?   r$   longrN   	unsqueezer@   )r'   rH   rJ   Zsequence_lengthr1   r)   r)   r*   rE   m   s   	z6MPNetEmbeddings.create_position_ids_from_inputs_embeds)NNN)r,   r-   r.   r5   rL   rE   __classcell__r)   r)   rB   r*   r/   D   s    
r/   c                       6   e Zd Z fddZdd Z				d	ddZ  ZS )
MPNetSelfAttentionc                    s   t    |j|j dkrt|dstd|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	
|j|j| _t	|j| _d S )Nr   Zembedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r4   r5   r7   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   r   qkvor;   Zattention_probs_dropout_probr=   rA   rB   r)   r*   r5      s   

zMPNetSelfAttention.__init__c                 C   s6   |  d d | j| jf }|j| }|ddddS )Nr2   r      r   r	   )rF   rU   rY   viewpermute)r'   xZnew_x_shaper)   r)   r*   transpose_for_scores   s   
z'MPNetSelfAttention.transpose_for_scoresNFc                 K   s  |  |}| |}| |}	| |}| |}| |	}	t||dd}
|
t| j	 }
|d ur8|
|7 }
|d ur@|
| }
t
jj|
dd}| |}|d urU|| }t||	}|dddd }| d d | jf }|j| }| |}|r||f}|S |f}|S )Nr2   dimr   r_   r   r	   )r[   r\   r]   rc   r>   matmulZ	transposemathsqrtrY   r   Z
functionalZsoftmaxr=   ra   
contiguousrF   rZ   r`   r^   )r'   hidden_statesattention_mask	head_maskposition_biasoutput_attentionsrI   r[   r\   r]   Zattention_scoresZattention_probscZnew_c_shaper^   outputsr)   r)   r*   rL      s2   
	







zMPNetSelfAttention.forwardNNNF)r,   r-   r.   r5   rc   rL   rQ   r)   r)   rB   r*   rS      s    rS   c                       rR   )
MPNetAttentionc                    sB   t    t|| _tj|j|jd| _t|j	| _
t | _d S Nr0   )r4   r5   rS   attnr   r%   r7   r:   r;   r<   r=   setpruned_headsrA   rB   r)   r*   r5      s
   

zMPNetAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| jj
|dd| j_
| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   re   )lenr   ru   rU   rY   rw   r   r[   r\   r]   r^   rZ   union)r'   headsindexr)   r)   r*   prune_heads   s   zMPNetAttention.prune_headsNFc           
      K   sB   | j |||||d}| | |d | }|f|dd   }	|	S )N)ro   r   r   )ru   r%   r=   )
r'   rk   rl   rm   rn   ro   rI   Zself_outputsattention_outputrq   r)   r)   r*   rL      s   	zMPNetAttention.forwardrr   )r,   r-   r.   r5   r|   rL   rQ   r)   r)   rB   r*   rs      s    rs   c                       2   e Zd Z fddZdejdejfddZ  ZS )MPNetIntermediatec                    sD   t    t|j|j| _t|jt	rt
|j | _d S |j| _d S N)r4   r5   r   r   r7   intermediate_sizedenser   Z
hidden_actstrr
   intermediate_act_fnrA   rB   r)   r*   r5      s
   
zMPNetIntermediate.__init__rk   returnc                 C   s   |  |}| |}|S r   )r   r   )r'   rk   r)   r)   r*   rL      s   

zMPNetIntermediate.forwardr,   r-   r.   r5   r>   TensorrL   rQ   r)   r)   rB   r*   r      s    r   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )MPNetOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S rt   )r4   r5   r   r   r   r7   r   r%   r:   r;   r<   r=   rA   rB   r)   r*   r5     s   
zMPNetOutput.__init__rk   input_tensorr   c                 C   s&   |  |}| |}| || }|S r   )r   r=   r%   )r'   rk   r   r)   r)   r*   rL     s   

zMPNetOutput.forwardr   r)   r)   rB   r*   r     s    $r   c                       s.   e Zd Z fddZ				dddZ  ZS )
MPNetLayerc                    s,   t    t|| _t|| _t|| _d S r   )r4   r5   rs   	attentionr   intermediater   outputrA   rB   r)   r*   r5     s   


zMPNetLayer.__init__NFc                 K   sL   | j |||||d}|d }|dd  }	| |}
| |
|}|f|	 }	|	S )N)rn   ro   r   r   )r   r   r   )r'   rk   rl   rm   rn   ro   rI   Zself_attention_outputsr}   rq   Zintermediate_outputZlayer_outputr)   r)   r*   rL     s   	

zMPNetLayer.forwardrr   )r,   r-   r.   r5   rL   rQ   r)   r)   rB   r*   r     s    	r   c                       sp   e Zd Z fddZ					ddejdeej deej ded	ed
efddZdddZ	e
dddZ  ZS )MPNetEncoderc                    sN   t     | _ j| _t fddt jD | _	t
 j| j| _d S )Nc                    s   g | ]}t  qS r)   )r   ).0_r!   r)   r*   
<listcomp>:  s    z)MPNetEncoder.__init__.<locals>.<listcomp>)r4   r5   r!   rU   Zn_headsr   Z
ModuleListrangenum_hidden_layerslayerr#   Zrelative_attention_num_bucketsrelative_attention_biasrA   rB   r   r*   r5   6  s
   
 zMPNetEncoder.__init__NFrk   rl   rm   ro   output_hidden_statesreturn_dictc                 K   s   |  |}|r	dnd }	|rdnd }
t| jD ]'\}}|r!|	|f }	||||| |fd|i|}|d }|r=|
|d f }
q|rE|	|f }	|sStdd ||	|
fD S t||	|
dS )Nr)   ro   r   r   c                 s   s    | ]	}|d ur|V  qd S r   r)   )r   r]   r)   r)   r*   	<genexpr>`  s    z'MPNetEncoder.forward.<locals>.<genexpr>)last_hidden_staterk   
attentions)compute_position_bias	enumerater   tupler   )r'   rk   rl   rm   ro   r   r   rI   rn   Zall_hidden_statesZall_attentionsiZlayer_moduleZlayer_outputsr)   r)   r*   rL   =  s:   



zMPNetEncoder.forward    c                 C   s   | d| d| d}}}|d ur,|d d d d d f }|d d d d d f }ntj|tjdd d d f }tj|tjdd d d f }|| }	| j|	|d}
|
|j}
| |
}|g d	d}|
|d||f }|S )Nr   r   )rM   )num_buckets)r_   r   r   r2   )rF   r>   r?   rO   relative_position_buckettorN   r   ra   rP   r@   rj   )r'   rb   r1   r   ZbszqlenZklenZcontext_positionZmemory_positionrelative_positionZ	rp_bucketvaluesr)   r)   r*   r   g  s   "
z"MPNetEncoder.compute_position_bias   c                 C   s   d}|  }|d }||dk  tj| 7 }t|}|d }||k }|t| | t||  ||   tj }t|t||d }|t	|||7 }|S )Nr   r_   r   )
r   r>   rO   abslogfloatrh   minZ	full_likewhere)r   r   Zmax_distanceretnZ	max_exactZis_smallZval_if_larger)   r)   r*   r   y  s   
&z%MPNetEncoder.relative_position_bucket)NNFFF)Nr   )r   r   )r,   r-   r.   r5   r>   r   r   boolrL   r   staticmethodr   rQ   r)   r)   rB   r*   r   5  s.    


*r   c                       r~   )MPNetPoolerc                    s*   t    t|j|j| _t | _d S r   )r4   r5   r   r   r7   r   ZTanh
activationrA   rB   r)   r*   r5     s   
zMPNetPooler.__init__rk   r   c                 C   s(   |d d df }|  |}| |}|S Nr   )r   r   )r'   rk   Zfirst_token_tensorpooled_outputr)   r)   r*   rL     s   

zMPNetPooler.forwardr   r)   r)   rB   r*   r     s    r   c                       s   e Zd Zd fdd	Zdd Zdd Zdd	 Ze	
	
	
	
	
	
	
	
ddee	j
 dee	j dee	j
 dee	j dee	j dee dee dee deee	j ef fddZ  ZS )
MPNetModelTc                    sD   t  | || _t|| _t|| _|rt|nd| _| 	  dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r4   r5   r!   r/   rK   r   encoderr   pooler	post_init)r'   r!   add_pooling_layerrB   r)   r*   r5     s   

zMPNetModel.__init__c                 C      | j jS r   rK   r8   r'   r)   r)   r*   get_input_embeddings     zMPNetModel.get_input_embeddingsc                 C   s   || j _d S r   r   )r'   valuer)   r)   r*   set_input_embeddings  s   zMPNetModel.set_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   r   r   r|   )r'   Zheads_to_pruner   rz   r)   r)   r*   _prune_heads  s   zMPNetModel._prune_headsNrG   rl   r1   rm   rH   ro   r   r   r   c	                 K   sL  |d ur|n| j j}|d ur|n| j j}|d ur|n| j j}|d ur*|d ur*td|d ur9| || | }
n|d urF| d d }
ntd|d urQ|jn|j}|d u r_tj	|
|d}| 
||
}| || j j}| j|||d}| j||||||d}|d }| jd ur| |nd }|s||f|dd   S t|||j|jd	S )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer2   z5You have to specify either input_ids or inputs_embeds)rN   )rG   r1   rH   )rl   rm   ro   r   r   r   r   )r   Zpooler_outputrk   r   )r!   ro   r   use_return_dictrW   Z%warn_if_padding_and_no_attention_maskrF   rN   r>   ZonesZget_extended_attention_maskZget_head_maskr   rK   r   r   r   rk   r   )r'   rG   rl   r1   rm   rH   ro   r   r   rI   rJ   rN   Zextended_attention_maskZembedding_outputZencoder_outputssequence_outputr   r)   r)   r*   rL     sH   
zMPNetModel.forward)T)NNNNNNNN)r,   r-   r.   r5   r   r   r   r   r   r>   
LongTensorFloatTensorr   r   r   r   r   rL   rQ   r)   r)   rB   r*   r     sB    	r   c                       s   e Zd ZdgZ fddZdd Zdd Ze									dd	ee	j
 d
ee	j dee	j
 dee	j dee	j dee	j
 dee dee dee deee	j ef fddZ  ZS )MPNetForMaskedLMzlm_head.decoderc                    s0   t  | t|dd| _t|| _|   d S NF)r   )r4   r5   r   r   r&   lm_headr   rA   rB   r)   r*   r5     s   
zMPNetForMaskedLM.__init__c                 C   r   r   )r   decoderr   r)   r)   r*   get_output_embeddings  r   z&MPNetForMaskedLM.get_output_embeddingsc                 C   s   || j _|j| j _d S r   )r   r   r"   )r'   Znew_embeddingsr)   r)   r*   set_output_embeddings  s   z&MPNetForMaskedLM.set_output_embeddingsNrG   rl   r1   rm   rH   labelsro   r   r   r   c
              
   C   s   |	dur|	n| j j}	| j||||||||	d}
|
d }| |}d}|dur7t }||d| j j|d}|	sM|f|
dd  }|durK|f| S |S t|||
j|
j	dS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nrl   r1   rm   rH   ro   r   r   r   r2   r_   losslogitsrk   r   )
r!   r   r   r   r   r`   r6   r   rk   r   )r'   rG   rl   r1   rm   rH   r   ro   r   r   rq   r   Zprediction_scoresZmasked_lm_lossloss_fctr   r)   r)   r*   rL     s4   
zMPNetForMaskedLM.forward	NNNNNNNNN)r,   r-   r.   Z_tied_weights_keysr5   r   r   r   r   r>   r   r   r   r   r   r   r   rL   rQ   r)   r)   rB   r*   r     sH    		
r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )r&   z5MPNet Head for masked and permuted language modeling.c                    sh   t    t|j|j| _tj|j|jd| _tj|j|j	dd| _
tt|j	| _| j| j
_d S )Nr0   F)r"   )r4   r5   r   r   r7   r   r%   r:   
layer_normr6   r   	Parameterr>   Zzerosr"   rA   rB   r)   r*   r5   B  s   
zMPNetLMHead.__init__c                 C   s   | j | j_ d S r   )r"   r   r   r)   r)   r*   _tie_weightsM  s   zMPNetLMHead._tie_weightsc                 K   s*   |  |}t|}| |}| |}|S r   )r   r   r   r   r'   featuresrI   rb   r)   r)   r*   rL   P  s
   


zMPNetLMHead.forward)r,   r-   r.   __doc__r5   r   rL   rQ   r)   r)   rB   r*   r&   ?  s
    r&   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )Zcustom_introc                          e Zd Z fddZe									ddeej deej deej deej deej d	eej d
ee	 dee	 dee	 de
eej ef fddZ  ZS )MPNetForSequenceClassificationc                    s8   t  | |j| _t|dd| _t|| _|   d S r   )r4   r5   
num_labelsr   r   MPNetClassificationHead
classifierr   rA   rB   r)   r*   r5   b  s
   
z'MPNetForSequenceClassification.__init__NrG   rl   r1   rm   rH   r   ro   r   r   r   c
              
   C   sf  |	dur|	n| j j}	| j||||||||	d}
|
d }| |}d}|dur| j jdu rP| jdkr6d| j _n| jdkrL|jtjksG|jtj	krLd| j _nd| j _| j jdkrnt
 }| jdkrh|| | }n+|||}n%| j jdkrt }||d| j|d}n| j jdkrt }|||}|	s|f|
d	d  }|dur|f| S |S t|||
j|
jd
S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr2   r_   r   )r!   r   r   r   Zproblem_typer   rM   r>   rO   rX   r   squeezer   r`   r   r   rk   r   r'   rG   rl   r1   rm   rH   r   ro   r   r   rq   r   r   r   r   r   r)   r)   r*   rL   l  sR   



"


z&MPNetForSequenceClassification.forwardr   )r,   r-   r.   r5   r   r   r>   r   r   r   r   r   r   r   rL   rQ   r)   r)   rB   r*   r   [  sB    
	
r   c                       r   )MPNetForMultipleChoicec                    s@   t  | t|| _t|j| _t|j	d| _
|   d S )Nr   )r4   r5   r   r   r   r;   r<   r=   r   r7   r   r   rA   rB   r)   r*   r5     s
   
zMPNetForMultipleChoice.__init__NrG   rl   r1   rm   rH   r   ro   r   r   r   c
              
   C   sN  |	dur|	n| j j}	|dur|jd n|jd }
|dur%|d|dnd}|dur4|d|dnd}|durC|d|dnd}|durV|d|d|dnd}| j||||||||	d}|d }| |}| |}|d|
}d}|durt }|||}|	s|f|dd  }|dur|f| S |S t	|||j
|jdS )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r2   rd   )r1   rl   rm   rH   ro   r   r   r_   r   )r!   r   shaper`   rF   r   r=   r   r   r   rk   r   )r'   rG   rl   r1   rm   rH   r   ro   r   r   Znum_choicesZflat_input_idsZflat_position_idsZflat_attention_maskZflat_inputs_embedsrq   r   r   Zreshaped_logitsr   r   r   r)   r)   r*   rL     sH   $



zMPNetForMultipleChoice.forwardr   )r,   r-   r.   r5   r   r   r>   r   r   r   r   r   r   r   rL   rQ   r)   r)   rB   r*   r     sB    
	
r   c                       r   )MPNetForTokenClassificationc                    sN   t  | |j| _t|dd| _t|j| _t	|j
|j| _|   d S r   )r4   r5   r   r   r   r   r;   r<   r=   r   r7   r   r   rA   rB   r)   r*   r5     s   z$MPNetForTokenClassification.__init__NrG   rl   r1   rm   rH   r   ro   r   r   r   c
              
   C   s   |	dur|	n| j j}	| j||||||||	d}
|
d }| |}| |}d}|dur;t }||d| j|d}|	sQ|f|
dd  }|durO|f| S |S t|||
j	|
j
dS )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr   r   r2   r_   r   )r!   r   r   r=   r   r   r`   r   r   rk   r   r   r)   r)   r*   rL     s6   

z#MPNetForTokenClassification.forwardr   )r,   r-   r.   r5   r   r   r>   r   r   r   r   r   r   r   rL   rQ   r)   r)   rB   r*   r     sB    	
r   c                       s(   e Zd ZdZ fddZdd Z  ZS )r   z-Head for sentence-level classification tasks.c                    s@   t    t|j|j| _t|j| _t|j|j	| _
d S r   )r4   r5   r   r   r7   r   r;   r<   r=   r   out_projrA   rB   r)   r*   r5   S  s   
z MPNetClassificationHead.__init__c                 K   sL   |d d dd d f }|  |}| |}t|}|  |}| |}|S r   )r=   r   r>   tanhr   r   r)   r)   r*   rL   Y  s   




zMPNetClassificationHead.forward)r,   r-   r.   r   r5   rL   rQ   r)   r)   rB   r*   r   P  s    r   c                       s   e Zd Z fddZe										ddeej deej deej deej deej d	eej d
eej dee	 dee	 dee	 de
eej ef fddZ  ZS )MPNetForQuestionAnsweringc                    s@   t  | |j| _t|dd| _t|j|j| _| 	  d S r   )
r4   r5   r   r   r   r   r   r7   
qa_outputsr   rA   rB   r)   r*   r5   e  s
   z"MPNetForQuestionAnswering.__init__NrG   rl   r1   rm   rH   start_positionsend_positionsro   r   r   r   c              
   C   sF  |
d ur|
n| j j}
| j|||||||	|
d}|d }| |}|jddd\}}|d }|d }d }|d ur|d urt| dkrN|d}t| dkr[|d}|d}|	d|}|	d|}t
|d}|||}|||}|| d }|
s||f|dd   }|d ur|f| S |S t||||j|jdS )	Nr   r   r   r2   re   )Zignore_indexr_   )r   start_logits
end_logitsrk   r   )r!   r   r   r   splitr   rj   rx   rF   clampr   r   rk   r   )r'   rG   rl   r1   rm   rH   r   r   ro   r   r   rq   r   r   r   r   Z
total_lossZignored_indexr   Z
start_lossZend_lossr   r)   r)   r*   rL   o  sN   






z!MPNetForQuestionAnswering.forward)
NNNNNNNNNN)r,   r-   r.   r5   r   r   r>   r   r   r   r   r   r   r   rL   rQ   r)   r)   rB   r*   r   c  sH    
	
r   c                 C   s2   |  | }tj|dd|| }| | S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    r   re   )nerX   r>   ZcumsumZtype_asrO   )rG   r$   maskZincremental_indicesr)   r)   r*   rD     s   rD   )r   r   r   r   r   r   r   r   )8r   rh   typingr   r   r   r>   r   Ztorch.nnr   r   r   Zactivationsr
   r   Zmodeling_outputsr   r   r   r   r   r   r   Zmodeling_utilsr   Zpytorch_utilsr   r   utilsr   r   Zconfiguration_mpnetr   Z
get_loggerr,   loggerr   Moduler/   rS   rs   r   r   r   r   r   r   r   r&   r   r   r   r   r   rD   __all__r)   r)   r)   r*   <module>   sP   $	
;I/ ZYGP\AK