
    fTh                     l   S r SSKrSSKJrJrJr  SSKrSSKJr  SSKJ	r	J
r
Jr  SSKJrJr  SSKJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJ r   \RB                  " \"5      r#\ " S S\5      5       r$ " S S\RJ                  5      r& " S S\RJ                  5      r' " S S\RJ                  5      r( " S S\RJ                  5      r) " S S\RJ                  5      r* " S S\RJ                  5      r+ " S S\RJ                  5      r, " S S\RJ                  5      r-\ " S  S!\$5      5       r. " S" S#\$5      r/ " S$ S%\RJ                  5      r0\" S&S'9 " S( S)\$5      5       r1\ " S* S+\$5      5       r2\ " S, S-\$5      5       r3 " S. S/\RJ                  5      r4\ " S0 S1\$5      5       r5S2 r6/ S3Qr7g)4zPyTorch MPNet model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MPNetConfigc                   "    \ rS rSr\rSrS rSrg)MPNetPreTrainedModel,   mpnetc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   Linearweightdatanormal_configinitializer_rangebiaszero_	Embeddingpadding_idx	LayerNormfill_MPNetLMHead)selfmodules     `/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/mpnet/modeling_mpnet.py_init_weights"MPNetPreTrainedModel._init_weights1   s2   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S),,KK""$ -     N)	__name__
__module____qualname____firstlineno__r   config_classbase_model_prefixr3   __static_attributes__r6   r5   r2   r   r   ,   s    L%r5   r   c                   8   ^  \ rS rSrU 4S jrSS jrS rSrU =r$ )MPNetEmbeddingsD   c                 6  > [         TU ]  5         SU l        [        R                  " UR
                  UR                  U R                  S9U l        [        R                  " UR                  UR                  U R                  S9U l	        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        U R                  S[         R"                  " UR                  5      R%                  S5      SS9  g )Nr   )r,   epsposition_ids)r   F)
persistent)super__init__r,   r   r+   
vocab_sizehidden_sizeword_embeddingsmax_position_embeddingsposition_embeddingsr-   layer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandr0   r'   	__class__s     r2   rH   MPNetEmbeddings.__init__E   s    !||F,=,=v?Q?Q_c_o_op#%<<**F,>,>DL\L\$
  f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
r5   c                 ~   Uc*  Ub  [        XR                  5      nOU R                  U5      nUb  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc  U R                  U5      nU R                  U5      nX7-   nU R                  U5      nU R                  U5      nU$ )NrE   r   )	"create_position_ids_from_input_idsr,   &create_position_ids_from_inputs_embedssizerD   rK   rM   r-   rQ   )	r0   	input_idsrD   inputs_embedskwargsinput_shape
seq_lengthrM   
embeddingss	            r2   forwardMPNetEmbeddings.forwardS   s    $A)M]M]^#JJ=Y #..*K',,.s3K ^
,,Q^<L  00;M"66|D"8
^^J/
\\*-
r5   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
NrE   r   )dtypedevicer   )r\   rS   rT   r,   longrg   	unsqueezerU   )r0   r^   r`   sequence_lengthrD   s        r2   r[   6MPNetEmbeddings.create_position_ids_from_inputs_embedsm   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r5   )r-   rQ   r,   rM   rK   )NNN)	r7   r8   r9   r:   rH   rc   r[   r=   __classcell__rW   s   @r2   r?   r?   D   s    
4= =r5   r?   c                   @   ^  \ rS rSrU 4S jrS r    SS jrSrU =r$ )MPNetSelfAttention   c                 @  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                   5      U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())rG   rH   rJ   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   r#   qkvorO   attention_probs_dropout_probrQ   rV   s     r2   rH   MPNetSelfAttention.__init__   s=    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP6--t/A/AB6--t/A/AB6--t/A/AB6--v/A/ABzz&"E"EFr5   c                     UR                  5       S S U R                  U R                  4-   nUR                  " U6 nUR	                  SSSS5      $ )NrE   r      r   r
   )r\   rt   rx   viewpermute)r0   xnew_x_shapes      r2   transpose_for_scores'MPNetSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFFK yyAq!$$r5   c                    U R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      nU R                  U5      nU R                  U	5      n	[        R
                  " XxR                  SS5      5      n
U
[        R                  " U R                  5      -  n
Ub  X-  n
Ub  X-   n
[        R                  R                  U
SS9nU R                  U5      nUb  X-  n[        R
                  " X5      nUR                  SSSS5      R                  5       nUR!                  5       S S U R"                  4-   nUR$                  " U6 nU R'                  U5      nU(       a  X4nU$ U4nU$ )NrE   dimr   r   r   r
   )rz   r{   r|   r   rS   matmul	transposemathsqrtrx   r   
functionalsoftmaxrQ   r   
contiguousr\   ry   r   r}   )r0   hidden_statesattention_mask	head_maskposition_biasoutput_attentionsr_   rz   r{   r|   attention_scoresattention_probscnew_c_shaper}   outputss                   r2   rc   MPNetSelfAttention.forward   sp    FF=!FF=!FF=!%%a(%%a(%%a( !<<;;r2+>?+dii8P8P.QQ $-%/@ --//0@b/I,,7 -9OLL,IIaAq!,,.ffhsmt'9'9&;;FFK FF1I*;1& CDr5   )ry   rx   rQ   r{   rt   r}   rz   r|   NNNF)	r7   r8   r9   r:   rH   r   rc   r=   rl   rm   s   @r2   ro   ro      s&    G&% - -r5   ro   c                   @   ^  \ rS rSrU 4S jrS r    SS jrSrU =r$ )MPNetAttention   c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        [        5       U l        g NrB   )rG   rH   ro   attnr   r-   rJ   rN   rO   rP   rQ   setpruned_headsrV   s     r2   rH   MPNetAttention.__init__   sX    &v.	f&8&8f>S>STzz&"<"<=Er5   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l
        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )lenr   r   rt   rx   r   r   rz   r{   r|   r}   ry   union)r0   headsindexs      r2   prune_headsMPNetAttention.prune_heads   s   u:?79900$))2O2OQUQbQb
 )e<		(e<		(e<		(eC		(,		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r5   c                     U R                  UUUUUS9nU R                  U R                  US   5      U-   5      nU4USS  -   n	U	$ )N)r   r   r   )r   r-   rQ   )
r0   r   r   r   r   r   r_   self_outputsattention_outputr   s
             r2   rc   MPNetAttention.forward   sb     yy/ ! 
  >>$,,|A*G-*WX#%QR(88r5   )r-   r   rQ   r   r   )	r7   r8   r9   r:   rH   r   rc   r=   rl   rm   s   @r2   r   r      s%    ";&  r5   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MPNetIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g N)rG   rH   r   r#   rJ   intermediate_sizedenser"   
hidden_actstrr   intermediate_act_fnrV   s     r2   rH   MPNetIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r5   r   returnc                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r0   r   s     r2   rc   MPNetIntermediate.forward   s&    

=100?r5   r   
r7   r8   r9   r:   rH   rS   Tensorrc   r=   rl   rm   s   @r2   r   r      s(    9U\\ ell  r5   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MPNetOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )rG   rH   r   r#   r   rJ   r   r-   rN   rO   rP   rQ   rV   s     r2   rH   MPNetOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r5   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   )r   rQ   r-   )r0   r   r   s      r2   rc   MPNetOutput.forward  s5    

=1]3}'CDr5   )r-   r   rQ   r   rm   s   @r2   r   r     s6    >U\\  RWR^R^  r5   r   c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )
MPNetLayeri  c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        U5      U l        g r   )rG   rH   r   	attentionr   intermediater   outputrV   s     r2   rH   MPNetLayer.__init__  s3    '/-f5!&)r5   c                     U R                  UUUUUS9nUS   nUSS  n	U R                  U5      n
U R                  X5      nU4U	-   n	U	$ )N)r   r   r   r   r   r   r   )r0   r   r   r   r   r   r_   self_attention_outputsr   r   intermediate_outputlayer_outputs               r2   rc   MPNetLayer.forward  sr     "&'/ "0 "
 2!4(,"//0@A{{#6I/G+r5   r   r   )r7   r8   r9   r:   rH   rc   r=   rl   rm   s   @r2   r   r     s     *  r5   r   c                      ^  \ rS rSrU 4S jr     SS\R                  S\\R                     S\\R                     S\S\S\4S	 jjr	SS
 jr
\SS j5       rSrU =r$ )MPNetEncoderi5  c                 J  > [         TU ]  5         Xl        UR                  U l        [
        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l
        [
        R                  " UR                  U R                  5      U l        g s  snf r   )rG   rH   r'   rt   n_headsr   
ModuleListrangenum_hidden_layersr   layerr+   relative_attention_num_bucketsrelative_attention_bias)r0   r'   _rW   s      r2   rH   MPNetEncoder.__init__6  sw    11]]fF^F^@_#`@_1Jv$6@_#`a
')||F4Y4Y[_[g[g'h$ $as   B r   r   r   r   output_hidden_statesreturn_dictc                 T   U R                  U5      nU(       a  SOS n	U(       a  SOS n
[        U R                  5       H7  u  pU(       a  X4-   n	U" UUX;   U4SU0UD6nUS   nU(       d  M/  XS   4-   n
M9     U(       a  X4-   n	U(       d  [        S XU
4 5       5      $ [	        UU	U
S9$ )Nr6   r   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r6   ).0r|   s     r2   	<genexpr>'MPNetEncoder.forward.<locals>.<genexpr>`  s     h$Vq$Vs   	)last_hidden_stater   
attentions)compute_position_bias	enumerater   tupler   )r0   r   r   r   r   r   r   r_   r   all_hidden_statesall_attentionsilayer_modulelayer_outputss                 r2   rc   MPNetEncoder.forward=  s     22=A"6BD0d(4OA#$58H$H!(	
 #4 M *!,M  !/3C2E!E  5$   14D Dh]~$Vhhh++%
 	
r5   c                 P   UR                  S5      UR                  S5      UR                  S5      penUb  US S 2S S 2S 4   nUS S 2S S S 24   nOT[        R                  " U[        R                  S9S S 2S 4   n[        R                  " U[        R                  S9S S S 24   nX-
  n	U R	                  XS9n
U
R                  UR                  5      n
U R                  U
5      nUR                  / SQ5      R                  S5      nUR                  USXV45      R                  5       nU$ )Nr   r   )rf   )num_buckets)r   r   r   rE   )r\   rS   rT   rh   relative_position_buckettorg   r   r   ri   rU   r   )r0   r   rD   r   bszqlenklencontext_positionmemory_positionrelative_position	rp_bucketvaluess               r2   r   "MPNetEncoder.compute_position_biasg  s   &&)QVVAYq	4#+Aq$J7*1dA:6O$||D

CAtGL#ll4uzzB47KO+>112C1]	LL*	--i8	*44Q7R45@@Br5   c                     SnU * nUS-  nX4S:  R                  [        R                  5      U-  -  n[        R                  " U5      nUS-  nXE:  nU[        R                  " UR                  5       U-  5      [        R                  " X%-  5      -  X-
  -  R                  [        R                  5      -   n[        R                  " U[        R                  " XqS-
  5      5      nU[        R                  " XdU5      -  nU$ )Nr   r   r   )
r   rS   rh   abslogfloatr   min	full_likewhere)r   r   max_distanceretn	max_exactis_smallval_if_larges           r2   r   %MPNetEncoder.relative_position_buckety  s    Azz%**%33IIaL1$	= IIaggi)+,txx8P/QQU`Ulm
"UZZ. yyu|[\_/]^u{{855
r5   )r'   r   r   r   )NNFFF)N    )r     )r7   r8   r9   r:   rH   rS   r   r   boolrc   r   staticmethodr   r=   rl   rm   s   @r2   r   r   5  s    i 26,0"'%*!(
||(
 !.(
 ELL)	(

  (
 #(
 (
T$  r5   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MPNetPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )rG   rH   r   r#   rJ   r   Tanh
activationrV   s     r2   rH   MPNetPooler.__init__  s9    YYv1163E3EF
'')r5   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ Nr   )r   r  )r0   r   first_token_tensorpooled_outputs       r2   rc   MPNetPooler.forward  s6     +1a40

#566r5   )r  r   r   rm   s   @r2   r  r    s(    $
U\\ ell  r5   r  c                   B  ^  \ rS rSrSU 4S jjrS rS rS r\        SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\   S\	\   S\	\   S\\\
R                      \4   4S jj5       rSrU =r$ )
MPNetModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
rG   rH   r'   r?   rb   r   encoderr  pooler	post_init)r0   r'   add_pooling_layerrW   s      r2   rH   MPNetModel.__init__  sK    
 	 )&1#F+->k&)D 	r5   c                 .    U R                   R                  $ r   rb   rK   r0   s    r2   get_input_embeddingsMPNetModel.get_input_embeddings  s    ...r5   c                 $    XR                   l        g r   r)  )r0   values     r2   set_input_embeddingsMPNetModel.set_input_embeddings  s    */'r5   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr#  r   r   r   )r0   heads_to_pruner   r   s       r2   _prune_headsMPNetModel._prune_heads  s<    
 +002LELLu%//;;EB 3r5   r]   r   rD   r   r^   r   r   r   r   c	           	         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nU R                  X*5      nU R                  X@R                   R                  5      nU R                  XUS9nU R                  UUUUUUS9nUS   nU R                  b  U R                  U5      OS nU(       d
  UU4USS  -   $ [!        UUUR"                  UR$                  S	9$ )
NzDYou cannot specify both input_ids and inputs_embeds at the same timerE   z5You have to specify either input_ids or inputs_embeds)rg   )r]   rD   r^   )r   r   r   r   r   r   r   )r   pooler_outputr   r   )r'   r   r   use_return_dictrv   %warn_if_padding_and_no_attention_maskr\   rg   rS   onesget_extended_attention_maskget_head_maskr   rb   r#  r$  r   r   r   )r0   r]   r   rD   r   r^   r   r   r   r_   r`   rg   extended_attention_maskembedding_outputencoder_outputssequence_outputr  s                    r2   rc   MPNetModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN040P0PQ_0m&&y++2O2OP	??Yiv?w,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r5   )r'   rb   r#  r$  )T)NNNNNNNN)r7   r8   r9   r:   rH   r+  r/  r4  r   r   rS   
LongTensorFloatTensorr  r   r   r   r   rc   r=   rl   rm   s   @r2   r!  r!    s    /0C  156:371559,0/3&*7
E,,-7
 !!2!237
 u//0	7

 E--.7
   1 127
 $D>7
 'tn7
 d^7
 
uU\\"$>>	?7
 7
r5   r!  c                   ^  ^  \ rS rSrS/rU 4S jrS rS r\         SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\
R                      \4   4S jj5       rSrU =r$ )MPNetForMaskedLMi  zlm_head.decoderc                    > [         TU ]  U5        [        USS9U l        [	        U5      U l        U R                  5         g NF)r&  )rG   rH   r!  r   r/   lm_headr%  rV   s     r2   rH   MPNetForMaskedLM.__init__  s6     %@
"6* 	r5   c                 .    U R                   R                  $ r   )rH  decoderr*  s    r2   get_output_embeddings&MPNetForMaskedLM.get_output_embeddings  s    ||###r5   c                 Z    XR                   l        UR                  U R                   l        g r   )rH  rK  r)   )r0   new_embeddingss     r2   set_output_embeddings&MPNetForMaskedLM.set_output_embeddings  s    -*//r5   r]   r   rD   r   r^   labelsr   r   r   r   c
                    U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU	(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr   rD   r   r^   r   r   r   r   rE   r   losslogitsr   r   )
r'   r8  r   rH  r   r   rI   r   r   r   )r0   r]   r   rD   r   r^   rR  r   r   r   r   r@  prediction_scoresmasked_lm_lossloss_fctr   s                   r2   rc   MPNetForMaskedLM.forward  s    & &1%<k$++B]B]**)%'/!5#  	
 "!* LL9')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r5   )rH  r   	NNNNNNNNN)r7   r8   r9   r:   _tied_weights_keysrH   rL  rP  r   r   rS   rB  rC  r  r   r   r   r   rc   r=   rl   rm   s   @r2   rE  rE    s   +,$0  156:371559-1,0/3&*0
E,,-0
 !!2!230
 u//0	0

 E--.0
   1 120
 ))*0
 $D>0
 'tn0
 d^0
 
uU\\"N2	30
 0
r5   rE  c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )r/   i?  z5MPNet Head for masked and permuted language modeling.c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  SS9U l
        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NrB   F)r)   )rG   rH   r   r#   rJ   r   r-   rN   
layer_normrI   rK  	ParameterrS   zerosr)   rV   s     r2   rH   MPNetLMHead.__init__B  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FUSLLV->->!?@	 !IIr5   c                 :    U R                   U R                  l         g r   )r)   rK  r*  s    r2   _tie_weightsMPNetLMHead._tie_weightsM  s     IIr5   c                     U R                  U5      n[        U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r`  rK  r0   featuresr_   r   s       r2   rc   MPNetLMHead.forwardP  s;    JJx GOOA LLOr5   )r)   rK  r   r`  )
r7   r8   r9   r:   __doc__rH   re  rc   r=   rl   rm   s   @r2   r/   r/   ?  s    ?	&& r5   r/   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   L  ^  \ rS rSrU 4S jr\         SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\
   S
\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )MPNetForSequenceClassificationi[  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [        U5      U l        U R                  5         g rG  )rG   rH   
num_labelsr!  r   MPNetClassificationHead
classifierr%  rV   s     r2   rH   'MPNetForSequenceClassification.__init__b  sC      ++%@
1&9 	r5   r]   r   rD   r   r^   rR  r   r   r   r   c
                 .   U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU	(       d  U4U
S	S -   nUb  U4U-   $ U$ [        UUU
R                   U
R"                  S
9$ )ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NrT  r   r   
regressionsingle_label_classificationmulti_label_classificationrE   r   rU  )r'   r8  r   rr  problem_typerp  rf   rS   rh   rw   r	   squeezer   r   r   r   r   r   r0   r]   r   rD   r   r^   rR  r   r   r   r   r@  rW  rV  rZ  r   s                   r2   rc   &MPNetForSequenceClassification.forwardl  s   ( &1%<k$++B]B]**)%'/!5#  	
 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r5   )rr  r   rp  r\  )r7   r8   r9   r:   rH   r   r   rS   rB  rC  r  r   r   r   r   rc   r=   rl   rm   s   @r2   rn  rn  [  s	     156:371559-1,0/3&*A
E,,-A
 !!2!23A
 u//0	A

 E--.A
   1 12A
 ))*A
 $D>A
 'tnA
 d^A
 
uU\\"$<<	=A
 A
r5   rn  c                   L  ^  \ rS rSrU 4S jr\         SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\
   S
\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )MPNetForMultipleChoicei  c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  S5      U l
        U R                  5         g )Nr   )rG   rH   r!  r   r   rO   rP   rQ   r#   rJ   rr  r%  rV   s     r2   rH   MPNetForMultipleChoice.__init__  sV     '
zz&"<"<=))F$6$6: 	r5   r]   r   rD   r   r^   rR  r   r   r   r   c
                    U	b  U	OU R                   R                  n	Ub  UR                  S   OUR                  S   n
Ub!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUU	S9nUS   nU R                  U5      nU R                  U5      nUR                  SU
5      nSnUb  [        5       nU" UU5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a(  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   rE   r   )rD   r   r   r^   r   r   r   r   rU  )r'   r8  shaper   r\   r   rQ   rr  r   r   r   r   )r0   r]   r   rD   r   r^   rR  r   r   r   num_choicesflat_input_idsflat_position_idsflat_attention_maskflat_inputs_embedsr   r  rW  reshaped_logitsrV  rZ  r   s                         r2   rc   MPNetForMultipleChoice.forward  s   H &1%<k$++B]B],5,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 ***.,/!5#  	
  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r5   )rr  rQ   r   r\  )r7   r8   r9   r:   rH   r   r   rS   rB  rC  r  r   r   r   r   rc   r=   rl   rm   s   @r2   r}  r}    s	     156:371559-1,0/3&*M
E,,-M
 !!2!23M
 u//0	M

 E--.M
   1 12M
 ))*M
 $D>M
 'tnM
 d^M
 
uU\\"$==	>M
 M
r5   r}  c                   L  ^  \ rS rSrU 4S jr\         SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\
   S
\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )MPNetForTokenClassificationi  c                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g rG  )rG   rH   rp  r!  r   r   rO   rP   rQ   r#   rJ   rr  r%  rV   s     r2   rH   $MPNetForTokenClassification.__init__  sk      ++%@
zz&"<"<=))F$6$68I8IJ 	r5   r]   r   rD   r   r^   rR  r   r   r   r   c
                    U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU	(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
NrT  r   rE   r   rU  )r'   r8  r   rQ   rr  r   r   rp  r   r   r   rz  s                   r2   rc   #MPNetForTokenClassification.forward  s    $ &1%<k$++B]B]**)%'/!5#  	
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r5   )rr  rQ   r   rp  r\  )r7   r8   r9   r:   rH   r   r   rS   rB  rC  r  r   r   r   r   rc   r=   rl   rm   s   @r2   r  r    s    	  156:371559-1,0/3&*1
E,,-1
 !!2!231
 u//0	1

 E--.1
   1 121
 ))*1
 $D>1
 'tn1
 d^1
 
uU\\"$99	:1
 1
r5   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rq  iP  z-Head for sentence-level classification tasks.c                 ,  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  5      U l
        g r   )rG   rH   r   r#   rJ   r   rO   rP   rQ   rp  out_projrV   s     r2   rH    MPNetClassificationHead.__init__S  s`    YYv1163E3EF
zz&"<"<=		&"4"4f6G6GHr5   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ r  )rQ   r   rS   tanhr  rh  s       r2   rc   MPNetClassificationHead.forwardY  sY    Q1WLLOJJqMJJqMLLOMM!r5   )r   rQ   r  )	r7   r8   r9   r:   rk  rH   rc   r=   rl   rm   s   @r2   rq  rq  P  s    7I r5   rq  c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )MPNetForQuestionAnsweringic  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g rG  )
rG   rH   rp  r!  r   r   r#   rJ   
qa_outputsr%  rV   s     r2   rH   "MPNetForQuestionAnswering.__init__e  sU      ++%@
))F$6$68I8IJ 	r5   r]   r   rD   r   r^   start_positionsend_positionsr   r   r   r   c                    U
b  U
OU R                   R                  n
U R                  UUUUUUU	U
S9nUS   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU
(       d  X4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	NrT  r   r   rE   r   )ignore_indexr   )rV  start_logits
end_logitsr   r   )r'   r8  r   r  splitry  r   r   r\   clampr   r   r   r   )r0   r]   r   rD   r   r^   r  r  r   r   r   r   r@  rW  r  r  
total_lossignored_indexrZ  
start_lossend_lossr   s                         r2   rc   !MPNetForQuestionAnswering.forwardo  s    &1%<k$++B]B]**)%'/!5#  	
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r5   )r   rp  r  )
NNNNNNNNNN)r7   r8   r9   r:   rH   r   r   rS   rB  rC  r  r   r   r   r   rc   r=   rl   rm   s   @r2   r  r  c  s     156:3715596:48,0/3&*<
E,,-<
 !!2!23<
 u//0	<

 E--.<
   1 12<
 "%"2"23<
   0 01<
 $D><
 'tn<
 d^<
 
uU\\"$@@	A<
 <
r5   r  c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
r   r   )nerw   rS   cumsumtype_asrh   )r]   r,   maskincremental_indicess       r2   rZ   rZ     sP     <<$((*D,,t3;;DADH##%33r5   )rE  r}  r  rn  r  r   r!  r   )8rk  r   typingr   r   r   rS   r   torch.nnr   r   r	   activationsr   r   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_mpnetr   
get_loggerr7   loggerr   Moduler?   ro   r   r   r   r   r   r  r!  rE  r/   rn  r}  r  rq  r  rZ   __all__r6   r5   r2   <module>r     s      ) )   A A '   . Q , , 
		H	% %? % %.8=bii 8=vF FR+RYY +^		  "))  @V299 Vt"))  V
% V
 V
rD
+ D
N")) 8 M
%9 M
M
` Y
1 Y
 Y
x >
"6 >
 >
Bbii & H
 4 H
 H
V4	r5   