
    fTh                        S SK r S SKrS SKrS SKJr  S SKJrJrJr  S SK	r	S SK	J
r
  S SKJrJrJr  SSKJr  SSKJrJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJ r J!r!J"r"  SSK#J$r$  \"RJ                  " \&5      r'S r( " S S\
RR                  5      r*\
RV                  \*S.r, " S S\
RR                  5      r- " S S\
RR                  5      r. " S S\
RR                  5      r/ " S S\
RR                  5      r0 " S S\
RR                  5      r1 " S S\
RR                  5      r2 " S S\
RR                  5      r3 " S  S!\
RR                  5      r4 " S" S#\
RR                  5      r5 " S$ S%\
RR                  5      r6 " S& S'\
RR                  5      r7 " S( S)\
RR                  5      r8 " S* S+\
RR                  5      r9 " S, S-\
RR                  5      r: " S. S/\
RR                  5      r; " S0 S1\
RR                  5      r< " S2 S3\
RR                  5      r= " S4 S5\
RR                  5      r>\! " S6 S7\5      5       r?\ " S8 S9\ 5      5       r@\! " S: S;\?5      5       rA\!" S<S=9 " S> S?\?5      5       rB\! " S@ SA\?5      5       rC " SB SC\
RR                  5      rD\!" SDS=9 " SE SF\?5      5       rE\!" SGS=9 " SH SI\?5      5       rF\! " SJ SK\?5      5       rG\! " SL SM\?5      5       rH\! " SN SO\?5      5       rI/ SPQrJg)Q    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringlogging   )MobileBertConfigc           	          SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     [        X5       GH  u  pU
R                  SS5      n
U
R                  S	S
5      n
U
R                  SS5      n
U
R                  SS5      n
U
R!                  S5      n
[#        S U
 5       5      (       a)  [        R                  SSR%                  U
5       35        M  U nU
 H  nUR'                  SU5      (       a  UR!                  SU5      nOU/nUS   S:X  d	  US   S:X  a  [)        US5      nOZUS   S:X  d	  US   S:X  a  [)        US5      nO;US   S:X  a  [)        US5      nO%US   S:X  a  [)        US5      nO [)        XS   5      n[-        U5      S:  d  M  [/        US   5      nUU   nM     WSS S :X  a  [)        US5      nOUS:X  a  UR1                  U5      n UR2                  UR2                  :X  d"   S!UR2                   S"UR2                   S#35       e [        R                  S$U
 35        [8        R:                  " U5      Ul        GM     U $ ! [         a    [        R                  S5        e f = f! [*         a,    [        R                  SSR%                  U
5       35         GM  f = f! [4         a1  nU=R6                  UR2                  UR2                  4-  sl        e SnAff = f)%z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape 	ffn_layerffnFakeLayerNorm	LayerNormextra_output_weightszdense/kernelbert
mobilebert/c              3   ,   #    U  H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     j/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/mobilebert/modeling_mobilebert.py	<genexpr>0load_tf_weights_in_mobilebert.<locals>.<genexpr>V   s      
 nns   z	Skipping z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabiasoutput_weightssquad
classifier   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipreplacesplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr=   nptftf_path	init_varsnamesarraysnamerV   arraypointerm_namescope_namesnumes                     r/   load_tf_weights_in_mobilebertrl   5   sW   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 ! 5)||K/||O[9||2NC||FL1zz#  

 
 
 KK)CHHTN#345F||,f55 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62Q#33!'84Q7*!'<8%g1~>G ;1$+a.)!#,+ , #$<=(gx0GxLL'E	==EKK/  /@[Y/ 	078''.c *d LI  Q	
 	b & KK)CHHTN+; <=  	FFw}}ekk22F	s5   K+ L4<M+!L1MM
N,M>>Nc                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )NoNorm   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        [        R                  " [        R                  " U5      5      U l        g N)	super__init__r   	ParameterrY   zerosr7   onesr4   )self	feat_sizeeps	__class__s      r/   rs   NoNorm.__init__   s@    LLY!78	ll5::i#89    input_tensorreturnc                 8    XR                   -  U R                  -   $ rq   )r4   r7   )rw   r}   s     r/   forwardNoNorm.forward   s    kk)DII55r|   )r7   r4   rq   
__name__
__module____qualname____firstlineno__rs   rY   Tensorr   __static_attributes____classcell__rz   s   @r/   rn   rn      s(    :
6ELL 6U\\ 6 6r|   rn   )
layer_normno_normc                      ^  \ rS rSrSrU 4S jr    SS\\R                     S\\R                     S\\R                     S\\R                     S\R                  4
S	 jjrS
rU =r$ )MobileBertEmbeddings   zGConstruct the embeddings from word, position and token_type embeddings.c                 b  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        [
        R                  " UR                  UR                  UR                  S9U l	        [
        R                  " UR                  UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  (       a  SOSnU R                  U-  n[
        R                  " X1R                  5      U l        [         UR"                     " UR                  5      U l        [
        R&                  " UR(                  5      U l        U R-                  S[.        R0                  " UR                  5      R3                  S5      SS9  g )N)padding_idxr   r   position_ids)r   F)
persistent)rr   rs   trigram_inputembedding_sizehidden_sizer   	Embedding
vocab_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsLinearembedding_transformationNORM2FNnormalization_typer!   Dropouthidden_dropout_probdropoutregister_bufferrY   arangeexpand)rw   r]   embed_dim_multiplierembedded_input_sizerz   s       r/   rs   MobileBertEmbeddings.__init__   sD   #11$33!--!||F,=,=v?T?Tbhbubuv#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"$($6$6qA"114HH(*		2EGYGY(Z% !:!:;F<N<NOzz&"<"<= 	ELL)G)GHOOPWXej 	 	
r|   	input_idstoken_type_idsr   inputs_embedsr~   c           
      ,   Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  (       ah  [        R                  " [        R                  R                  US S 2SS 24   / SQSS9U[        R                  R                  US S 2S S24   / SQSS9/SS	9nU R                  (       d  U R                  U R                  :w  a  U R                  U5      nU R                  U5      nU R!                  U5      nXG-   U-   n	U R#                  U	5      n	U R%                  U	5      n	U	$ )
Nr   r   dtypedevice)r   r   r   r   r   r           )value)r   r   r   r   r   r   r;   dim)sizer   rY   ru   longr   r   r   catr   
functionalpadr   r   r   r   r   r!   r   )
rw   r   r   r   r   input_shape
seq_lengthr   r   
embeddingss
             r/   r   MobileBertEmbeddings.forward   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M "IIMM%%mAqrE&:<NVY%Z!MM%%mAssF&;=OWZ%[
 M !4!48H8H!H 99-HM #66|D $ : :> J"8;PP
^^J/
\\*-
r|   )	r!   r   r   r   r   r   r   r   r   )NNNN)r   r   r   r   __doc__rs   r   rY   
LongTensorFloatTensorr   r   r   r   r   s   @r/   r   r      s    Q
0 155937590E,,-0 !!1!120 u//0	0
   1 120 
0 0r|   r   c                      ^  \ rS rSrU 4S jrS r   SS\R                  S\R                  S\R                  S\\R                     S\\R                     S	\\
   S
\\R                     4S jjrSrU =r$ )MobileBertSelfAttention   c                 r  > [         TU ]  5         UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R
                  -  U l        [        R                  " UR                  U R                  5      U l	        [        R                  " UR                  U R                  5      U l
        [        R                  " UR                  (       a  UR                  OUR                  U R                  5      U l        [        R                  " UR                  5      U l        g rq   )rr   rs   num_attention_headsrT   true_hidden_sizeattention_head_sizeall_head_sizer   r   querykeyuse_bottleneck_attentionr   r   r   attention_probs_dropout_probr   rw   r]   rz   s     r/   rs    MobileBertSelfAttention.__init__   s    #)#=#= #&v'>'>A[A['[#\ !558P8PPYYv668J8JK
99V44d6H6HIYY'-'F'FF##FL^L^`d`r`r

 zz&"E"EFr|   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nr   r   r;   r   r   )r   r   r   viewpermute)rw   xnew_x_shapes      r/   transpose_for_scores,MobileBertSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$r|   query_tensor
key_tensorvalue_tensorattention_mask	head_maskoutput_attentionsr~   c                    U R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                  U5      nU R                  U	5      n[        R
                  " XR                  SS5      5      nU[        R                  " U R                  5      -  nUb  X-   n[        R                  R                  USS9nU R                  U5      nUb  X-  n[        R
                  " X5      nUR                  SSSS5      R                  5       nUR!                  5       S S U R"                  4-   nUR%                  U5      nU(       a  X4nU$ U4nU$ )Nr   r   r   r;   r   r   )r   r   r   r   rY   matmulrU   mathsqrtr   r   r   softmaxr   r   
contiguousr   r   r   )rw   r   r   r   r   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                     r/   r   MobileBertSelfAttention.forward   sc    !JJ|4((:. JJ|4//0AB--o>	//0AB !<<5H5HR5PQ+dii8P8P.QQ%/@--//0@b/I ,,7 -9O_B%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=2 O\M]r|   )r   r   r   r   r   r   r   NNN)r   r   r   r   rs   r   rY   r   r   r   boolr   r   r   r   r   s   @r/   r   r      s    G% 7;15,0$ll$ LL$ ll	$
 !!2!23$ E--.$ $D>$ 
u||	$ $r|   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MobileBertSelfOutputi  c                 t  > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        U R                  (       d&  [        R                  " UR                  5      U l        g g Nry   )rr   rs   use_bottleneckr   r   r   denser   r   layer_norm_epsr!   r   r   r   r   s     r/   rs   MobileBertSelfOutput.__init__  s    $33YYv668O8OP
 !:!:;F<S<SY_YnYno""::f&@&@ADL #r|   hidden_statesresidual_tensorr~   c                     U R                  U5      nU R                  (       d  U R                  U5      nU R                  X2-   5      nU$ rq   )r   r   r   r!   rw   r   r   layer_outputss       r/   r   MobileBertSelfOutput.forward  s>    

=1"" LL7M}'FGr|   )r!   r   r   r   r   r   s   @r/   r   r     s7    BU\\ ELL UZUaUa  r|   r   c                     ^  \ rS rSrU 4S jrS r   SS\R                  S\R                  S\R                  S\R                  S\\R                     S	\\R                     S
\\
   S\\R                     4S jjrSrU =r$ )MobileBertAttentioni'  c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g rq   )rr   rs   r   rw   r   outputsetpruned_headsr   s     r/   rs   MobileBertAttention.__init__(  s0    +F3	*62Er|   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )rS   r   rw   r   r   r  r   r   r   r   r  r   r   union)rw   headsindexs      r/   prune_headsMobileBertAttention.prune_heads.  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r|   r   r   r   layer_inputr   r   r   r~   c                 n    U R                  UUUUUU5      nU R                  US   U5      n	U	4USS  -   n
U
$ )Nr   r   )rw   r  )rw   r   r   r   r  r   r   r   self_outputsattention_outputr   s              r/   r   MobileBertAttention.forward@  sT     yy
  ;;|AD#%QR(88r|   )r  r  rw   r   )r   r   r   r   rs   r  rY   r   r   r   r   r   r   r   r   r   s   @r/   r  r  '  s    ";0 7;15,0ll LL ll	
 \\ !!2!23 E--. $D> 
u||	 r|   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertIntermediateiY  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g rq   )rr   rs   r   r   r   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr   s     r/   rs   MobileBertIntermediate.__init__Z  s`    YYv668P8PQ
f''--'-f.?.?'@D$'-'8'8D$r|   r   r~   c                 J    U R                  U5      nU R                  U5      nU$ rq   r   r  rw   r   s     r/   r   MobileBertIntermediate.forwardb  s&    

=100?r|   r  r   r   s   @r/   r  r  Y  s(    9U\\ ell  r|   r  c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )OutputBottleneckih  c                 .  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        [        R                  " UR                  5      U l        g r   )rr   rs   r   r   r   r   r   r   r   r   r!   r   r   r   r   s     r/   rs   OutputBottleneck.__init__i  sh    YYv668J8JK
 !:!:;F<N<NTZTiTijzz&"<"<=r|   r   r   r~   c                 p    U R                  U5      nU R                  U5      nU R                  X2-   5      nU$ rq   )r   r   r!   r   s       r/   r   OutputBottleneck.forwardo  s5    

=1]3}'FGr|   )r!   r   r   r   r   s   @r/   r#  r#  h  s6    >U\\ ELL UZUaUa  r|   r#  c                      ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  S\R                  4S jrSrU =r	$ )	MobileBertOutputiv  c                   > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR                  5      U l        [        UR                     " UR                  5      U l
        U R                  (       d&  [        R                  " UR                  5      U l        g [        U5      U l        g rq   )rr   rs   r   r   r   r  r   r   r   r   r!   r   r   r   r#  
bottleneckr   s     r/   rs   MobileBertOutput.__init__w  s    $33YYv779P9PQ
 !:!:;F<S<ST""::f&@&@ADL.v6DOr|   intermediate_statesresidual_tensor_1residual_tensor_2r~   c                     U R                  U5      nU R                  (       d&  U R                  U5      nU R                  XB-   5      nU$ U R                  XB-   5      nU R	                  XC5      nU$ rq   )r   r   r   r!   r+  )rw   r-  r.  r/  layer_outputs        r/   r   MobileBertOutput.forward  sj     zz"56""<<5L>>,*JKL   >>,*JKL??<KLr|   )r!   r+  r   r   r   r   r   s   @r/   r)  r)  v  sD    7
#(<<
DILL
ejeqeq
	
 
r|   r)  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )BottleneckLayeri  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        g r   )rr   rs   r   r   r   intra_bottleneck_sizer   r   r   r   r!   r   s     r/   rs   BottleneckLayer.__init__  sR    YYv1163O3OP
 !:!:;F<X<X^d^s^str|   r   r~   c                 J    U R                  U5      nU R                  U5      nU$ rq   r   r!   )rw   r   r  s      r/   r   BottleneckLayer.forward  s$    jj/nn[1r|   r!   r   r   r   s   @r/   r4  r4    s)    u
U\\ ell  r|   r4  c                   h   ^  \ rS rSrU 4S jrS\R                  S\\R                     4S jrSr	U =r
$ )
Bottlenecki  c                    > [         TU ]  5         UR                  U l        UR                  U l        [	        U5      U l        U R                  (       a  [	        U5      U l        g g rq   )rr   rs   key_query_shared_bottleneckr   r4  input	attentionr   s     r/   rs   Bottleneck.__init__  sP    +1+M+M((.(G(G%$V,
++,V4DN ,r|   r   r~   c                     U R                  U5      nU R                  (       a  U4S-  $ U R                  (       a  U R                  U5      nX3X4$ XX4$ )N   )r@  r   r?  rA  )rw   r   bottlenecked_hidden_statesshared_attention_inputs       r/   r   Bottleneck.forward  sX    " &*ZZ%>"((.0144--%)^^M%B"*Mnn!-\\r|   )rA  r@  r?  r   r   r   r   r   rs   rY   r   r   r   r   r   r   s   @r/   r=  r=    s1    5]U\\ ]eELL6I ] ]r|   r=  c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )	FFNOutputi  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        g r   )rr   rs   r   r   r  r   r   r   r   r   r!   r   s     r/   rs   FFNOutput.__init__  sR    YYv779P9PQ
 !:!:;F<S<SY_YnYnor|   r   r   r~   c                 N    U R                  U5      nU R                  X2-   5      nU$ rq   r9  r   s       r/   r   FFNOutput.forward  s'    

=1}'FGr|   r;  r   r   s   @r/   rJ  rJ    s7    p
U\\ ELL UZUaUa  r|   rJ  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )FFNLayeri  c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g rq   )rr   rs   r  intermediaterJ  r  r   s     r/   rs   FFNLayer.__init__  s'    26:'r|   r   r~   c                 J    U R                  U5      nU R                  X!5      nU$ rq   rR  r  )rw   r   intermediate_outputr  s       r/   r   FFNLayer.forward  s(    "//>$7Gr|   rU  r   r   s   @r/   rP  rP    s(    (
U\\ ell  r|   rP  c                      ^  \ rS rSrU 4S jr   S
S\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	rU =r$ )MobileBertLayeri  c                   > [         TU ]  5         UR                  U l        UR                  U l        [	        U5      U l        [        U5      U l        [        U5      U l	        U R                  (       a  [        U5      U l        UR                  S:  aL  [        R                  " [        UR                  S-
  5       Vs/ s H  n[        U5      PM     sn5      U l        g g s  snf Nr   )rr   rs   r   num_feedforward_networksr  rA  r  rR  r)  r  r=  r+  r   
ModuleListrangerP  r   rw   r]   _rz   s      r/   rs   MobileBertLayer.__init__  s    $33(.(G(G%,V426:&v.(0DO**Q.}}fFeFehiFi@j%k@j1hv&6@j%klDH /%ks   =C r   r   r   r   r~   c           
         U R                   (       a  U R                  U5      u  pVpxO	U/S-  u  pVpxU R                  UUUUUUUS9n	U	S   n
U
4nU	SS  nU R                  S:w  a+  [	        U R
                  5       H  u  pU" U
5      n
X4-  nM     U R                  U
5      nU R                  XU5      nU4U-   [        R                  " S5      UUUUU
U4-   U-   nU$ )NrD  )r   r   r   i  )
r   r+  rA  r\  	enumerater   rR  r  rY   tensor)rw   r   r   r   r   r   r   r   r  self_attention_outputsr  sr   i
ffn_modulerV  r1  s                    r/   r   MobileBertLayer.forward  s+    BF//R_B`?LlKCP/TUBU?Ll!%/ "0 "
 2!4(,((A-!*488!4#-.>#? (( "5 #//0@A{{#6-XO T" #
  	 r|   )rA  r+  r   rR  r\  r  r   r   )r   r   r   r   rs   rY   r   r   r   r   r   r   r   r   r   s   @r/   rY  rY    su    m  7;15,0.||. !!2!23. E--.	.
 $D>. 
u||	. .r|   rY  c                      ^  \ rS rSrU 4S jr     SS\R                  S\\R                     S\\R                     S\\	   S\\	   S\\	   S	\
\\4   4S
 jjrSrU =r$ )MobileBertEncoderi  c                    > [         TU ]  5         [        R                  " [	        UR
                  5       Vs/ s H  n[        U5      PM     sn5      U l        g s  snf rq   )rr   rs   r   r]  r^  num_hidden_layersrY  layerr_  s      r/   rs   MobileBertEncoder.__init__  sB    ]]U6KcKcEd#eEdOF$;Ed#ef
#es   Ar   r   r   r   output_hidden_statesreturn_dictr~   c                 *   U(       a  SOS nU(       a  SOS n[        U R                  5       H4  u  pU(       a  Xq4-   nU
" UUX9   U5      nUS   nU(       d  M,  XS   4-   nM6     U(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        XUS9$ )Nr,   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7frq   r,   )r-   vs     r/   r0   ,MobileBertEncoder.forward.<locals>.<genexpr>9  s     h$Vq$Vs   	)last_hidden_stater   
attentions)rc  rn  tupler   )rw   r   r   r   r   rp  rq  all_hidden_statesall_attentionsrg  layer_moduler  s               r/   r   MobileBertEncoder.forward  s     #7BD0d(4OA#$58H$H!(!	M *!,M  !/3C2E!E  5    14D Dh]~$Vhhh+Yg
 	
r|   )rn  )NNFFT)r   r   r   r   rs   rY   r   r   r   r   r   r   r   r   r   r   r   s   @r/   rk  rk    s    g 7;15,1/4&*"
||"
 !!2!23"
 E--.	"

 $D>"
 'tn"
 d^"
 
uo%	&"
 "
r|   rk  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertPooleri?  c                    > [         TU ]  5         UR                  U l        U R                  (       a1  [        R
                  " UR                  UR                  5      U l        g g rq   )rr   rs   classifier_activationdo_activater   r   r   r   r   s     r/   rs   MobileBertPooler.__init__@  sH    !776#5#5v7I7IJDJ r|   r   r~   c                     US S 2S4   nU R                   (       d  U$ U R                  U5      n[        R                  " U5      nU$ )Nr   )r  r   rY   tanh)rw   r   first_token_tensorpooled_outputs       r/   r   MobileBertPooler.forwardF  sE     +1a40%% JJ'9:M!JJ}5M  r|   )r   r  r   r   s   @r/   r~  r~  ?  s)    K	!U\\ 	!ell 	! 	!r|   r~  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )!MobileBertPredictionHeadTransformiR  c                 b  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        S   " UR                  UR                  S9U l        g )Nr   r   )rr   rs   r   r   r   r   r  r  r  r   transform_act_fnr   r   r!   r   s     r/   rs   *MobileBertPredictionHeadTransform.__init__S  s    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D! .v/A/AvG\G\]r|   r   r~   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rq   )r   r  r!   r   s     r/   r   )MobileBertPredictionHeadTransform.forward\  s4    

=1--m<}5r|   )r!   r   r  r   r   s   @r/   r  r  R  s)    ^U\\ ell  r|   r  c                   l   ^  \ rS rSrU 4S jrSS jrS\R                  S\R                  4S jrSr	U =r
$ )	MobileBertLMPredictionHeadic  c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  UR                  -
  SS9U l	        [        R
                  " UR                  UR                  SS9U l
        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)r7   )rr   rs   r  	transformr   r   r   r   r   r   decoderrt   rY   ru   r7   r   s     r/   rs   #MobileBertLMPredictionHead.__init__d  s    :6B YYv00&2D2DvG\G\2\chi
yy!6!68I8IPUVLLV->->!?@	 IIr|   r~   c                 :    U R                   U R                  l         g rq   )r7   r  rw   s    r/   _tie_weights'MobileBertLMPredictionHead._tie_weightso  s     IIr|   r   c                    U R                  U5      nUR                  [        R                  " U R                  R
                  R                  5       U R                  R
                  /SS95      nXR                  R                  -  nU$ )Nr   r   )	r  r   rY   r   r  r4   tr   r7   r   s     r/   r   "MobileBertLMPredictionHead.forwardr  si    }5%,,UYY8K8K8M8M8OQUQ[Q[QbQb7cij-kl***r|   )r7   r  r   r  )r~   N)r   r   r   r   rs   r  rY   r   r   r   r   r   s   @r/   r  r  c  s-    	&&U\\ ell  r|   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertOnlyMLMHeadiy  c                 B   > [         TU ]  5         [        U5      U l        g rq   )rr   rs   r  predictionsr   s     r/   rs   MobileBertOnlyMLMHead.__init__z  s    5f=r|   sequence_outputr~   c                 (    U R                  U5      nU$ rq   r  )rw   r  prediction_scoress      r/   r   MobileBertOnlyMLMHead.forward~  s     ,,_=  r|   r  r   r   s   @r/   r  r  y  s(    >!u|| ! ! !r|   r  c                      ^  \ rS rSrU 4S jrS\R                  S\R                  S\\R                     4S jrSr	U =r
$ )MobileBertPreTrainingHeadsi  c                    > [         TU ]  5         [        U5      U l        [        R
                  " UR                  S5      U l        g Nr;   )rr   rs   r  r  r   r   r   seq_relationshipr   s     r/   rs   #MobileBertPreTrainingHeads.__init__  s4    5f= "		&*<*<a @r|   r  r  r~   c                 L    U R                  U5      nU R                  U5      nX44$ rq   r  r  )rw   r  r  r  seq_relationship_scores        r/   r   "MobileBertPreTrainingHeads.forward  s-     ,,_=!%!6!6}!E 88r|   r  rH  r   s   @r/   r  r    s=    A
9u|| 9ELL 9UZ[`[g[gUh 9 9r|   r  c                   &    \ rS rSr\r\rSrS r	Sr
g)MobileBertPreTrainedModeli  r$   c                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  [        45      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsr   )meanstdNg      ?)r  r   r   r4   r[   normal_r]   initializer_ranger7   zero_r   r   r!   rn   fill_r  )rw   modules     r/   _init_weights'MobileBertPreTrainedModel._init_weights  s8   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .v 677KK""$MM$$S) :;;KK""$ <r|   r,   N)r   r   r   r   r   config_classrl   load_tf_weightsbase_model_prefixr  r   r,   r|   r/   r  r    s    #L3O$%r|   r  c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   S	rg)
MobileBertForPreTrainingOutputi  a  
Output type of [`MobileBertForPreTraining`].

Args:
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
        shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nlossprediction_logitsseq_relationship_logitsr   rw  r,   )r   r   r   r   r   r  r   rY   r   __annotations__r  r  r   r   rw  r   r,   r|   r/   r  r    s~    2 )-D(5$$
%,59x 1 129;?Xe&7&78?8<M8E%"3"345<59Ju00129r|   r  c                   L  ^  \ rS rSrSrSU 4S jjrS rS rS r\	         SS\
\R                     S\
\R                     S	\
\R                     S
\
\R                     S\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\\\4   4S jj5       rSrU =r$ )MobileBertModeli  z&
https://arxiv.org/pdf/2004.02984.pdf
c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
rr   rs   r]   r   r   rk  encoderr~  pooler	post_init)rw   r]   add_pooling_layerrz   s      r/   rs   MobileBertModel.__init__  sL    
 	 .v6(02C&v. 	r|   c                 .    U R                   R                  $ rq   r   r   r  s    r/   get_input_embeddings$MobileBertModel.get_input_embeddings  s    ...r|   c                 $    XR                   l        g rq   r  )rw   r   s     r/   set_input_embeddings$MobileBertModel.set_input_embeddings  s    */'r|   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr  rn  rA  r  )rw   heads_to_prunern  r  s       r/   _prune_headsMobileBertModel._prune_heads  s<    
 +002LELLu%//;;EB 3r|   r   r   r   r   r   r   rp  r   rq  r~   c
           	      f   Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U
[        R                  US9nU R                  X*5      nU R                  XPR                   R                  5      nU R                  XX6S9nU R!                  UUUUUU	S9nUS   nU R"                  b  U R#                  U5      OS nU	(       d
  UU4US	S  -   $ [%        UUUR&                  UR(                  S
9$ )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)r   r   )r   r   r   r   )r   r   r   rp  rq  r   r   )rv  pooler_outputr   rw  )r]   r   rp  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rY   rv   ru   r   get_extended_attention_maskget_head_maskrm  r   r  r  r   r   rw  )rw   r   r   r   r   r   r   rp  r   rq  r   r   extended_attention_maskembedding_outputencoder_outputsr  r  s                    r/   r   MobileBertModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN 150P0PQ_0m &&y++2O2OP	??> + 
 ,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r|   )r]   r   r  r  )T)	NNNNNNNNN)r   r   r   r   r   rs   r  r  r  r   r   rY   r   r   r   r   r   r   r   r   r   r   s   @r/   r  r    s   /0C  156:59371559/3,0&*D
E,,-D
 !!2!23D
 !!1!12	D

 u//0D
 E--.D
   1 12D
 'tnD
 $D>D
 d^D
 
u00	1D
 D
r|   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    )custom_introc                     ^  \ rS rSrSS/rU 4S jrS rS rSS\\	   S\
R                  4U 4S	 jjjr\           SS
\\R                     S\\R                      S\\R                     S\\R                     S\\R                      S\\R                      S\\R                     S\\R                     S\\R                      S\\R                      S\\R                      S\\\4   4S jj5       rSrU =r$ )MobileBertForPreTrainingi5  cls.predictions.decoder.weightcls.predictions.decoder.biasc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g rq   )rr   rs   r  r$   r  clsr  r   s     r/   rs   !MobileBertForPreTraining.__init__>  s4     )&1-f5 	r|   c                 B    U R                   R                  R                  $ rq   r  r  r  r  s    r/   get_output_embeddings.MobileBertForPreTraining.get_output_embeddingsF      xx##+++r|   c                     XR                   R                  l        UR                  U R                   R                  l        g rq   r  r  r  r7   rw   new_embeddingss     r/   set_output_embeddings.MobileBertForPreTraining.set_output_embeddingsI  *    '5$$2$7$7!r|   new_num_tokensr~   c                    > U R                  U R                  R                  R                  USS9U R                  R                  l        [        TU ]  US9$ NT)r  
transposed)r  _get_resized_lm_headr  r  r   rr   resize_token_embeddingsrw   r  rz   s     r/   r  0MobileBertForPreTraining.resize_token_embeddingsM  sR    %)%>%>HH  &&~RV &? &
" w.n.MMr|   r   r   r   r   r   r   labelsnext_sentence_labelr   rp  rq  c                    Ub  UOU R                   R                  nU R                  UUUUUUU	U
US9	nUSS u  pU R                  X5      u  nnSnUbv  Ubs  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU" UR                  SS5      UR                  S5      5      nUU-   nU(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
    (see `input_ids` docstring) Indices should be in `[0, 1]`:

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Examples:

```python
>>> from transformers import AutoTokenizer, MobileBertForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")

>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
>>> # Batch size 1
>>> outputs = model(input_ids)

>>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
```Nr   r   r   r   r   r   rp  rq  r;   r   )r  r  r  r   rw  )
r]   r  r$   r  r	   r   r   r  r   rw  )rw   r   r   r   r   r   r   r  r  r   rp  rq  r   r  r  r  r  
total_lossloss_fctmasked_lm_lossnext_sentence_lossr  s                         r/   r    MobileBertForPreTraining.forwardU  sC   V &1%<k$++B]B]//))%'/!5# " 

 *1!&48HH_4\11
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J')?@712;NF/9/EZMF*Q6Q-/$:!//))
 	
r|   r  r$   rq   NNNNNNNNNNN)r   r   r   r   _tied_weights_keysrs   r  r  r   rT   r   r   r  r   rY   r   r   r   r   r  r   r   r   r   s   @r/   r  r  5  s    ;<Z[,8Nhsm Nr|| N N  156:59371559-1:>9=<@37K
E,,-K
 !!2!23K
 !!1!12	K

 u//0K
 E--.K
   1 12K
 ))*K
 &e&6&67K
 $E$5$56K
 'u'8'89K
 e//0K
 
u44	5K
 K
r|   r  c                     ^  \ rS rSrSS/rU 4S jrS rS rSS\\	   S\
R                  4U 4S	 jjjr\          SS
\\R                     S\\R                      S\\R                     S\\R                     S\\R                      S\\R                      S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )MobileBertForMaskedLMi  r  r  c                    > [         TU ]  U5        [        USS9U l        [	        U5      U l        Xl        U R                  5         g NF)r  )rr   rs   r  r$   r  r  r]   r  r   s     r/   rs   MobileBertForMaskedLM.__init__  s;     )&EJ(0 	r|   c                 B    U R                   R                  R                  $ rq   r  r  s    r/   r  +MobileBertForMaskedLM.get_output_embeddings  r  r|   c                     XR                   R                  l        UR                  U R                   R                  l        g rq   r  r  s     r/   r  +MobileBertForMaskedLM.set_output_embeddings  r  r|   r  r~   c                    > U R                  U R                  R                  R                  USS9U R                  R                  l        [        TU ]  US9$ r  r  r  s     r/   r  -MobileBertForMaskedLM.resize_token_embeddings  sR    %)%>%>HH  &&~RV &? &
" w.n.MMr|   r   r   r   r   r   r   r  r   rp  rq  c                    U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr  r   r   r;   r  logitsr   rw  )
r]   r  r$   r  r	   r   r   r   r   rw  )rw   r   r   r   r   r   r   r  r   rp  rq  r   r  r  r  r  r  s                    r/   r   MobileBertForMaskedLM.forward  s    ( &1%<k$++B]B]//))%'/!5# " 

 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r|   )r  r]   r$   rq   
NNNNNNNNNN)r   r   r   r   r  rs   r  r  r   rT   r   r   r  r   rY   r   r   r   r   r   r   r   r   r   r   s   @r/   r  r    sE   :<Z[,8Nhsm Nr|| N N  156:59371559-1,0/3&*2
E,,-2
 !!2!232
 !!1!12	2

 u//02
 E--.2
   1 122
 ))*2
 $D>2
 'tn2
 d^2
 
un$	%2
 2
r|   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertOnlyNSPHeadi  c                 n   > [         TU ]  5         [        R                  " UR                  S5      U l        g r  )rr   rs   r   r   r   r  r   s     r/   rs   MobileBertOnlyNSPHead.__init__  s'     "		&*<*<a @r|   r  r~   c                 (    U R                  U5      nU$ rq   r  )rw   r  r  s      r/   r   MobileBertOnlyNSPHead.forward  s    !%!6!6}!E%%r|   r"  r   r   s   @r/   r  r    s)    A&U\\ &ell & &r|   r  zZ
    MobileBert Model with a `next sentence prediction (classification)` head on top.
    c                   R  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )#MobileBertForNextSentencePredictioni  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g rq   )rr   rs   r  r$   r  r  r  r   s     r/   rs   ,MobileBertForNextSentencePrediction.__init__  s4     )&1(0 	r|   r   r   r   r   r   r   r  r   rp  rq  r~   c                    SU;   a,  [         R                  " S[        5        UR                  S5      nU
b  U
OU R                  R
                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nSnUb2  [        5       nU" UR                  SS5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )	a  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
    (see `input_ids` docstring) Indices should be in `[0, 1]`.

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Examples:

```python
>>> from transformers import AutoTokenizer, MobileBertForNextSentencePrediction
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")

>>> outputs = model(**encoding, labels=torch.LongTensor([1]))
>>> loss = outputs.loss
>>> logits = outputs.logits
```r  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.Nr  r   r   r;   r  )warningswarnFutureWarningpopr]   r  r$   r  r	   r   r   r   rw  )rw   r   r   r   r   r   r   r  r   rp  rq  kwargsr   r  r  r  r  r  s                     r/   r   +MobileBertForNextSentencePrediction.forward  s   R !F*MM%
 ZZ 56F%0%<k$++B]B]//))%'/!5# " 

  
!%-!8!')H!)*@*E*Eb!*LfkkZ\o!^,.<F7I7U')F2a[aa*#)!//))	
 	
r|   r
  r  )r   r   r   r   rs   r   r   rY   r   r   r   r   r   r   r   r   r   r   s   @r/   r%  r%    s     156:59371559-1,0/3&*O
E,,-O
 !!2!23O
 !!1!12	O

 u//0O
 E--.O
   1 12O
 ))*O
 $D>O
 'tnO
 d^O
 
u11	2O
 O
r|   r%  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )#MobileBertForSequenceClassificationia  c                 r  > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l
        [        R                  " UR                  UR                  5      U l        U R                  5         g rq   )rr   rs   
num_labelsr]   r  r$   classifier_dropoutr   r   r   r   r   r   r:   r  rw   r]   r3  rz   s      r/   rs   ,MobileBertForSequenceClassification.__init__i  s      ++)&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r|   r   r   r   r   r   r   r  r   rp  rq  r~   c                 R   U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R	                  U5      nSnUGb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   
regressionsingle_label_classificationmulti_label_classificationr   r;   r  )r]   r  r$   r   r:   problem_typer2  r   rY   r   rT   r
   squeezer	   r   r   r   r   rw  )rw   r   r   r   r   r   r   r  r   rp  rq  r   r  r  r  r  r  s                    r/   r   +MobileBertForSequenceClassification.forwardx  s   ( &1%<k$++B]B]//))%'/!5# " 

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r|   )r:   r]   r   r$   r2  r  )r   r   r   r   rs   r   r   rY   r   r   r   r   r   r   r   r   r   s   @r/   r0  r0  a  s     -11515/3,004)-,0/3&*E
ELL)E
 !.E
 !.	E

 u||,E
 ELL)E
  -E
 &E
 $D>E
 'tnE
 d^E
 
uU\\"$<<	=E
 E
r|   r0  c                     ^  \ rS rSrU 4S jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )MobileBertForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  )
rr   rs   r2  r  r$   r   r   r   
qa_outputsr  r   s     r/   rs   'MobileBertForQuestionAnswering.__init__  sU      ++)&EJ))F$6$68I8IJ 	r|   r   r   r   r   r   r   start_positionsend_positionsr   rp  rq  r~   c                 $   Ub  UOU R                   R                  nU R                  UUUUUUU	U
US9	nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" UU5      nUU-   S-  nU(       d  UU4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	Nr  r   r   r   r   )ignore_indexr;   )r  start_logits
end_logitsr   rw  )r]   r  r$   r@  rM   r;  r   rS   r   clampr	   r   r   rw  )rw   r   r   r   r   r   r   rB  rC  r   rp  rq  r   r  r  rF  rG  r  ignored_indexr  
start_lossend_lossr  s                          r/   r   &MobileBertForQuestionAnswering.forward  s    &1%<k$++B]B]//))%'/!5# " 

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r|   )r$   r2  r@  r  )r   r   r   r   rs   r   r   rY   r   r   r   r   r   r   r   r   r   s   @r/   r>  r>    s     -11515/3,0042604,0/3&*>
ELL)>
 !.>
 !.	>

 u||,>
 ELL)>
  ->
 "%,,/>
  ->
 $D>>
 'tn>
 d^>
 
uU\\"$@@	A>
 >
r|   r>  c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )MobileBertForMultipleChoicei  c                 0  > [         TU ]  U5        [        U5      U l        UR                  b  UR                  OUR
                  n[        R                  " U5      U l        [        R                  " UR                  S5      U l        U R                  5         g r[  )rr   rs   r  r$   r3  r   r   r   r   r   r   r:   r  r4  s      r/   rs   $MobileBertForMultipleChoice.__init__  su     )&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$6: 	r|   r   r   r   r   r   r   r  r   rp  rq  r~   c                 Z   U
b  U
OU R                   R                  n
Ub  UR                  S   OUR                  S   nUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R                  U5      nUR                  SU5      nSnUb  [        5       nU" X5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r   r   r  r;   r  )r]   r  rV   r   r   r$   r   r:   r	   r   r   rw  )rw   r   r   r   r   r   r   r  r   rp  rq  num_choicesr   r  r  reshaped_logitsr  r  r  s                      r/   r   #MobileBertForMultipleChoice.forward   s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 //))%'/!5# " 

  
]3/ ++b+6')HO4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r|   )r:   r   r$   r  )r   r   r   r   rs   r   r   rY   r   r   r   r   r   r   r   r   r   s   @r/   rN  rN    s     -11515/3,004)-,0/3&*X
ELL)X
 !.X
 !.	X

 u||,X
 ELL)X
  -X
 &X
 $D>X
 'tnX
 d^X
 
uU\\"$==	>X
 X
r|   rN  c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ ) MobileBertForTokenClassificationi|  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )rr   rs   r2  r  r$   r3  r   r   r   r   r   r   r:   r  r4  s      r/   rs   )MobileBertForTokenClassification.__init__  s      ++)&EJ)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r|   r   r   r   r   r   r   r  r   rp  rq  r~   c                    U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   r   r;   r  )r]   r  r$   r   r:   r	   r   r2  r   r   rw  )rw   r   r   r   r   r   r   r  r   rp  rq  r   r  r  r  r  r  s                    r/   r   (MobileBertForTokenClassification.forward  s    $ &1%<k$++B]B]//))%'/!5# " 

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r|   )r:   r   r$   r2  r  )r   r   r   r   rs   r   r   rY   r   r   r   r   r   r   r   r   r   s   @r/   rV  rV  |  s     -11515/3,004)-,0/3&*2
ELL)2
 !.2
 !.	2

 u||,2
 ELL)2
  -2
 &2
 $D>2
 'tn2
 d^2
 
uU\\"$99	:2
 2
r|   rV  )r  rN  r%  r  r>  r0  rV  rY  r  r  rl   )Kr   rC   r)  dataclassesr   typingr   r   r   rY   r   torch.nnr   r	   r
   activationsr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   configuration_mobilebertr   
get_loggerr   rA   rl   Modulern   r!   r   r   r   r   r  r  r#  r)  r4  r=  rJ  rP  rY  rk  r~  r  r  r  r  r  r  r  r  r  r  r%  r0  r>  rN  rV  __all__r,   r|   r/   <module>rg     s  .  	  ! ) )   A A !	 	 	 . Q 9 9 6 
		H	%K\6RYY 6 &
9I299 IX7bii 7t299 "/")) /dRYY ryy ryy 0	bii 	!] !]H			 		ryy 	<bii <~'
		 '
T!ryy !&		 " ,!BII !	9 	9 % % %0 :[ : :B g
/ g
 g
T f
8 f
f
R M
5 M
 M
`&BII & 
Z
*C Z

Z
z V
*C V
V
r J
%> J
 J
Z g
"; g
 g
T B
'@ B
 B
Jr|   