
    fTh9                        S r SSKJrJrJr  SSKrSSKrSSKJr  SSKJ	r	J
r
Jr  SSKJr  SSKJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJr  \R6                  " \5      r " S S\R<                  5      r " S S\R<                  5      r \RB                  RD                  S 5       r#\RB                  RD                  S 5       r$\RB                  RD                  S 5       r%\RB                  RD                  S 5       r&\RB                  RD                  S\RN                  S\(4S j5       r)\RB                  RD                  S\RN                  S\RN                  4S j5       r*\RB                  RD                  S\RN                  S\RN                  S\(4S j5       r+\RB                  RD                  S\RN                  S\RN                  4S j5       r, " S S\R<                  5      r- " S S \R<                  5      r. " S! S"\R<                  5      r/ " S# S$\R<                  5      r0 " S% S&\R<                  5      r1 " S' S(\R<                  5      r2 " S) S*\R<                  5      r3\ " S+ S,\5      5       r4\ " S- S.\45      5       r5 " S/ S0\R<                  5      r6 " S1 S2\R<                  5      r7 " S3 S4\R<                  5      r8 " S5 S6\R<                  5      r9 " S7 S8\R<                  5      r:\ " S9 S:\45      5       r; " S; S<\R<                  5      r<\" S=S>9 " S? S@\45      5       r=\ " SA SB\45      5       r>\ " SC SD\45      5       r?/ SEQr@g)FzPyTorch DeBERTa model.    )OptionalTupleUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)auto_docstringlogging   )DebertaConfigc                   6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )DebertaLayerNorm(   zBLayerNorm module in the TF style (epsilon inside the square root).c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        [        R                  " [        R                  " U5      5      U l        X l	        g N)
super__init__r   	Parametertorchonesweightzerosbiasvariance_epsilon)selfsizeeps	__class__s      d/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/deberta/modeling_deberta.pyr   DebertaLayerNorm.__init__+   sF    ll5::d#34LLT!23	 #    c                 H   UR                   nUR                  5       nUR                  SSS9nX-
  R                  S5      R                  SSS9nX-
  [        R
                  " X@R                  -   5      -  nUR                  U5      nU R                  U-  U R                  -   nU$ )NT)keepdim   )
dtypefloatmeanpowr   sqrtr#   tor    r"   )r$   hidden_states
input_typer1   varianceys         r(   forwardDebertaLayerNorm.forward1   s    "((
%++-!!"d!3!(--a055b$5G&-HG\G\<\1]]%((4KK-'$))3r*   )r"   r#   r    )g-q=	__name__
__module____qualname____firstlineno____doc__r   r9   __static_attributes____classcell__r'   s   @r(   r   r   (   s    L$ r*   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )DebertaSelfOutput<   c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        R                  " UR                  5      U l        g r   )r   r   r   Linearhidden_sizedenser   layer_norm_eps	LayerNormDropouthidden_dropout_probdropoutr$   configr'   s     r(   r   DebertaSelfOutput.__init__=   s\    YYv1163E3EF
)&*<*<f>S>STzz&"<"<=r*   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   rJ   rO   rL   r$   r5   input_tensors      r(   r9   DebertaSelfOutput.forwardC   5    

=1]3}'CDr*   )rL   rJ   rO   r<   r=   r>   r?   r   r9   rA   rB   rC   s   @r(   rE   rE   <   s    > r*   rE   c                    U R                  S5      nUR                  S5      n[        R                  " U[        R                  U R                  S9n[        R                  " U[        R                  UR                  S9nUSS2S4   UR                  SS5      R                  US5      -
  nUSU2SS24   nUR                  S5      nU$ )a  
Build relative position according to the query and key

We assume the absolute position of query \(P_q\) is range from (0, query_size) and the absolute position of key
\(P_k\) is range from (0, key_size), The relative positions from query to key is \(R_{q \rightarrow k} = P_q -
P_k\)

Args:
    query_size (int): the length of query
    key_size (int): the length of key

Return:
    `torch.LongTensor`: A tensor with shape [1, query_size, key_size]

r/   deviceNr   r,   r   )r%   r   arangelongr]   viewrepeat	unsqueeze)query_layer	key_layer
query_sizekey_sizeq_idsk_idsrel_pos_idss          r(   build_relative_positionrj   J   s    $ !!"%J~~b!HLL5::k>P>PQELLI<L<LME4.5::a#4#;#;J#JJKkzk1n-K''*Kr*   c                     U R                  UR                  S5      UR                  S5      UR                  S5      UR                  S5      /5      $ )Nr   r   r.   r,   expandr%   )c2p_posrc   relative_poss      r(   c2p_dynamic_expandrp   g   sI    >>;++A.0@0@0C[EUEUVWEXZfZkZklnZopqqr*   c                     U R                  UR                  S5      UR                  S5      UR                  S5      UR                  S5      /5      $ )Nr   r   r[   rl   )rn   rc   rd   s      r(   p2c_dynamic_expandrr   l   sG    >>;++A.0@0@0CY^^TVEWYbYgYghjYklmmr*   c                     U R                  UR                  5       S S U R                  S5      UR                  S5      4-   5      $ )Nr.   r[   rl   )	pos_indexp2c_attrd   s      r(   pos_dynamic_expandrv   q   s=    GLLN2A.)..2DinnUWFX1YYZZr*   rc   scale_factorc                     [         R                  " [         R                  " U R                  S5      [         R                  S9U-  5      $ )Nr,   r/   )r   r3   tensorr%   r0   )rc   rw   s     r(   scaled_size_sqrtr{   y   s0    ::ell;#3#3B#7u{{KlZ[[r*   rd   c                 d    U R                  S5      UR                  S5      :w  a  [        X5      $ U$ Nr[   )r%   rj   )rc   rd   ro   s      r(   
build_rposr~   ~   s/    y~~b11&{>>r*   max_relative_positionsc           
          [         R                  " [        [        U R	                  S5      UR	                  S5      5      U5      5      $ r}   )r   rz   minmaxr%   )rc   rd   r   s      r(   compute_attention_spanr      s4    <<C 0 0 4innR6HIKabccr*   c           	          UR                  S5      UR                  S5      :w  a>  US S 2S S 2S S 2S4   R                  S5      n[        R                  " U S[	        X@U5      S9$ U $ )Nr[   r   r,   r.   dimindex)r%   rb   r   gatherrv   )ru   rc   rd   ro   rt   s        r(   uneven_size_correctedr      s\    y~~b11 Aq!,66r:	||G2DYYb2cddr*   c                     ^  \ rS rSrSrU 4S jrS r    SS\R                  S\R                  S\	S\
\R                     S	\
\R                     S
\
\R                     S\\R                  \
\R                     4   4S jjrS\R                  S\R                  S	\R                  S
\R                  S\4
S jrSrU =r$ )DisentangledSelfAttention   z
Disentangled self-attention module

Parameters:
    config (`str`):
        A model config class instance with the configuration to build a new model. The schema is similar to
        *BertConfig*, for more details, please refer [`DebertaConfig`]

c                 n  > [         TU ]  5         UR                  UR                  -  S:w  a&  [	        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  S-  SS9U l
        [        R                  " [        R                  " U R                  [        R                  S95      U l        [        R                  " [        R                  " U R                  [        R                  S95      U l        UR"                  b  UR"                  O/ U l        [%        US	S5      U l        [%        US
S5      U l        U R(                  (       a_  [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        OS U l        S U l        U R&                  (       a  [%        USS5      U l        U R.                  S:  a  UR0                  U l        [        R2                  " UR4                  5      U l        SU R"                  ;   a/  [        R                  " UR                  U R                  SS9U l        SU R"                  ;   a0  [        R                  " UR                  U R                  5      U l        [        R2                  " UR<                  5      U l        g )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()r
   Fr"   ry   relative_attentiontalking_headr   r,   r   c2pp2c) r   r   rI   num_attention_heads
ValueErrorintattention_head_sizeall_head_sizer   rH   in_projr   r   r!   r0   q_biasv_biaspos_att_typegetattrr   r   head_logits_projhead_weights_projr   max_position_embeddingsrM   rN   pos_dropoutpos_proj
pos_q_projattention_probs_dropout_probrO   rP   s     r(   r   "DisentangledSelfAttention.__init__   sm    : ::a?#F$6$6#7 8 445Q8  $*#=#= #&v'9'9F<V<V'V#W !558P8PPyy!3!3T5G5G!5KRWXll5;;0B0B5;;#WXll5;;0B0B5;;#WX393F3F3RF//XZ")&2F"N#FNEB$&IIf.H.H&JdJdkp$qD!%'YYv/I/I6KeKelq%rD"$(D!%)D"""*1&:RTV*WD'**Q..4.L.L+!zz&*D*DED))) "		&*<*<d>P>PW\ ])))"$))F,>,>@R@R"Szz&"E"EFr*   c                     UR                  5       S S U R                  S4-   nUR                  U5      nUR                  SSSS5      $ )Nr,   r   r.   r   r
   )r%   r   r`   permute)r$   xnew_x_shapes      r(   transpose_for_scores.DisentangledSelfAttention.transpose_for_scores   sF    ffhsmt'?'?&DDFF;yyAq!$$r*   r5   attention_maskoutput_attentionsquery_statesro   rel_embeddingsreturnc                 J   Uc5  U R                  U5      nU R                  U5      R                  SSS9u  pn
GOzU R                   R                  R                  U R                  S-  SS9n[        S5       VVs/ s HD  n[        R                  " [        U R                  5       Vs/ s H  oUS-  U-      PM     snSS9PMF     nnn[        R                  " US   UR                  5       R                  US   R                  S95      n[        R                  " US   UR                  5       R                  US   R                  S95      n[        R                  " US   UR                  5       R                  US   R                  S95      nXU4 Vs/ s H  nU R                  U5      PM     snu  pn
XR                  U R                  SSSS24   5      -   nXR                  U R                  SSSS24   5      -   n
SnS[        U R                  5      -   n[!        UU5      nUUR                  UR                  S9-  n[        R                  " XR#                  SS	5      5      nU R$                  (       a*  Ub'  Ub$  U R'                  U5      nU R)                  XXVU5      nUb  UU-   nU R*                  b5  U R+                  UR-                  SSSS5      5      R-                  SSSS5      nUR/                  5       nUR1                  U) [        R2                  " UR                  5      R4                  5      n[6        R8                  R;                  USS9nU R=                  U5      nU R>                  b5  U R?                  UR-                  SSSS5      5      R-                  SSSS5      n[        R                  " UU
5      nUR-                  SSSS5      RA                  5       nURC                  5       SS	 S
-   nURE                  U5      nU(       d  US4$ UU4$ s  snf s  snnf s  snf )a1  
Call the module

Args:
    hidden_states (`torch.FloatTensor`):
        Input states to the module usually the output from previous layer, it will be the Q,K and V in
        *Attention(Q,K,V)*

    attention_mask (`torch.BoolTensor`):
        An attention mask matrix of shape [*B*, *N*, *N*] where *B* is the batch size, *N* is the maximum
        sequence length in which element [i,j] = *1* means the *i* th token in the input can attend to the *j*
        th token.

    output_attentions (`bool`, *optional*):
        Whether return the attention matrix.

    query_states (`torch.FloatTensor`, *optional*):
        The *Q* state in *Attention(Q,K,V)*.

    relative_pos (`torch.LongTensor`):
        The relative position encoding between the tokens in the sequence. It's of shape [*B*, *N*, *N*] with
        values ranging in [*-max_relative_positions*, *max_relative_positions*].

    rel_embeddings (`torch.FloatTensor`):
        The embedding of relative distances. It's a tensor of shape [\(2 \times
        \text{max_relative_positions}\), *hidden_size*].


Nr
   r,   r   r   ry   r   r.   r[   )r,   )#r   r   chunkr    r   ranger   catmatmultr4   r/   r   r   lenr   r{   	transposer   r   disentangled_att_biasr   r   boolmasked_fillfinfor   r   
functionalsoftmaxrO   r   
contiguousr%   r`   )r$   r5   r   r   r   ro   r   qprc   rd   value_layerwskiqkvwqvr   rel_attrw   scaleattention_scoresattention_probscontext_layernew_context_layer_shapes                            r(   r9   !DisentangledSelfAttention.forward   s   L m,B262K2KB2O2U2UVW]_2U2`/KK$$**4+C+Ca+GQ*OBhmnohpqhpcdEIIeD<T<T6UV6U!a%!)}6UV\]^hpDqT!Wlnn&6&9&9Q&9&NOAT!Wmoo&7&:&:a&:&OPAT!Wmoo&7&:&:a&:&OPAZ[`aYb2cYbTU43L3LQ3OYb2c/KK!$=$=dkk$PTVW->X$YY!$=$=dkk$PTVW->X$YY3t0011 l;!EHH;3D3DH$EE <<5H5HR5PQ""~'AlF^!--n=N00gstG/'9   ,#445E5M5MaQRTUWX5YZbbcdfgijlmn',,.+77.8I5;;WbWhWhKiKmKmn--//0@b/I,,7!!-"44_5L5LQPQSTVW5XYaabcefhiklmO_kB%--aAq9DDF"/"4"4"6s";e"C%**+BC !4((//U Wq 3ds   8-P%P9
PP Prc   rd   rw   c           	         Uc  [        XUR                  5      nUR                  5       S:X  a!  UR                  S5      R                  S5      nOVUR                  5       S:X  a  UR                  S5      nO0UR                  5       S:w  a  [	        SUR                  5        35      e[        XU R                  5      nUR                  5       nUU R                  U-
  U R                  U-   2S S 24   R                  S5      nSnSU R                  ;   a  U R                  U5      nU R                  U5      n[        R                  " XR                  SS	5      5      n	[        R                  " X6-   SUS-  S-
  5      n
[        R                  " U	S[!        XU5      S
9n	Xy-  nSU R                  ;   a  U R#                  U5      nU R                  U5      nU[%        X5      -  n['        UUU5      n[        R                  " U* U-   SUS-  S-
  5      n[        R                  " X+R                  SS	5      R)                  UR*                  S95      n[        R                  " US[-        XU5      S
9R                  SS	5      n[/        XX#5      nX~-  nU$ )Nr.   r   r
   r      z2Relative position ids must be of dim 2 or 3 or 4. r   r,   r[   r   r   ry   )rj   r]   r   rb   r   r   r   r_   r   r   r   r   r   r   clampr   rp   r   r{   r~   r4   r/   rr   r   )r$   rc   rd   ro   r   rw   att_spanscorepos_key_layerc2p_attrn   pos_query_layerr_posp2c_posru   s                  r(   r   /DisentangledSelfAttention.disentangled_att_bias$  sq    2;;K]K]^L"'11!4>>qAL1$'11!4L1$QR^RbRbRdQefgg)+$B]B]^#((*'''(2T5P5PS[5[[]^^

)A, 	  D%%% MM.9M 55mDMll;0G0GB0OPGkk,"91hlQ>NOGll7:LWco:pqGE D%%%"oon=O"77HO/NNOE
 kk5&8"3Q1q8HIGll9.G.GB.O.R.RYbYhYh.R.ijGllR'9'PY'ZiB  ,G)ZGEr*   )r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   FNNN)r<   r=   r>   r?   r@   r   r   r   Tensorr   r   r   r9   r   r   rA   rB   rC   s   @r(   r   r      s    $GL% #(/3/315U0||U0 U0  	U0
 u||,U0 u||,U0 !.U0 
u||Xell33	4U0n6\\6 <<6 ll	6
 6 6 6r*   r   c                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )DebertaEmbeddingsi]  zGConstruct the embeddings from word, position and token_type embeddings.c                   > [         TU ]  5         [        USS5      n[        USUR                  5      U l        [
        R                  " UR                  U R                  US9U l        [        USS5      U l	        U R                  (       d  S U l
        O0[
        R                  " UR                  U R                  5      U l
        UR                  S:  a1  [
        R                  " UR                  U R                  5      U l        OS U l        U R                  UR                  :w  a0  [
        R                  " U R                  UR                  SS9U l        OS U l        [!        UR                  UR"                  5      U l        [
        R&                  " UR(                  5      U l        Xl        U R/                  S	[0        R2                  " UR                  5      R5                  S
5      SS9  g )Npad_token_idr   embedding_size)padding_idxposition_biased_inputTFr   position_ids)r   r,   )
persistent)r   r   r   rI   r   r   	Embedding
vocab_sizeword_embeddingsr   position_embeddingsr   type_vocab_sizetoken_type_embeddingsrH   
embed_projr   rK   rL   rM   rN   rO   rQ   register_bufferr   r^   rm   )r$   rQ   r   r'   s      r(   r   DebertaEmbeddings.__init__`  sw   v~q9%f.>@R@RS!||F,=,=t?R?R`lm%,V5Ld%S"))'+D$')||F4R4RTXTgTg'hD$!!A%)+f6L6LdNaNa)bD&)-D&&"4"44 ii(;(;V=O=OV[\DO"DO)&*<*<f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
r*   c                    Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  b   U R                  UR	                  5       5      nO[        R                  " U5      nUn	U R                  (       a  X-   n	U R                  b  U R                  U5      n
X-   n	U R                  b  U R                  U	5      n	U R                  U	5      n	Ub  UR                  5       U	R                  5       :w  aE  UR                  5       S:X  a   UR                  S5      R                  S5      nUR                  S5      nUR!                  U	R"                  5      nX-  n	U R%                  U	5      n	U	$ )Nr,   r   r\   r   r.   )r%   r   r   r!   r_   r]   r   r   
zeros_liker   r   r   rL   r   squeezerb   r4   r/   rO   )r$   	input_idstoken_type_idsr   maskinputs_embedsinput_shape
seq_lengthr   
embeddingsr   s              r(   r9   DebertaEmbeddings.forward  s    #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M##/"&":":<;L;L;N"O"'"2"2="A"
%%#9J%%1$($>$>~$N!#;J??&4J^^J/
xxzZ^^--88:?<<?2215D~~a(77:++,D#*J\\*-
r*   )	rL   rQ   rO   r   r   r   r   r   r   )NNNNNr;   rC   s   @r(   r   r   ]  s    Q
>, ,r*   r   c                   ~   ^  \ rS rSrU 4S jr    SS\S\\R                  \	\R                     4   4S jjr
SrU =r$ )DebertaAttentioni  c                 n   > [         TU ]  5         [        U5      U l        [	        U5      U l        Xl        g r   )r   r   r   r$   rE   outputrQ   rP   s     r(   r   DebertaAttention.__init__  s+    -f5	'/r*   r   r   c           	      v    U R                  UUUUUUS9u  pxUc  UnU R                  Xt5      n	U(       a  X4$ U	S 4$ )N)r   ro   r   )r$   r   )
r$   r5   r   r   r   ro   r   self_output
att_matrixattention_outputs
             r(   r9   DebertaAttention.forward  s_     #'))%%) #, #
 (L;;{A$11$d++r*   )rQ   r   r$   r   r<   r=   r>   r?   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   r   r     sK     #(,  	, 
u||Xell33	4, ,r*   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )DebertaIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r   r   r   rH   rI   intermediate_sizerJ   
isinstance
hidden_actstrr   intermediate_act_fnrP   s     r(   r   DebertaIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r*   r5   r   c                 J    U R                  U5      nU R                  U5      nU$ r   rJ   r  r$   r5   s     r(   r9   DebertaIntermediate.forward  s&    

=100?r*   r  
r<   r=   r>   r?   r   r   r   r9   rA   rB   rC   s   @r(   r  r    s(    9U\\ ell  r*   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )DebertaOutputi  c                 "  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR
                  UR                  5      U l	        [        R                  " UR                  5      U l        Xl        g r   )r   r   r   rH   r  rI   rJ   r   rK   rL   rM   rN   rO   rQ   rP   s     r(   r   DebertaOutput.__init__  sa    YYv779K9KL
)&*<*<f>S>STzz&"<"<=r*   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   rT   rU   s      r(   r9   DebertaOutput.forward  rX   r*   )rL   rQ   rJ   rO   rY   rC   s   @r(   r  r    s     r*   r  c                   ~   ^  \ rS rSrU 4S jr    SS\S\\R                  \	\R                     4   4S jjr
SrU =r$ )DebertaLayeri  c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        U5      U l        g r   )r   r   r   	attentionr  intermediater  r   rP   s     r(   r   DebertaLayer.__init__  s3    )&1/7#F+r*   r   r   c           	          U R                  UUUUUUS9u  pxU R                  U5      n	U R                  X5      n
U(       a  X4$ U
S 4$ )Nr   r   ro   r   r  r  r   )r$   r5   r   r   ro   r   r   r  r  intermediate_outputlayer_outputs              r(   r9   DebertaLayer.forward  sg     (,~~/%%) (6 (
$ #//0@A{{#6I -- $''r*   r!  )NNNFr  rC   s   @r(   r  r    sK    , "'(  ( 
u||Xell33	4( (r*   r  c                      ^  \ rS rSrSrU 4S jrS rS rSS jr     SS\	R                  S\	R                  S	\S
\S\4
S jjrSrU =r$ )DebertaEncoderi  z8Modified BertEncoder with relative position bias supportc                   > [         TU ]  5         [        R                  " [	        UR
                  5       Vs/ s H  n[        U5      PM     sn5      U l        [        USS5      U l	        U R                  (       af  [        USS5      U l
        U R                  S:  a  UR                  U l
        [        R                  " U R                  S-  UR                  5      U l        SU l        g s  snf )Nr   Fr   r,   r   r.   )r   r   r   
ModuleListr   num_hidden_layersr  layerr   r   r   r   r   rI   r   gradient_checkpointing)r$   rQ   _r'   s      r(   r   DebertaEncoder.__init__  s    ]]%H`H`Ba#bBaQL$8Ba#bc
")&2F"N""*1&:RTV*WD'**Q..4.L.L+"$,,t/J/JQ/NPVPbPb"cD&+# $cs   C)c                 \    U R                   (       a  U R                  R                  nU$ S nU$ r   )r   r   r    )r$   r   s     r(   get_rel_embedding DebertaEncoder.get_rel_embedding  s0    7;7N7N,,33 UYr*   c                     UR                  5       S::  aD  UR                  S5      R                  S5      nX"R                  S5      R                  S5      -  nU$ UR                  5       S:X  a  UR                  S5      nU$ )Nr.   r   r[   r,   r
   )r   rb   r   )r$   r   extended_attention_masks      r(   get_attention_mask!DebertaEncoder.get_attention_mask"  s    1$&4&>&>q&A&K&KA&N#47V7VWY7Z7d7deg7hhN  !Q&+55a8Nr*   c                 d    U R                   (       a  Uc  Ub  [        X!5      nU$ [        X5      nU$ r   )r   rj   )r$   r5   r   ro   s       r(   get_rel_posDebertaEncoder.get_rel_pos+  s:    ""|';'6|S   7}Tr*   r5   r   output_hidden_statesr   return_dictc           
      
   U R                  U5      nU R                  XU5      nU(       a  U4OS nU(       a  SOS n	Un
U R                  5       n[        U R                  5       Hz  u  pU R
                  (       a5  U R                  (       a$  U R                  UR                  U
UUUUU5      u  pOU" U
UUUUUS9u  pU(       a  X4-   nUb  UnOUn
U(       d  Mu  X4-   n	M|     U(       d  [        S XU	4 5       5      $ [        XU	S9$ )N )r   ro   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r;  ).0r   s     r(   	<genexpr>)DebertaEncoder.forward.<locals>.<genexpr>g  s     h$Vq$Vs   	last_hidden_stater5   
attentions)r3  r6  r/  	enumerater*  r+  training_gradient_checkpointing_func__call__tupler   )r$   r5   r   r8  r   r   ro   r9  all_hidden_statesall_attentionsnext_kvr   r   layer_moduleatt_ms                  r(   r9   DebertaEncoder.forward3  s%    00@''\ROcM;Kim0d//1(4OA**t}}'+'H'H ))"  "%($u (4"!-!-#1&7($ $$58H$H!','  !/(!:=  5@ h]~$Vhhh+Yg
 	
r*   )r+  r*  r   r   r   )NN)TFNNT)r<   r=   r>   r?   r@   r   r/  r3  r6  r   r   r   r9   rA   rB   rC   s   @r(   r&  r&    sm    B	, &*"' 7
||7
 7
 #	7

  7
 7
 7
r*   r&  c                   ,    \ rS rSr\rSrS/rSrS r	Sr
g)DebertaPreTrainedModelim  debertar   Tc                 `   [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  [        45      (       aJ  UR                  R                  R                  S5        UR                  R                  R                  5         g[        U[        5      (       aI  UR                   R                  R                  5         UR"                  R                  R                  5         g[        U[$        [&        45      (       a%  UR                  R                  R                  5         gg)zInitialize the weights.g        )r1   stdNg      ?)r	  r   rH   r    datanormal_rQ   initializer_ranger"   zero_r   r   rL   r   fill_r   r   r   LegacyDebertaLMPredictionHeadDebertaLMPredictionHead)r$   modules     r(   _init_weights$DebertaPreTrainedModel._init_weightst  sz   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> ./? @AAMM$$S)KK""$ 9::MM$$&MM$$&!>@W XYYKK""$ Zr*   r;  N)r<   r=   r>   r?   r   config_classbase_model_prefix"_keys_to_ignore_on_load_unexpectedsupports_gradient_checkpointingr[  rA   r;  r*   r(   rO  rO  m  s!     L!*?)@&&*#%r*   rO  c                   $  ^  \ rS rSrU 4S jrS rS rS r\        SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\   S\	\   S\	\   S\\\4   4S jj5       rSrU =r$ )DebertaModeli  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        SU l        Xl        U R                  5         g Nr   )	r   r   r   r   r&  encoderz_stepsrQ   	post_initrP   s     r(   r   DebertaModel.__init__  s>     +F3%f-r*   c                 .    U R                   R                  $ r   r   r   r$   s    r(   get_input_embeddings!DebertaModel.get_input_embeddings  s    ...r*   c                 $    XR                   l        g r   rj  r$   new_embeddingss     r(   set_input_embeddings!DebertaModel.set_input_embeddings  s    *8'r*   c                     [        S5      e)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
z7The prune function is not implemented in DeBERTa model.)NotImplementedError)r$   heads_to_prunes     r(   _prune_headsDebertaModel._prune_heads  s    
 ""[\\r*   r   r   r   r   r   r   r8  r9  r   c	           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n	O"Ub  UR                  5       S S n	O[	        S5      eUb  UR                  OUR                  n
Uc  [        R                  " XS9nUc$  [        R                  " U	[        R                  U
S9nU R                  UUUUUS9nU R                  UUSUUS9nUS	   nU R                  S	:  a  US
   n[        U R                  5       Vs/ s H  oR                  R                   S   PM     nnUS   nU R                  R#                  5       nU R                  R%                  U5      nU R                  R'                  U5      nUS	S   H  nU" UUSUUUS9nUR)                  U5        M!     US   nU(       d  U4X(       a  S	S  -   $ SS  -   $ [+        UU(       a  UR,                  OS UR.                  S9$ s  snf )NzDYou cannot specify both input_ids and inputs_embeds at the same timer,   z5You have to specify either input_ids or inputs_embeds)r]   r\   )r   r   r   r   r   T)r8  r   r9  r   r[   Fr   r.   r@  )rQ   r   r8  use_return_dictr   %warn_if_padding_and_no_attention_maskr%   r]   r   r   r!   r_   r   re  rf  r   r*  r/  r3  r6  appendr   r5   rB  )r$   r   r   r   r   r   r   r8  r9  r   r]   embedding_outputencoder_outputsencoded_layersr5   r,  layersr   r   rel_posr*  sequence_outputs                         r(   r9   DebertaModel.forward  sr    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN??)%' + 
 ,,!%/# ' 
 )+<<!*2.M6;DLL6IJ6Ill((,6IFJ)"-L!\\;;=N!\\<<^LNll../?@G$!"&+!-!(#1  %%l3 $ ),#%>R8\(]]]XY8\(]]]-;O/77UY&11
 	
+ Ks   "I)rQ   r   re  rf  )NNNNNNNN)r<   r=   r>   r?   r   rl  rq  rv  r   r   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   rb  rb    s    /9]  -11515/304,0/3&*N
ELL)N
 !.N
 !.	N

 u||,N
  -N
 $D>N
 'tnN
 d^N
 
uo%	&N
 N
r*   rb  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )$LegacyDebertaPredictionHeadTransformi  c                   > [         TU ]  5         [        USUR                  5      U l        [
        R                  " UR                  U R                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [
        R                  " U R                  UR                  S9U l        g )Nr   )r&   )r   r   r   rI   r   r   rH   rJ   r	  r
  r  r   transform_act_fnrL   rK   rP   s     r(   r   -LegacyDebertaPredictionHeadTransform.__init__  s    %f.>@R@RSYYv1143F3FG
f''--$*6+<+<$=D!$*$5$5D!d&9&9v?T?TUr*   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )rJ   r  rL   r  s     r(   r9   ,LegacyDebertaPredictionHeadTransform.forward  s4    

=1--m<}5r*   )rL   rJ   r   r  rY   rC   s   @r(   r  r    s    	V r*   r  c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )rX  i  c                   > [         TU ]  5         [        U5      U l        [	        USUR
                  5      U l        [        R                  " U R                  UR                  SS9U l
        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )Nr   Fr   )r   r   r  	transformr   rI   r   r   rH   r   decoderr   r   r!   r"   rP   s     r(   r   &LegacyDebertaLMPredictionHead.__init__	  s    =fE%f.>@R@RS yy!4!4f6G6GeTLLV->->!?@	 !IIr*   c                 :    U R                   U R                  l         g r   )r"   r  rk  s    r(   _tie_weights*LegacyDebertaLMPredictionHead._tie_weights  s     IIr*   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r  s     r(   r9   %LegacyDebertaLMPredictionHead.forward  s$    }5]3r*   )r"   r  r   r  )	r<   r=   r>   r?   r   r  r9   rA   rB   rC   s   @r(   rX  rX    s    && r*   rX  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LegacyDebertaOnlyMLMHeadi!  c                 B   > [         TU ]  5         [        U5      U l        g r   )r   r   rX  predictionsrP   s     r(   r   !LegacyDebertaOnlyMLMHead.__init__"  s    8@r*   r  r   c                 (    U R                  U5      nU$ r   r  )r$   r  prediction_scoress      r(   r9    LegacyDebertaOnlyMLMHead.forward&  s     ,,_=  r*   r  r  rC   s   @r(   r  r  !  s)    A!u|| ! ! !r*   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rY  i+  zMhttps://github.com/microsoft/DeBERTa/blob/master/DeBERTa/deberta/bert.py#L270c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                   5      5      U l        g )NT)r&   elementwise_affine)r   r   r   rH   rI   rJ   r	  r
  r  r   r  rL   rK   r   r   r!   r   r"   rP   s     r(   r    DebertaLMPredictionHead.__init__.  s    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>ShlmLLV->->!?@	r*   c                     U R                  U5      nU R                  U5      nU R                  U5      n[        R                  " XR
                  R                  5       5      U R                  -   nU$ r   )rJ   r  rL   r   r   r    r   r"   )r$   r5   r   s      r(   r9   DebertaLMPredictionHead.forward<  sb    

=1--m<
 ]4J4J4L4L4NORVR[R[[r*   )rL   r"   rJ   r  r;   rC   s   @r(   rY  rY  +  s    WA r*   rY  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )DebertaOnlyMLMHeadiF  c                 B   > [         TU ]  5         [        U5      U l        g r   )r   r   rY  lm_headrP   s     r(   r   DebertaOnlyMLMHead.__init__G  s    .v6r*   c                 (    U R                  X5      nU$ r   r  )r$   r  r   r  s       r(   r9   DebertaOnlyMLMHead.forwardL  s     LLJ  r*   r  rY   rC   s   @r(   r  r  F  s    7
! !r*   r  c                   F  ^  \ rS rSrSS/rU 4S jrS rS r\         SS\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\4   4S jj5       rSrU =r$ )DebertaForMaskedLMiQ  zcls.predictions.decoder.weightzcls.predictions.decoder.biasc                    > [         TU ]  U5        UR                  U l        [        U5      U l        U R                  (       a  [        U5      U l        OSS/U l        [        U5      U l	        U R                  5         g )Nzlm_predictions.lm_head.weightz)deberta.embeddings.word_embeddings.weight)r   r   legacyrb  rP  r  cls_tied_weights_keysr  lm_predictionsrg  rP   s     r(   r   DebertaForMaskedLM.__init__U  sa     mm#F+;;/7DH'FHs&tD#"4V"<D 	r*   c                     U R                   (       a   U R                  R                  R                  $ U R                  R
                  R                  $ r   )r  r  r  r  r  r  rJ   rk  s    r(   get_output_embeddings(DebertaForMaskedLM.get_output_embeddingsb  s7    ;;88''///&&..444r*   c                 $   U R                   (       a@  XR                  R                  l        UR                  U R                  R                  l        g XR
                  R                  l        UR                  U R
                  R                  l        g r   )r  r  r  r  r"   r  r  rJ   ro  s     r(   set_output_embeddings(DebertaForMaskedLM.set_output_embeddingsh  s]    ;;+9HH  ((6(;(;DHH  %0>''-/=/B/BD'',r*   r   r   r   r   r   labelsr   r8  r9  r   c
                    U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  (       a  U R	                  U5      nO/U R                  XR                  R                  R                  5      nSnUbF  [        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU	(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr   r   r   r   r   r8  r9  r   r,   r   losslogitsr5   rB  )rQ   ry  rP  r  r  r  r   r   r   r`   r   r   r5   rB  )r$   r   r   r   r   r   r  r   r8  r9  outputsr  r  masked_lm_lossloss_fctr   s                   r(   r9   DebertaForMaskedLM.forwardp  s   ( &1%<k$++B]B],,))%'/!5#  	
 "!*;; $ 9 $ 3 3O\\E\E\ElEl m')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r*   )r  r  rP  r  r  	NNNNNNNNN)r<   r=   r>   r?   r  r   r  r  r   r   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   r  r  Q  s    :<Z[5C  -11515/304)-,0/3&*4
ELL)4
 !.4
 !.	4

 u||,4
  -4
 &4
 $D>4
 'tn4
 d^4
 
un$	%4
 4
r*   r  c                   >   ^  \ rS rSrU 4S jrS r\S 5       rSrU =r	$ )ContextPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        Xl	        g r   )
r   r   r   rH   pooler_hidden_sizerJ   rM   pooler_dropoutrO   rQ   rP   s     r(   r   ContextPooler.__init__  sG    YYv88&:S:ST
zz&"7"78r*   c                     US S 2S4   nU R                  U5      nU R                  U5      n[        U R                  R                     " U5      nU$ rd  )rO   rJ   r   rQ   pooler_hidden_act)r$   r5   context_tokenpooled_outputs       r(   r9   ContextPooler.forward  sM     &ad+]3

=1t{{<<=mLr*   c                 .    U R                   R                  $ r   )rQ   rI   rk  s    r(   
output_dimContextPooler.output_dim  s    {{&&&r*   )rQ   rJ   rO   )
r<   r=   r>   r?   r   r9   propertyr  rA   rB   rC   s   @r(   r  r    s!     ' 'r*   r  z
    DeBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    )custom_introc                   >  ^  \ rS rSrU 4S jrS rS r\         SS\\	R                     S\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ ) DebertaForSequenceClassificationi  c                   > [         TU ]  U5        [        USS5      nX l        [	        U5      U l        [        U5      U l        U R                  R                  n[        R                  " X25      U l        [        USS 5      nUc  U R                  R                  OUn[        R                  " U5      U l        U R!                  5         g )N
num_labelsr.   cls_dropout)r   r   r   r  rb  rP  r  poolerr  r   rH   
classifierrQ   rN   rM   rO   rg  )r$   rQ   r  r  drop_outr'   s        r(   r   )DebertaForSequenceClassification.__init__  s     V\15
$#F+#F+[[++
))J;6=$76>6F4;;22Hzz(+ 	r*   c                 6    U R                   R                  5       $ r   )rP  rl  rk  s    r(   rl  5DebertaForSequenceClassification.get_input_embeddings  s    ||0022r*   c                 :    U R                   R                  U5        g r   )rP  rq  ro  s     r(   rq  5DebertaForSequenceClassification.set_input_embeddings  s    )).9r*   r   r   r   r   r   r  r   r8  r9  r   c
                 $   U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  U5      nU R	                  U5      nU R                  U5      nSnUGb  U R                   R                  Gc  U R                  S:X  aX  [        R                  " 5       nUR                  S5      R                  UR                  5      nU" XR                  S5      5      nGOhUR                  5       S:X  d  UR                  S5      S:X  Ga  US:  R                  5       nUR!                  5       nUR                  S5      S:  a  ["        R$                  " USUR'                  UR                  S5      UR                  S5      5      5      n["        R$                  " USUR                  S5      5      n[)        5       nU" UR                  SU R                  5      R+                  5       UR                  S5      5      nGOM["        R,                  " S5      R                  U5      nGO&[        R.                  " S5      nU" U5      U-  R1                  S5      R3                  5       * nOU R                   R                  S:X  aI  [        5       nU R                  S:X  a&  U" UR5                  5       UR5                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [)        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [7        5       nU" X5      nU	(       d  U4U
SS -   nUb  U4U-   $ U$ [9        XU
R:                  U
R<                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
N)r   r   r   r   r   r8  r9  r   r   r,   
regressionsingle_label_classificationmulti_label_classificationr  )rQ   ry  rP  r  rO   r  problem_typer  r   r	   r`   r4   r/   r   r%   nonzeror_   r   r   rm   r   r0   rz   
LogSoftmaxsumr1   r   r   r   r5   rB  )r$   r   r   r   r   r   r  r   r8  r9  r  encoder_layerr  r  r  loss_fnlabel_indexlabeled_logitsr  log_softmaxr   s                        r(   r9   (DebertaForSequenceClassification.forward  s   & &1%<k$++B]B],,))%'/!5#  	
  
M2]3/{{''/??a' jjlG#[[_//=F"6;;r?;DZZ\Q&&++b/Q*>#)Q;"7"7"9K#[[]F"''*Q.)."A{'9'9+:J:J1:Mv{{[\~'^* "'fa9I9I"9M!N#3#5'(;(;B(P(V(V(XZ`ZeZefhZij$||A11&9"$--"3K)&1F:??CIIKKD))\9"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'G4I4IV]VhVh
 	
r*   )r  rP  rO   r  r  r  )r<   r=   r>   r?   r   rl  rq  r   r   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   r  r    s    $3:  -11515/304)-,0/3&*M
ELL)M
 !.M
 !.	M

 u||,M
  -M
 &M
 $D>M
 'tnM
 d^M
 
u..	/M
 M
r*   r  c                   2  ^  \ rS rSrU 4S jr\         SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\	   S
\\	   S\\	   S\
\\4   4S jj5       rSrU =r$ )DebertaForTokenClassificationi.  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )r   r   r  rb  rP  r   rM   rN   rO   rH   rI   r  rg  rP   s     r(   r   &DebertaForTokenClassification.__init__0  si      ++#F+zz&"<"<=))F$6$68I8IJ 	r*   r   r   r   r   r   r  r   r8  r9  r   c
                    U	b  U	OU R                   R                  n	U R                  UUUUUUUU	S9n
U
S   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU	(       d  U4U
SS -   nUb  U4U-   $ U$ [        XU
R                  U
R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   r,   r   r  )rQ   ry  rP  rO   r  r   r`   r  r   r5   rB  )r$   r   r   r   r   r   r  r   r8  r9  r  r  r  r  r  r   s                   r(   r9   %DebertaForTokenClassification.forward;  s    " &1%<k$++B]B],,))%'/!5#  	
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$G4I4IV]VhVh
 	
r*   )r  rP  rO   r  r  )r<   r=   r>   r?   r   r   r   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   r  r  .  s    	  -11515/304)-,0/3&*-
ELL)-
 !.-
 !.	-

 u||,-
  --
 &-
 $D>-
 'tn-
 d^-
 
u++	,-
 -
r*   r  c                   R  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\	   S\\	   S\\	   S\
\\4   4S jj5       rSrU =r$ )DebertaForQuestionAnsweringil  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )
r   r   r  rb  rP  r   rH   rI   
qa_outputsrg  rP   s     r(   r   $DebertaForQuestionAnswering.__init__n  sS      ++#F+))F$6$68I8IJ 	r*   r   r   r   r   r   start_positionsend_positionsr   r8  r9  r   c                    U
b  U
OU R                   R                  n
U R                  UUUUUUU	U
S9nUS   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU
(       d  X4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	Nr  r   r   r,   r   )ignore_indexr.   )r  start_logits
end_logitsr5   rB  )rQ   ry  rP  r  splitr   r   r   r%   r   r   r   r5   rB  )r$   r   r   r   r   r   r  r  r   r8  r9  r  r  r  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                         r(   r9   #DebertaForQuestionAnswering.forwardx  s    &1%<k$++B]B],,))%'/!5#  	
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r*   )rP  r  r  )
NNNNNNNNNN)r<   r=   r>   r?   r   r   r   r   r   r   r   r   r   r9   rA   rB   rC   s   @r(   r  r  l  s      -11515/3042604,0/3&*<
ELL)<
 !.<
 !.	<

 u||,<
  -<
 "%,,/<
  -<
 $D><
 'tn<
 d^<
 
u22	3<
 <
r*   r  )r  r  r  r  rb  rO  )Ar@   typingr   r   r   r   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   modeling_utilsr   utilsr   r   configuration_debertar   
get_loggerr<   loggerModuler   rE   jitscriptrj   rp   rr   rv   r   r   r{   r~   r   r   r   r   r   r  r  r  r&  rO  rb  r  rX  r  rY  r  r  r  r  r  r  __all__r;  r*   r(   <module>r     sh    ) )    A A !  . , 0 
		H	%ryy (		   8 r r n n [ [ \%,, \c \ \ ELL U\\   d d dgj d d    C		 CLN		 Nb,ryy ,F")) BII (299 (BZ
RYY Z
z %_ % %8 g
) g
 g
T299 &BII 2!ryy !bii 6! ! S
/ S
 S
l'BII ', g
'= g
g
T :
$: :
 :
z H
"8 H
 H
Vr*   