
    fTh                        S r SSKrSSKrSSKJrJrJr  SSKrSSKrSSKJ	r	  SSK
JrJrJr  SSKJr  SSKJrJrJrJrJrJr  SS	KJrJrJrJr  SS
KJrJr  SSKJ r   \RB                  " \"5      r# " S S\	RH                  5      r%S7S jr& " S S\	RH                  5      r' " S S\	RH                  5      r( " S S\	RH                  5      r) " S S\	RH                  5      r* " S S\	RH                  5      r+ " S S\	RH                  5      r, " S S\	RH                  5      r- " S S\	RH                  5      r. " S  S!\	RH                  5      r/S"\/0r0 " S# S$\	RH                  5      r1 " S% S&\	RH                  5      r2 " S' S(\	RH                  5      r3\ " S) S*\5      5       r4\ " S+ S,\45      5       r5\ " S- S.\45      5       r6\" S/S09 " S1 S2\45      5       r7\" S3S09 " S4 S5\45      5       r8/ S6Qr9g)8zPyTorch MarkupLM model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModelapply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MarkupLMConfigc                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )XPathEmbeddings0   zConstruct the embeddings from xpath tags and subscripts.

We drop tree-id in this version, as its info can be covered by xpath.
c           	        > [         [        U ]  5         UR                  U l        [        R
                  " UR                  U R                  -  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " 5       U l        [        R
                  " UR                  U R                  -  SUR                  -  5      U l        [        R
                  " SUR                  -  UR                  5      U l        [        R                   " [#        U R                  5       Vs/ s H.  n[        R$                  " UR&                  UR                  5      PM0     sn5      U l        [        R                   " [#        U R                  5       Vs/ s H.  n[        R$                  " UR*                  UR                  5      PM0     sn5      U l        g s  snf s  snf )N   )superr   __init__	max_depthr   Linearxpath_unit_hidden_sizehidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrange	Embeddingmax_xpath_tag_unit_embeddingsxpath_tag_sub_embeddingsmax_xpath_subs_unit_embeddingsxpath_subs_sub_embeddingsselfconfig_	__class__s      f/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/markuplm/modeling_markuplm.pyr    XPathEmbeddings.__init__6   sf   ot-/)))+63P3PSWSaSa3acicucu)v&zz&"<"<='')$&IIf.K.Kdnn.\^_bhbtbt^t$u!1v'9'9#96;M;MN(* t~~..A VAA6C`C`a.)
% *, t~~..A VBBFDaDab.*
&s   ;5G%#5G*c           	         / n/ n[        U R                  5       H_  nUR                  U R                  U   " US S 2S S 2U4   5      5        UR                  U R                  U   " US S 2S S 2U4   5      5        Ma     [
        R                  " USS9n[
        R                  " USS9nX4-   nU R                  U R                  U R                  U R                  U5      5      5      5      nU$ )Ndim)r.   r!   appendr1   r3   torchcatr,   r(   r*   r+   )r5   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r9   forwardXPathEmbeddings.forwardP   s     " "t~~&A!(()F)Fq)I.YZ\]_`Y`Ja)bc!(()G)G)J>Z[]^`aZaKb)cd ' !&		*?R H %		*?R H0H>>$,,ttG`G`aqGr7s*tu    )r*   r(   r,   r!   r3   r1   r%   r+   )NN)	__name__
__module____qualname____firstlineno____doc__r    rH   __static_attributes____classcell__r8   s   @r9   r   r   0   s    

4   rJ   r   c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-   U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: torch.Tensor x:

Returns: torch.Tensor
r   r=   )neintr@   cumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r9   "create_position_ids_from_input_idsr^   c   sW     <<$((*D <<!4<<TBE[[_cc##%33rJ   c                   J   ^  \ rS rSrSrU 4S jrS r       SS jrSrU =r	$ )MarkupLMEmbeddingss   zGConstruct the embeddings from word, position and token_type embeddings.c                 `  > [         [        U ]  5         Xl        [        R
                  " UR                  UR                  UR                  S9U l	        [        R
                  " UR                  UR                  5      U l        UR                  U l        [        U5      U l        [        R
                  " UR                  UR                  5      U l        [        R"                  " UR                  UR$                  S9U l        [        R&                  " UR(                  5      U l        U R-                  S[.        R0                  " UR                  5      R3                  S5      SS9  UR                  U l        [        R
                  " UR                  UR                  U R4                  S9U l        g )N)rZ   epsposition_ids)r   r<   F)
persistent)r   r`   r    r6   r   r/   
vocab_sizer$   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr!   r   rG   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr&   r'   r(   register_bufferr@   arangeexpandrZ   r5   r6   r8   s     r9   r    MarkupLMEmbeddings.__init__v   s;    $02!||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c )) / 7%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 "..#%<<**F,>,>DL\L\$
 rJ   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr<   r   dtypedevicer   )sizer@   rq   rZ   rX   rx   	unsqueezerr   )r5   inputs_embedsinput_shapesequence_lengthre   s        r9   &create_position_ids_from_inputs_embeds9MarkupLMEmbeddings.create_position_ids_from_inputs_embeds   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rJ   c                    Ub  UR                  5       nOUR                  5       S S nUb  UR                  OUR                  n	Uc+  Ub  [        XR                  U5      nOU R	                  U5      nUc$  [
        R                  " U[
        R                  U	S9nUc  U R                  U5      nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  U	S9-  nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  U	S9-  nUn
U R!                  U5      nU R#                  U5      nU R%                  X#5      nX-   U-   U-   nU R'                  U5      nU R)                  U5      nU$ )Nr<   rv   )ry   rx   r^   rZ   r~   r@   zerosrX   ri   r6   
tag_pad_idonestuplelistr!   subs_pad_idrk   rm   rG   rn   r(   )r5   rY   rB   rC   token_type_idsre   r{   r[   r|   rx   words_embeddingsrk   rm   rG   
embeddingss                  r9   rH   MarkupLMEmbeddings.forward   s     #..*K',,.s3K%.%:!!@T@T$A)M]M]_uv#JJ=Y!"[[EJJvVN  00;M !![[33ejjd;'4>>*::;5::V\7 N !![[44uzzd;'4>>*::;5::V\8 N )"66|D $ : :> J00P%;>SSVff
^^J/
\\*-
rJ   )	rn   r6   r(   r!   rZ   rk   rm   ri   rG   )NNNNNNr   )
rK   rL   rM   rN   rO   r    r~   rH   rP   rQ   rR   s   @r9   r`   r`   s   s1    Q
2=&  2 2rJ   r`   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nrc   )r   r    r   r"   r$   densern   ro   r&   r'   r(   rs   s     r9   r    MarkupLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rJ   hidden_statesinput_tensorreturnc                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   r(   rn   r5   r   r   s      r9   rH   MarkupLMSelfOutput.forward   5    

=1]3}'CDrJ   rn   r   r(   
rK   rL   rM   rN   r    r@   TensorrH   rP   rQ   rR   s   @r9   r   r      6    >U\\  RWR^R^  rJ   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r   r    r   r"   r$   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrs   s     r9   r    MarkupLMIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rJ   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   r5   r   s     r9   rH   MarkupLMIntermediate.forward   s&    

=100?rJ   r   r   rR   s   @r9   r   r      s(    9U\\ ell  rJ   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r   r    r   r"   r   r$   r   rn   ro   r&   r'   r(   rs   s     r9   r    MarkupLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rJ   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r9   rH   MarkupLMOutput.forward   r   rJ   r   r   rR   s   @r9   r   r      r   rJ   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r   r    r   r"   r$   r   Tanhr*   rs   s     r9   r    MarkupLMPooler.__init__  s9    YYv1163E3EF
'')rJ   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r*   )r5   r   first_token_tensorpooled_outputs       r9   rH   MarkupLMPooler.forward
  s6     +1a40

#566rJ   )r*   r   r   rR   s   @r9   r   r     s(    $
U\\ ell  rJ   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r   r    r   r"   r$   r   r   r   r   r   transform_act_fnrn   ro   rs   s     r9   r    (MarkupLMPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrJ   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   rn   r   s     r9   rH   'MarkupLMPredictionHeadTransform.forward  s4    

=1--m<}5rJ   )rn   r   r   r   rR   s   @r9   r   r     s)    UU\\ ell  rJ   r   c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )MarkupLMLMPredictionHeadi&  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)bias)r   r    r   	transformr   r"   r$   rg   decoder	Parameterr@   r   r   rs   s     r9   r    !MarkupLMLMPredictionHead.__init__'  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrJ   c                 :    U R                   U R                  l         g r   )r   r   r5   s    r9   _tie_weights%MarkupLMLMPredictionHead._tie_weights4  s     IIrJ   c                 J    U R                  U5      nU R                  U5      nU$ r   )r   r   r   s     r9   rH    MarkupLMLMPredictionHead.forward7  s$    }5]3rJ   )r   r   r   )	rK   rL   rM   rN   r    r   rH   rP   rQ   rR   s   @r9   r   r   &  s    && rJ   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMOnlyMLMHeadi>  c                 B   > [         TU ]  5         [        U5      U l        g r   )r   r    r   predictionsrs   s     r9   r    MarkupLMOnlyMLMHead.__init__?  s    3F;rJ   sequence_outputr   c                 (    U R                  U5      nU$ r   r   )r5   r   prediction_scoress      r9   rH   MarkupLMOnlyMLMHead.forwardC  s     ,,_=  rJ   r   r   rR   s   @r9   r   r   >  s(    <!u|| ! ! !rJ   r   c                   b  ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jr      SS\R                  S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
\
\R                           S\\   S\
\R                     4S jjrSrU =r$ )MarkupLMSelfAttentioniI  c                   > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  5      U l        U=(       d    [#        USS5      U l        U R$                  S:X  d  U R$                  S	:X  aG  UR&                  U l        [        R(                  " S
UR&                  -  S-
  U R                  5      U l        UR,                  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_query   r   )r   r    r$   num_attention_headshasattr
ValueErrorrU   attention_head_sizeall_head_sizer   r"   querykeyvaluer&   attention_probs_dropout_probr(   getattrr   rj   r/   distance_embedding
is_decoderr5   r6   r   r8   s      r9   r    MarkupLMSelfAttention.__init__J  s    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++rJ   xr   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nr<   r   r   r   r
   )ry   r   r   viewpermute)r5   r   new_x_shapes      r9   transpose_for_scores*MarkupLMSelfAttention.transpose_for_scoresd  sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$rJ   r   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsc                 V   U R                  U5      nUS Ln	U	(       a  Ub  US   n
US   nUnGOU	(       aC  U R                  U R                  U5      5      n
U R                  U R                  U5      5      nUnOUbu  U R                  U R                  U5      5      n
U R                  U R                  U5      5      n[        R
                  " US   U
/SS9n
[        R
                  " US   U/SS9nO@U R                  U R                  U5      5      n
U R                  U R                  U5      5      nU R                  U5      nUS LnU R                  (       a  X4n[        R                  " XR                  SS5      5      nU R                  S:X  d  U R                  S:X  Ga  UR                  S   U
R                  S   nnU(       aB  [        R                  " US-
  [        R                  UR                  S	9R                  SS5      nO>[        R                  " U[        R                  UR                  S	9R                  SS5      n[        R                  " U[        R                  UR                  S	9R                  SS5      nUU-
  nU R!                  UU R"                  -   S-
  5      nUR%                  UR&                  S
9nU R                  S:X  a  [        R(                  " SUU5      nUU-   nOHU R                  S:X  a8  [        R(                  " SUU5      n[        R(                  " SU
U5      nUU-   U-   nU[*        R,                  " U R.                  5      -  nUb  X-   n[0        R2                  R5                  USS9nU R7                  U5      nUb  UU-  n[        R                  " UU5      nUR9                  SSSS5      R;                  5       nUR=                  5       S S U R>                  4-   nUR                  U5      nU(       a  UU4OU4nU R                  (       a  UU4-   nU$ )Nr   r   r   r=   r<   r   r   rv   rw   zbhld,lrd->bhlrzbhrd,lrd->bhlrr
   ) r   r   r   r   r@   rA   r   matmul	transposer   shapetensorrX   rx   r   rq   r   rj   torw   einsummathsqrtr   r   
functionalsoftmaxr(   r   
contiguousry   r   )r5   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               r9   rH   MarkupLMSelfAttention.forwardi  s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB"$.	?? (5N !<<5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2mM]?? 11GrJ   )r   r   r   r(   r   r   rj   r   r   r   r   r   NNNNNF)rK   rL   rM   rN   r    r@   r   r   r   FloatTensorr   boolrH   rP   rQ   rR   s   @r9   r   r   I  s    ,4%ell %u|| % 7;15=A>BDH,1c||c !!2!23c E--.	c
  ((9(9:c !)):): ;c !uU->->'?!@Ac $D>c 
u||	c crJ   r   eagerc                   .  ^  \ rS rSrSU 4S jjrS r      SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S	\\
\
\R                           S
\\   S\
\R                     4S jjrSrU =r$ )MarkupLMAttentioni  c                    > [         TU ]  5         [        UR                     " XS9U l        [        U5      U l        [        5       U l        g )Nr   )	r   r    MARKUPLM_SELF_ATTENTION_CLASSES_attn_implementationr5   r   outputsetpruned_headsr   s      r9   r    MarkupLMAttention.__init__  s@    3F4O4OP
	 )0ErJ   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r=   )lenr   r5   r   r   r0  r   r   r   r   r.  r   r   union)r5   headsindexs      r9   prune_headsMarkupLMAttention.prune_heads  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rJ   r   r   r   r   r   r   r   r   c           	      p    U R                  UUUUUUU5      nU R                  US   U5      n	U	4USS  -   n
U
$ )Nr   r   )r5   r.  )r5   r   r   r   r   r   r   r   self_outputsattention_outputr"  s              r9   rH   MarkupLMAttention.forward  sW     yy!"
  ;;|AF#%QR(88rJ   )r.  r0  r5   r   r$  )rK   rL   rM   rN   r    r7  r@   r   r   r%  r   r&  rH   rP   rQ   rR   s   @r9   r)  r)    s    ";* 7;15=A>BDH,1|| !!2!23 E--.	
  ((9(9: !)):): ; !uU->->'?!@A $D> 
u||	 rJ   r)  c                   *  ^  \ rS rSrU 4S jr      SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\\	\	\R                           S	\\
   S
\	\R                     4S jjrS rSrU =r$ )MarkupLMLayeri	  c                 t  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        UR                  U l        UR                  U l        U R                  (       a.  U R                  (       d  [        U  S35      e[	        USS9U l	        [        U5      U l        [        U5      U l        g )Nr   z> should be used as a decoder model if cross attention is addedr   r+  )r   r    chunk_size_feed_forwardseq_len_dimr)  	attentionr   add_cross_attentionr   crossattentionr   intermediater   r.  rs   s     r9   r    MarkupLMLayer.__init__
  s    '-'E'E$*62 ++#)#=#= ##?? D6)g!hii"3FT^"_D08$V,rJ   r   r   r   r   r   r   r   r   c           	         Ub  US S OS nU R                  UUUUUS9n	U	S   n
U R                  (       a  U	SS nU	S   nOU	SS  nS nU R                  (       aZ  UbW  [        U S5      (       d  [        SU  S35      eUb  US	S  OS nU R	                  U
UUUUUU5      nUS   n
XSS -   nUS   nWU-   n[        U R                  U R                  U R                  U
5      nU4U-   nU R                  (       a  UW4-   nU$ )
Nr   )r   r   r   r   r<   rD  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r  )	rB  r   r   r   rD  r   feed_forward_chunkr@  rA  )r5   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr;  r"  present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    r9   rH   MarkupLMLayer.forward  s}    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!122 =dV DD D  @N?Yrs(;_c%&*&9&9 %&)!'#  7q9" ==G ,C2+F( 14P P0##T%A%A4CSCSUe
  /G+ ??!2 44GrJ   c                 J    U R                  U5      nU R                  X!5      nU$ r   )rE  r.  )r5   r;  intermediate_outputrO  s       r9   rH   MarkupLMLayer.feed_forward_chunkY  s)    "//0@A{{#6IrJ   )rC  rB  r@  rD  rE  r   r.  rA  r$  )rK   rL   rM   rN   r    r@   r   r   r%  r   r&  rH   rH  rP   rQ   rR   s   @r9   r>  r>  	  s    -" 7;15=A>BDH,1?||? !!2!23? E--.	?
  ((9(9:? !)):): ;? !uU->->'?!@A? $D>? 
u||	?B rJ   r>  c                   R  ^  \ rS rSrU 4S jr         SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\\	\	\R                           S	\\
   S
\\
   S\\
   S\\
   S\\	\R                     \4   4S jjrSrU =r$ )MarkupLMEncoderi`  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r   r    r6   r   r-   r.   num_hidden_layersr>  layergradient_checkpointingr4   s      r9   r    MarkupLMEncoder.__init__a  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&r   r   r   r   r   past_key_valuesr  r   output_hidden_statesreturn_dictr   c                 8   U	(       a  SOS nU(       a  SOS nU(       a  U R                   R                  (       a  SOS nU R                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnU(       a  SOS n[        U R                  5       H  u  nnU	(       a  X4-   nUb  X?   OS nUb  Xo   OS nU R                  (       a4  U R                  (       a#  U R                  UR                  UUUUUUU5      nOU" UUUUUUU5      nUS   nU(       a	  UUS   4-  nU(       d  M  UUS   4-   nU R                   R                  (       d  M  UUS   4-   nM     U	(       a  X4-   nU
(       d  [        S UUUUU4 5       5      $ [        UUUUUS	9$ )
N zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   r<   r   r   c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r_  ).0vs     r9   	<genexpr>*MarkupLMEncoder.forward.<locals>.<genexpr>  s"      
A  s   	)last_hidden_stater[  r   
attentionscross_attentions)r6   rC  rY  trainingloggerwarning_once	enumeraterX  _gradient_checkpointing_func__call__r   r   )r5   r   r   r   r   r   r[  r  r   r\  r]  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherF   layer_modulelayer_head_maskr   layer_outputss                       r9   rH   MarkupLMEncoder.forwardg  s    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	#,R$(4OA|#$58H$H!.7.CilO3B3N_/TXN**t}} $ A A ))!"#)*"%	! !-!"#)*"%! *!,M"}R'8&::"  &9]1=M<O&O#;;222+?=QRCSBU+U(G  5J   14D D 
 "&%'(
 
 
 9+.+*1
 	
rJ   )r6   rY  rX  )	NNNNNNFFT)rK   rL   rM   rN   r    r@   r   r   r%  r   r&  r   r   rH   rP   rQ   rR   s   @r9   rU  rU  `  s   , 7;15=A>BEI$(,1/4&*S
||S
 !!2!23S
 E--.	S

  ((9(9:S
 !)):): ;S
 "%e.?.?(@"ABS
 D>S
 $D>S
 'tnS
 d^S
 
uU\\"$MM	NS
 S
rJ   rU  c                   l   ^  \ rS rSr\rSrS r\S\	\
\\R                  4      4U 4S jj5       rSrU =r$ )MarkupLMPreTrainedModeli  markuplmc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsg        )meanstdN      ?)r   r   r"   weightdatanormal_r6   initializer_ranger   zero_r/   rZ   rn   fill_r   )r5   modules     r9   _init_weights%MarkupLMPreTrainedModel._init_weights  s3   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) 899KK""$ :rJ   pretrained_model_name_or_pathc                 4   > [         [        U ]
  " U/UQ70 UD6$ r   )r   rw  from_pretrained)clsr  
model_argskwargsr8   s       r9   r  'MarkupLMPreTrainedModel.from_pretrained  s+    ,cB)
,6
:@
 	
rJ   r_  )rK   rL   rM   rN   r   config_classbase_model_prefixr  classmethodr   r   r   osPathLiker  rP   rQ   rR   s   @r9   rw  rw    sD    !L"%$ 
HU3PRP[P[K[E\<] 
 
rJ   rw  c                     ^  \ rS rSrSU 4S jjrS rS rS r\           SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\4   4S jj5       rS rSrU =r$ )MarkupLMModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
r   r    r6   r`   r   rU  encoderr   pooler	post_init)r5   r6   add_pooling_layerr8   s      r9   r    MarkupLMModel.__init__  sK    
 	 ,V4&v.0AnV,t 	rJ   c                 .    U R                   R                  $ r   r   ri   r   s    r9   get_input_embeddings"MarkupLMModel.get_input_embeddings  s    ...rJ   c                 $    XR                   l        g r   r  )r5   r   s     r9   set_input_embeddings"MarkupLMModel.set_input_embeddings  s    */'rJ   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr  rX  rB  r7  )r5   heads_to_prunerX  r5  s       r9   _prune_headsMarkupLMModel._prune_heads  s<    
 +002LELLu%//;;EB 3rJ   rY   rB   rC   r   r   re   r   r{   r   r\  r]  r   c           	         U	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U[        R                  US9nUR                  S5      R                  S5      nUR                  U R                  S	9nS
U-
  S-  nUb  UR                  5       S:X  ah  UR                  S5      R                  S5      R                  S5      R                  S5      nUR!                  U R                   R"                  SSSS5      nOCUR                  5       S:X  a/  UR                  S5      R                  S5      R                  S5      nUR                  [%        U R'                  5       5      R                  S	9nOS/U R                   R"                  -  nU R)                  UUUUUUS9nU R+                  UUUU	U
US9nUS   nU R,                  b  U R-                  U5      OSnU(       d
  UU4USS -   $ [/        UUUR0                  UR2                  UR4                  S9$ )ao  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMModel

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

>>> encoding = processor(html_string, return_tensors="pt")

>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 4, 768]
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embeds)rx   rv   r   r   r  r|  g     r   )rY   rB   rC   re   r   r{   )r   r   r\  r]  )re  pooler_outputr   rf  rg  )r6   r   r\  use_return_dictr   %warn_if_padding_and_no_attention_maskry   rx   r@   r   r   rX   rz   r  rw   r>   rr   rW  next
parametersr   r  r  r   r   rf  rg  )r5   rY   rB   rC   r   r   re   r   r{   r   r\  r]  r|   rx   extended_attention_maskembedding_outputencoder_outputsr   r   s                      r9   rH   MarkupLMModel.forward  s   L 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@H"L }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??))%)' + 
 ,,#/!5# ' 
 *!,8<8OO4UY#]3oab6III;-')77&11,==
 	
rJ   c                 P   ^ SnU H  nU[        U4S jU 5       5      4-  nM     U$ )Nr_  c              3   x   >#    U  H/  oR                  S TR                  UR                  5      5      v   M1     g7f)r   N)index_selectr  rx   )ra  
past_statebeam_idxs     r9   rc  /MarkupLMModel._reorder_cache.<locals>.<genexpr>m  s1     ncmU_--aZ=N=N1OPPcms   7:)r   )r5   r[  r  reordered_past
layer_pasts     `  r9   _reorder_cacheMarkupLMModel._reorder_cachei  s8    )Jncmnn N * rJ   )r6   r   r  r  )T)NNNNNNNNNNN)rK   rL   rM   rN   r    r  r  r  r   r   r@   
LongTensorr%  r&  r   r   r   rH   r  rP   rQ   rR   s   @r9   r  r    sM    /0C  1559596:59371559,0/3&*h
E,,-h
 !!1!12h
 !!1!12	h

 !!2!23h
 !!1!12h
 u//0h
 E--.h
   1 12h
 $D>h
 'tnh
 d^h
 
uBB	Ch
 h
V rJ   r  c                      ^  \ rS rSrU 4S jr\             SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )MarkupLMForQuestionAnsweringir  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g NF)r  )
r   r    
num_labelsr  rx  r   r"   r$   
qa_outputsr  rs   s     r9   r    %MarkupLMForQuestionAnswering.__init__u  sU      ++%fF))F$6$68I8IJ 	rJ   rY   rB   rC   r   r   re   r   r{   start_positionsend_positionsr   r\  r]  r   c                 *   Ub  UOU R                   R                  nU R                  UUUUUUUUUUUS9nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnU	b  U
b  [        U	R                  5       5      S:  a  U	R                  S5      n	[        U
R                  5       5      S:  a  U
R                  S5      n
UR                  S5      nU	R                  SU5        U
R                  SU5        [        US9nU" UU	5      nU" UU
5      nUU-   S-  nU(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S	9$ )
a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
>>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

>>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
>>> question = "What's his name?"

>>> encoding = processor(html_string, questions=question, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()

>>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
>>> processor.decode(predict_answer_tokens).strip()
'Niels'
```N
rB   rC   r   r   re   r   r{   r   r\  r]  r   r   r<   r=   )ignore_indexr   )lossstart_logits
end_logitsr   rf  )r6   r  rx  r  splitsqueezer  r3  ry   clamp_r   r   r   rf  )r5   rY   rB   rC   r   r   re   r   r{   r  r  r   r\  r]  r"  r   logitsr  r  
total_lossignored_indexloss_fct
start_lossend_lossr.  s                            r9   rH   $MarkupLMForQuestionAnswering.forward  s   ^ &1%<k$++B]B]--))))%'/!5#   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rJ   )rx  r  r  )NNNNNNNNNNNNN)rK   rL   rM   rN   r    r   r   r@   r   r&  r   r   r   rH   rP   rQ   rR   s   @r9   r  r  r  sY     -115151515/3,0042604,0/3&*`
ELL)`
 !.`
 !.	`

 !.`
 !.`
 u||,`
 ELL)`
  -`
 "%,,/`
  -`
 $D>`
 'tn`
 d^`
 
uU\\"$@@	A`
 `
rJ   r  zC
    MarkupLM Model with a `token_classification` head on top.
    )custom_introc                     ^  \ rS rSrU 4S jr\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )MarkupLMForTokenClassificationi  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )r   r    r  r  rx  classifier_dropoutr'   r   r&   r(   r"   r$   
classifierr  r5   r6   r  r8   s      r9   r    'MarkupLMForTokenClassification.__init__  s      ++%fF)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rJ   rY   rB   rC   r   r   re   r   r{   labelsr   r\  r]  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUUU
UUS9nUS   nU R                  U5      nSnU	bF  [	        5       nU" UR                  SU R                   R                  5      U	R                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForTokenClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> processor.parse_html = False
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> nodes = ["hello", "world"]
>>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
>>> node_labels = [1, 2]
>>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```Nr  r   r<   r   r  r  r   rf  )
r6   r  rx  r  r   r   r  r   r   rf  )r5   rY   rB   rC   r   r   re   r   r{   r  r   r\  r]  r"  r   r   r  r  r.  s                      r9   rH   &MarkupLMForTokenClassification.forward  s    Z &1%<k$++B]B]--))))%'/!5#   
 "!* OOO<')H!&&r4;;+A+ABBD
 ')GABK7F)-)9TGf$EvE$$!//))	
 	
rJ   )r  r(   rx  r  NNNNNNNNNNNN)rK   rL   rM   rN   r    r   r   r@   r   r&  r   r   r   rH   rP   rQ   rR   s   @r9   r  r    sA     -115151515/3,004)-,0/3&*P
ELL)P
 !.P
 !.	P

 !.P
 !.P
 u||,P
 ELL)P
  -P
 &P
 $D>P
 'tnP
 d^P
 
uU\\"N2	3P
 P
rJ   r  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                     ^  \ rS rSrU 4S jr\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\	   S\\	   S\\	   S\
\\R                     \4   4S jj5       rSrU =r$ )!MarkupLMForSequenceClassificationiL  c                 r  > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l
        [        R                  " UR                  UR                  5      U l        U R                  5         g r   )r   r    r  r6   r  rx  r  r'   r   r&   r(   r"   r$   r  r  r  s      r9   r    *MarkupLMForSequenceClassification.__init__T  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rJ   rY   rB   rC   r   r   re   r   r{   r  r   r\  r]  r   c                 V   Ub  UOU R                   R                  nU R                  UUUUUUUUU
UUS9nUS   nU R                  U5      nU R	                  U5      nSnU	Gb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  U	R                  [        R                  :X  d  U	R                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       U	R                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      U	R                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      nU(       d  U4USS -   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  S	9$ )
a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForSequenceClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
>>> encoding = processor(html_string, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```Nr  r   
regressionsingle_label_classificationmulti_label_classificationr<   r   r  )r6   r  rx  r(   r  problem_typer  rw   r@   rX   rU   r	   r  r   r   r   r   r   rf  )r5   rY   rB   rC   r   r   re   r   r{   r  r   r\  r]  r"  r   r  r  r  r.  s                      r9   rH   )MarkupLMForSequenceClassification.forwardc  s   X &1%<k$++B]B]--))))%'/!5#   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rJ   )r  r6   r(   rx  r  r  )rK   rL   rM   rN   r    r   r   r@   r   r&  r   r   r   rH   rP   rQ   rR   s   @r9   r  r  L  sB     -115151515/3,004)-,0/3&*_
ELL)_
 !._
 !.	_

 !._
 !._
 u||,_
 ELL)_
  -_
 &_
 $D>_
 'tn_
 d^_
 
uU\\"$<<	=_
 _
rJ   r  )r  r  r  r  rw  )r   ):rO   r
  r  typingr   r   r   r@   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   r   r   utilsr   r   configuration_markuplmr   
get_loggerrK   ri  Moduler   r^   r`   r   r   r   r   r   r   r   r   r,  r)  r>  rU  rw  r  r  r  r  __all__r_  rJ   r9   <module>r     s     	 ) )    A A !   - 2 
		H	%/ bii / f4 _ _F 299  RYY RYY  bii $ryy 0!")) !CBII CN "# 0		 0hSBII SnZ
bii Z
z 
o 
 
< R+ R Rj m
#: m
 m
` 
a
%< a

a
H q
(? q
q
hrJ   