
    fTh                        S r SSKrSSKJrJrJr  SSKrSSKrSSKJr  SSK	J
r
JrJr  SSKJr  SSKJrJrJrJrJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJ r   \RB                  " \"5      r#\RH                  r% " S S\RL                  5      r' " S S\RL                  5      r( " S S\RL                  5      r)S\(0r* " S S\RL                  5      r+ " S S\RL                  5      r, " S S\RL                  5      r- " S S\RL                  5      r. " S S\RL                  5      r/ " S S \RL                  5      r0 " S! S"\RL                  5      r1 " S# S$\RL                  5      r2 " S% S&\RL                  5      r3\ " S' S(\5      5       r4\ " S) S*\45      5       r5\ " S+ S,\45      5       r6\" S-S.9 " S/ S0\45      5       r7\" S1S.9 " S2 S3\45      5       r8\ " S4 S5\45      5       r9/ S6Qr:g)7zPyTorch LayoutLM model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )LayoutLMConfigc                   @   ^  \ rS rSrSrU 4S jr     SS jrSrU =r$ )LayoutLMEmbeddings.   zGConstruct the embeddings from word, position and token_type embeddings.c                   > [         [        U ]  5         [        R                  " UR
                  UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                   UR                  5      U l        [%        UR                  UR&                  S9U l        [        R*                  " UR,                  5      U l        U R1                  S[2        R4                  " UR                  5      R7                  S5      SS9  g )N)padding_idxepsposition_ids)r   F)
persistent)superr   __init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     f/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr%   LayoutLMEmbeddings.__init__1   s^    $02!||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2H2H&J\J\%]"*6+=+=6CXCXYzz&"<"<=ELL)G)GHOOPWXej 	 	
    c                    Ub  UR                  5       nOUR                  5       S S nUS   nUb  UR                  OUR                  nUc  U R                  S S 2S U24   nUc$  [        R                  " U[        R
                  US9nUc  U R                  U5      nUn	U R                  U5      n
 U R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  U5      nU	U
-   U-   U-   U-   U-   U-   U-   U-   nU R                  U5      nU R                  U5      nU$ ! [         a  n[        S5      UeS nAff = f)Nr"   r   dtypedevicer      r
   z:The `bbox`coordinate values should be within 0-1000 range.)sizerH   r!   r;   zeroslongr*   r,   r.   r/   
IndexErrorr0   r1   r3   r6   r9   )r?   	input_idsbboxtoken_type_idsr!   inputs_embedsinput_shape
seq_lengthrH   words_embeddingsr,   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser0   r1   r3   
embeddingss                       rB   forwardLayoutLMEmbeddings.forwardB   s1     #..*K',,.s3K ^
%.%:!!@T@T,,Q^<L!"[[EJJvVN  00;M("66|D	b'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y $ : :> J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J/
\\*-
)  	bYZ`aa	bs   /A,F: :
GGG)	r6   r9   r0   r,   r3   r1   r*   r.   r/   )NNNNN)	__name__
__module____qualname____firstlineno____doc__r%   r[   __static_attributes____classcell__rA   s   @rB   r   r   .   s&    Q
& 5 5rD   r   c                   b  ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jr      SS\R                  S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
\
\R                           S\\   S\
\R                     4S jjrSrU =r$ )LayoutLMSelfAttention{   c                   > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  5      U l        U=(       d    [#        USS5      U l        U R$                  S:X  d  U R$                  S	:X  aG  UR&                  U l        [        R(                  " S
UR&                  -  S-
  U R                  5      U l        UR,                  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_queryrI   r   )r$   r%   r(   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   Linearquerykeyvaluer7   attention_probs_dropout_probr9   getattrrk   r+   r&   distance_embedding
is_decoderr?   r@   rk   rA   s      rB   r%   LayoutLMSelfAttention.__init__|   s    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++rD   xreturnc                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nr"   r   rI   r   r
   )rJ   ro   rs   viewpermute)r?   r   new_x_shapes      rB   transpose_for_scores*LayoutLMSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$rD   hidden_statesattention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsc                 V   U R                  U5      nUS Ln	U	(       a  Ub  US   n
US   nUnGOU	(       aC  U R                  U R                  U5      5      n
U R                  U R                  U5      5      nUnOUbu  U R                  U R                  U5      5      n
U R                  U R                  U5      5      n[        R
                  " US   U
/SS9n
[        R
                  " US   U/SS9nO@U R                  U R                  U5      5      n
U R                  U R                  U5      5      nU R                  U5      nUS LnU R                  (       a  X4n[        R                  " XR                  SS5      5      nU R                  S:X  d  U R                  S:X  Ga  UR                  S   U
R                  S   nnU(       aB  [        R                  " US-
  [        R                  UR                  S	9R                  SS5      nO>[        R                  " U[        R                  UR                  S	9R                  SS5      n[        R                  " U[        R                  UR                  S	9R                  SS5      nUU-
  nU R!                  UU R"                  -   S-
  5      nUR%                  UR&                  S
9nU R                  S:X  a  [        R(                  " SUU5      nUU-   nOHU R                  S:X  a8  [        R(                  " SUU5      n[        R(                  " SU
U5      nUU-   U-   nU[*        R,                  " U R.                  5      -  nUb  X-   n[0        R2                  R5                  USS9nU R7                  U5      nUb  UU-  n[        R                  " UU5      nUR9                  SSSS5      R;                  5       nUR=                  5       S S U R>                  4-   nUR                  U5      nU(       a  UU4OU4nU R                  (       a  UU4-   nU$ )Nr   r   rI   dimr"   rm   rn   rF   rG   zbhld,lrd->bhlrzbhrd,lrd->bhlrr
   ) rv   r   rw   rx   r;   catr|   matmul	transposerk   shapetensorrL   rH   r   r<   r{   r+   torG   einsummathsqrtrs   r   
functionalsoftmaxr9   r   
contiguousrJ   rt   )r?   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               rB   r[   LayoutLMSelfAttention.forward   s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB"$.	?? (5N !<<5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2mM]?? 11GrD   )rt   rs   r{   r9   r|   rw   r+   ro   rk   rv   rx   NNNNNNF)r]   r^   r_   r`   r%   r;   Tensorr   r   FloatTensorr   boolr[   rb   rc   rd   s   @rB   rf   rf   {   s    ,4%ell %u|| % 7;15=A>BDH,1c||c !!2!23c E--.	c
  ((9(9:c !)):): ;c !uU->->'?!@Ac $D>c 
u||	c crD   rf   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMSelfOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr   )r$   r%   r   ru   r(   denser6   r5   r7   r8   r9   r>   s     rB   r%   LayoutLMSelfOutput.__init__  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rD   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r9   r6   r?   r   r   s      rB   r[   LayoutLMSelfOutput.forward	  5    

=1]3}'CDrD   r6   r   r9   
r]   r^   r_   r`   r%   r;   r   r[   rb   rc   rd   s   @rB   r   r     6    >U\\  RWR^R^  rD   r   eagerc                   .  ^  \ rS rSrSU 4S jjrS r      SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S	\\
\
\R                           S
\\   S\
\R                     4S jjrSrU =r$ )LayoutLMAttentioni  c                    > [         TU ]  5         [        UR                     " XS9U l        [        U5      U l        [        5       U l        g )Nrk   )	r$   r%   LAYOUTLM_SELF_ATTENTION_CLASSES_attn_implementationr?   r   outputsetpruned_headsr}   s      rB   r%   LayoutLMAttention.__init__  s@    3F4O4OP
	 )0ErD   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )lenr   r?   ro   rs   r   r   rv   rw   rx   r   r   rt   union)r?   headsindexs      rB   prune_headsLayoutLMAttention.prune_heads  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rD   r   r   r   r   r   r   r   r   c           	      p    U R                  UUUUUUU5      nU R                  US   U5      n	U	4USS  -   n
U
$ )Nr   r   )r?   r   )r?   r   r   r   r   r   r   r   self_outputsattention_outputr   s              rB   r[   LayoutLMAttention.forward1  sW     yy!"
  ;;|AF#%QR(88rD   )r   r   r?   r   r   )r]   r^   r_   r`   r%   r   r;   r   r   r   r   r   r[   rb   rc   rd   s   @rB   r   r     s    ";* 7;15=A>BDH,1|| !!2!23 E--.	
  ((9(9: !)):): ; !uU->->'?!@A $D> 
u||	 rD   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMIntermediateiJ  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r$   r%   r   ru   r(   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr>   s     rB   r%   LayoutLMIntermediate.__init__K  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rD   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   r?   r   s     rB   r[   LayoutLMIntermediate.forwardS  s&    

=100?rD   r   r   rd   s   @rB   r   r   J  s(    9U\\ ell  rD   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMOutputiZ  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r$   r%   r   ru   r   r(   r   r6   r5   r7   r8   r9   r>   s     rB   r%   LayoutLMOutput.__init__[  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rD   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      rB   r[   LayoutLMOutput.forwarda  r   rD   r   r   rd   s   @rB   r   r   Z  r   rD   r   c                   *  ^  \ rS rSrU 4S jr      SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\\	\	\R                           S	\\
   S
\	\R                     4S jjrS rSrU =r$ )LayoutLMLayerii  c                 t  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        UR                  U l        UR                  U l        U R                  (       a.  U R                  (       d  [        U  S35      e[	        USS9U l	        [        U5      U l        [        U5      U l        g )Nr   z> should be used as a decoder model if cross attention is addedrl   r   )r$   r%   chunk_size_feed_forwardseq_len_dimr   	attentionr|   add_cross_attentionrq   crossattentionr   intermediater   r   r>   s     rB   r%   LayoutLMLayer.__init__j  s    '-'E'E$*62 ++#)#=#= ##?? D6)g!hii"3FT^"_D08$V,rD   r   r   r   r   r   r   r   r   c           	         Ub  US S OS nU R                  UUUUUS9n	U	S   n
U R                  (       a  U	SS nU	S   nOU	SS  nS nU R                  (       aZ  UbW  [        U S5      (       d  [        SU  S35      eUb  US	S  OS nU R	                  U
UUUUUU5      nUS   n
XSS -   nUS   nWU-   n[        U R                  U R                  U R                  U
5      nU4U-   nU R                  (       a  UW4-   nU$ )
NrI   )r   r   r   r   r"   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r|   rp   rq   r   r   feed_forward_chunkr   r   )r?   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr   r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    rB   r[   LayoutLMLayer.forwardx  s}    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!122 =dV DD D  @N?Yrs(;_c%&*&9&9 %&)!'#  7q9" ==G ,C2+F( 14P P0##T%A%A4CSCSUe
  /G+ ??!2 44GrD   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )r?   r   intermediate_outputr  s       rB   r    LayoutLMLayer.feed_forward_chunk  s)    "//0@A{{#6IrD   )r   r   r   r   r   r|   r   r   r   )r]   r^   r_   r`   r%   r;   r   r   r   r   r   r[   r   rb   rc   rd   s   @rB   r   r   i  s    -" 7;15=A>BDH,1?||? !!2!23? E--.	?
  ((9(9:? !)):): ;? !uU->->'?!@A? $D>? 
u||	?B rD   r   c                   R  ^  \ rS rSrU 4S jr         SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\\	\	\R                           S	\\
   S
\\
   S\\
   S\\
   S\\	\R                     \4   4S jjrSrU =r$ )LayoutLMEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r$   r%   r@   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r?   r@   _rA   s      rB   r%   LayoutLMEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&r   r   r   r   r   past_key_valuesr   r   output_hidden_statesreturn_dictr   c                 8   U	(       a  SOS nU(       a  SOS nU(       a  U R                   R                  (       a  SOS nU R                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnU(       a  SOS n[        U R                  5       H  u  nnU	(       a  X4-   nUb  X?   OS nUb  Xo   OS nU R                  (       a4  U R                  (       a#  U R                  UR                  UUUUUUU5      nOU" UUUUUUU5      nUS   nU(       a	  UUS   4-  nU(       d  M  UUS   4-   nU R                   R                  (       d  M  UUS   4-   nM     U	(       a  X4-   nU
(       d  [        S UUUUU4 5       5      $ [        UUUUUS	9$ )
N zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   r"   r   rI   c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r  ).0vs     rB   	<genexpr>*LayoutLMEncoder.forward.<locals>.<genexpr>	  s"      
A  s   	)last_hidden_stater  r   
attentionscross_attentions)r@   r   r  trainingloggerwarning_once	enumerater  _gradient_checkpointing_func__call__tupler   )r?   r   r   r   r   r   r  r   r   r  r  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacheilayer_modulelayer_head_maskr   layer_outputss                       rB   r[   LayoutLMEncoder.forward  s    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	#,R$(4OA|#$58H$H!.7.CilO3B3N_/TXN**t}} $ A A ))!"#)*"%	! !-!"#)*"%! *!,M"}R'8&::"  &9]1=M<O&O#;;222+?=QRCSBU+U(G  5J   14D D 
 "&%'(
 
 
 9+.+*1
 	
rD   )r@   r  r  )	NNNNNNFFT)r]   r^   r_   r`   r%   r;   r   r   r   r   r   r   r   r[   rb   rc   rd   s   @rB   r	  r	    s   , 7;15=A>BEI$(,1/4&*S
||S
 !!2!23S
 E--.	S

  ((9(9:S
 !)):): ;S
 "%e.?.?(@"ABS
 D>S
 $D>S
 'tnS
 d^S
 
uU\\"$MM	NS
 S
rD   r	  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r$   r%   r   ru   r(   r   Tanh
activationr>   s     rB   r%   LayoutLMPooler.__init__  s9    YYv1163E3EF
'')rD   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r3  )r?   r   first_token_tensorpooled_outputs       rB   r[   LayoutLMPooler.forward$  s6     +1a40

#566rD   )r3  r   r   rd   s   @rB   r0  r0    s(    $
U\\ ell  rD   r0  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPredictionHeadTransformi.  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r$   r%   r   ru   r(   r   r   r   r   r   transform_act_fnr6   r5   r>   s     rB   r%   (LayoutLMPredictionHeadTransform.__init__/  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrD   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r<  r6   r   s     rB   r[   'LayoutLMPredictionHeadTransform.forward8  s4    

=1--m<}5rD   )r6   r   r<  r   rd   s   @rB   r:  r:  .  s)    UU\\ ell  rD   r:  c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )LayoutLMLMPredictionHeadi@  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)bias)r$   r%   r:  	transformr   ru   r(   r'   decoder	Parameterr;   rK   rC  r>   s     rB   r%   !LayoutLMLMPredictionHead.__init__A  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrD   c                 :    U R                   U R                  l         g r   )rC  rE  r?   s    rB   _tie_weights%LayoutLMLMPredictionHead._tie_weightsN  s     IIrD   c                 J    U R                  U5      nU R                  U5      nU$ r   )rD  rE  r   s     rB   r[    LayoutLMLMPredictionHead.forwardQ  s$    }5]3rD   )rC  rE  rD  )	r]   r^   r_   r`   r%   rJ  r[   rb   rc   rd   s   @rB   rA  rA  @  s    && rD   rA  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMOnlyMLMHeadiX  c                 B   > [         TU ]  5         [        U5      U l        g r   )r$   r%   rA  predictionsr>   s     rB   r%   LayoutLMOnlyMLMHead.__init__Y  s    3F;rD   sequence_outputr   c                 (    U R                  U5      nU$ r   rQ  )r?   rS  prediction_scoress      rB   r[   LayoutLMOnlyMLMHead.forward]  s     ,,_=  rD   rU  r   rd   s   @rB   rO  rO  X  s(    <!u|| ! ! !rD   rO  c                   &    \ rS rSr\rSrSrS rSr	g)LayoutLMPreTrainedModelib  layoutlmTc                 x   [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsg        )meanstdN      ?)r   r   ru   weightdatanormal_r@   initializer_rangerC  zero_r&   r   r4   fill_rA  )r?   modules     rB   _init_weights%LayoutLMPreTrainedModel._init_weightsh  s0   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> . 122KK""$MM$$S) 899KK""$ :rD   r  N)
r]   r^   r_   r`   r   config_classbase_model_prefixsupports_gradient_checkpointingrf  rb   r  rD   rB   rY  rY  b  s    !L"&*#%rD   rY  c                     ^  \ rS rSrU 4S jrS rS rS r\            SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\4   4S jj5       rSrU =r$ )LayoutLMModeli{  c                    > [         [        U ]  U5        Xl        [	        U5      U l        [        U5      U l        [        U5      U l	        U R                  5         g r   )r$   rl  r%   r@   r   rZ   r	  encoderr0  pooler	post_initr>   s     rB   r%   LayoutLMModel.__init__}  sG    mT+F3,V4&v.$V, 	rD   c                 .    U R                   R                  $ r   rZ   r*   rI  s    rB   get_input_embeddings"LayoutLMModel.get_input_embeddings  s    ...rD   c                 $    XR                   l        g r   rs  )r?   rx   s     rB   set_input_embeddings"LayoutLMModel.set_input_embeddings  s    */'rD   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrn  r  r   r   )r?   heads_to_pruner  r   s       rB   _prune_headsLayoutLMModel._prune_heads  s<    
 +002LELLu%//;;EB 3rD   rN   rO   r   rP   r!   r   rQ   r   r   r   r  r  r   c           	      8   U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U[        R                  US9nUc'  [        R                  " US-   [        R                  US9nUR                  S5      R                  S	5      nUR                  U R                  S
9nSU-
  [        R                  " U R                  5      R                   -  nUb  UR#                  5       S:X  ah  UR                  S5      R                  S5      R                  S5      R                  S5      nUR%                  U R                   R&                  SSSS5      nOCUR#                  5       S	:X  a/  UR                  S5      R                  S5      R                  S5      nUR                  [)        U R+                  5       5      R                  S
9nOS/U R                   R&                  -  nU R-                  UUUUUS9nU R/                  UUUU
UUS9nUS   nU R1                  U5      nU(       d
  UU4USS -   $ [3        UUUR4                  UR6                  UR8                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMModel
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> outputs = model(
...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
... )

>>> last_hidden_states = outputs.last_hidden_state
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer"   z5You have to specify either input_ids or inputs_embeds)rH   rF   )   r   rI   r   r^  r   )rN   rO   r!   rP   rQ   )r   r   r  r  )r  pooler_outputr   r  r  )r@   r   r  use_return_dictrq   %warn_if_padding_and_no_attention_maskrJ   rH   r;   onesrK   rL   	unsqueezer   rG   finfominr   r=   r  next
parametersrZ   rn  ro  r   r   r  r  )r?   rN   rO   r   rP   r!   r   rQ   r   r   r   r  r  rR   rH   extended_attention_maskembedding_outputencoder_outputsrS  r7  s                       rB   r[   LayoutLMModel.forward  s   l 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN<;;{T1FSD"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@EKKPTPZPZD[D_D_"_ }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??%)' + 
 ,,#/!5# ' 
 *!,O4#]3oab6III;-')77&11,==
 	
rD   )r@   rZ   rn  ro  NNNNNNNNNNNN)r]   r^   r_   r`   r%   rt  rw  r|  r   r   r;   
LongTensorr   r   r   r   r   r[   rb   rc   rd   s   @rB   rl  rl  {  s[   	/0C  15+/6:59371559=A>B,0/3&*y
E,,-y
 u''(y
 !!2!23	y

 !!1!12y
 u//0y
 E--.y
   1 12y
  ((9(9:y
 !)):): ;y
 $D>y
 'tny
 d^y
 
uBB	Cy
 y
rD   rl  c                      ^  \ rS rSrSS/rU 4S jrS rS rS r\	             SS\
\R                     S	\
\R                     S
\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\\\4   4S jj5       rSrU =r$ )LayoutLMForMaskedLMi  zcls.predictions.decoder.biaszcls.predictions.decoder.weightc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r$   r%   rl  rZ  rO  clsrp  r>   s     rB   r%   LayoutLMForMaskedLM.__init__  s4     %f-&v. 	rD   c                 B    U R                   R                  R                  $ r   rZ  rZ   r*   rI  s    rB   rt  (LayoutLMForMaskedLM.get_input_embeddings       }}''777rD   c                 B    U R                   R                  R                  $ r   )r  rQ  rE  rI  s    rB   get_output_embeddings)LayoutLMForMaskedLM.get_output_embeddings#  s    xx##+++rD   c                     XR                   R                  l        UR                  U R                   R                  l        g r   )r  rQ  rE  rC  )r?   new_embeddingss     rB   set_output_embeddings)LayoutLMForMaskedLM.set_output_embeddings&  s*    '5$$2$7$7!rD   rN   rO   r   rP   r!   r   rQ   labelsr   r   r   r  r  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU	U
UUUS9nUS   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "[MASK]"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=labels,
... )

>>> loss = outputs.loss
```N)
r   rP   r!   r   rQ   r   r   r   r  r  r   r"   rI   losslogitsr   r  )
r@   r  rZ  r  r   r   r'   r   r   r  )r?   rN   rO   r   rP   r!   r   rQ   r  r   r   r   r  r  r   rS  rV  masked_lm_lossloss_fctr   s                       rB   r[   LayoutLMForMaskedLM.forward*  s   B &1%<k$++B]B]--))%'"7#9/!5#   
 "!* HH_5')H%!&&r4;;+A+ABBN
 ')GABK7F3A3M^%.YSYY$!//))	
 	
rD   )r  rZ  )NNNNNNNNNNNNN)r]   r^   r_   r`   _tied_weights_keysr%   rt  r  r  r   r   r;   r  r   r   r   r   r   r[   rb   rc   rd   s   @rB   r  r    s~   8:Z[8,8  15+/6:59371559-1=A>B,0/3&*e
E,,-e
 u''(e
 !!2!23	e

 !!1!12e
 u//0e
 E--.e
   1 12e
 ))*e
  ((9(9:e
 !)):): ;e
 $D>e
 'tne
 d^e
 
un$	%e
 e
rD   r  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                   x  ^  \ rS rSrU 4S jrS r\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )!LayoutLMForSequenceClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r$   r%   
num_labelsrl  rZ  r   r7   r8   r9   ru   r(   
classifierrp  r>   s     rB   r%   *LayoutLMForSequenceClassification.__init__  i      ++%f-zz&"<"<=))F$6$68I8IJ 	rD   c                 B    U R                   R                  R                  $ r   r  rI  s    rB   rt  6LayoutLMForSequenceClassification.get_input_embeddings  r  rD   rN   rO   r   rP   r!   r   rQ   r  r   r  r  r   c                 T   Ub  UOU R                   R                  nU R                  UUUUUUUU	U
US9
nUS   nU R                  U5      nU R	                  U5      nSnUGb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      nU(       d  U4USS -   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  S	9$ )
a
  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> sequence_label = torch.tensor([1])

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=sequence_label,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```N
rN   rO   r   rP   r!   r   rQ   r   r  r  r   
regressionsingle_label_classificationmulti_label_classificationr"   rI   r  )r@   r  rZ  r9   r  problem_typer  rG   r;   rL   rr   r	   squeezer   r   r   r   r   r  )r?   rN   rO   r   rP   r!   r   rQ   r  r   r  r  r   r7  r  r  r  r   s                     rB   r[   )LayoutLMForSequenceClassification.forward  s   ~ &1%<k$++B]B]--))%'/!5#   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rD   r  r9   rZ  r  NNNNNNNNNNN)r]   r^   r_   r`   r%   rt  r   r   r;   r  r   r   r   r   r   r[   rb   rc   rd   s   @rB   r  r    s7   8  15+/6:59371559-1,0/3&*q
E,,-q
 u''(q
 !!2!23	q

 !!1!12q
 u//0q
 E--.q
   1 12q
 ))*q
 $D>q
 'tnq
 d^q
 
u..	/q
 q
rD   r  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                   x  ^  \ rS rSrU 4S jrS r\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )LayoutLMForTokenClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r  r>   s     rB   r%   'LayoutLMForTokenClassification.__init__$  r  rD   c                 B    U R                   R                  R                  $ r   r  rI  s    rB   rt  3LayoutLMForTokenClassification.get_input_embeddings.  r  rD   rN   rO   r   rP   r!   r   rQ   r  r   r  r  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU	U
US9
nUS   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=token_labels,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```Nr  r   r"   rI   r  )r@   r  rZ  r9   r  r   r   r  r   r   r  )r?   rN   rO   r   rP   r!   r   rQ   r  r   r  r  r   rS  r  r  r  r   s                     rB   r[   &LayoutLMForTokenClassification.forward1  s    z &1%<k$++B]B]--))%'/!5#   
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
rD   r  r  )r]   r^   r_   r`   r%   rt  r   r   r;   r  r   r   r   r   r   r[   rb   rc   rd   s   @rB   r  r    s7   8  15+/6:59371559-1,0/3&*^
E,,-^
 u''(^
 !!2!23	^

 !!1!12^
 u//0^
 E--.^
   1 12^
 ))*^
 $D>^
 'tn^
 d^^
 
u++	,^
 ^
rD   r  c                     ^  \ rS rSrSU 4S jjrS r\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )LayoutLMForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g)z}
has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
    Whether or not to add visual segment embeddings.
N)
r$   r%   r  rl  rZ  r   ru   r(   
qa_outputsrp  )r?   r@   has_visual_segment_embeddingrA   s      rB   r%   %LayoutLMForQuestionAnswering.__init__  sU    
 	  ++%f-))F$6$68I8IJ 	rD   c                 B    U R                   R                  R                  $ r   r  rI  s    rB   rt  1LayoutLMForQuestionAnswering.get_input_embeddings  r  rD   rN   rO   r   rP   r!   r   rQ   start_positionsend_positionsr   r  r  r   c                 (   Ub  UOU R                   R                  nU R                  UUUUUUUU
UUS9
nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnUb  U	b  [        UR                  5       5      S:  a  UR                  S5      n[        U	R                  5       5      S:  a  U	R                  S5      n	UR                  S5      nUR                  SU5      nU	R                  SU5      n	[        US9nU" UU5      nU" UU	5      nUU-   S-  nU(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S	9$ )
a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Example:

In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
of what it thinks the answer is (the span of the answer within the texts parsed from the image).

```python
>>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
>>> from datasets import load_dataset
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

>>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(
...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
... )
>>> bbox = []
>>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
...     if s == 1:
...         bbox.append(boxes[w])
...     elif i == tokenizer.sep_token_id:
...         bbox.append([1000] * 4)
...     else:
...         bbox.append([0] * 4)
>>> encoding["bbox"] = torch.tensor([bbox])

>>> word_ids = encoding.word_ids(0)
>>> outputs = model(**encoding)
>>> loss = outputs.loss
>>> start_scores = outputs.start_logits
>>> end_scores = outputs.end_logits
>>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
>>> print(" ".join(words[start : end + 1]))
M. Hamann P. Harper, P. Martinez
```Nr  r   r   r"   r   )ignore_indexrI   )r  start_logits
end_logitsr   r  )r@   r  rZ  r  splitr  r   r   rJ   clampr   r   r   r  )r?   rN   rO   r   rP   r!   r   rQ   r  r  r   r  r  r   rS  r  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                           rB   r[   $LayoutLMForQuestionAnswering.forward  s   B &1%<k$++B]B]--))%'/!5#   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rD   )rZ  r  r  )Tr  )r]   r^   r_   r`   r%   rt  r   r   r;   r  r   r   r   r   r   r[   rb   rc   rd   s   @rB   r  r    sP   8  15+/6:593715596:48,0/3&*q
E,,-q
 u''(q
 !!2!23	q

 !!1!12q
 u//0q
 E--.q
   1 12q
 "%"2"23q
   0 01q
 $D>q
 'tnq
 d^q
 
u22	3q
 q
rD   r  )r  r  r  r  rl  rY  );ra   r   typingr   r   r   r;   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   configuration_layoutlmr   
get_loggerr]   r   r6   r4   Moduler   rf   r   r   r   r   r   r   r	  r0  r:  rA  rO  rY  rl  r  r  r  r  __all__r  rD   rB   <module>r     s     ) )    A A !  . l l , 2 
		H	% LL I IZCBII CN  "# 0		 0h299  RYY SBII SnZ
bii Z
|RYY  bii $ryy 0!")) ! %o % %0 T
+ T
 T
n |
1 |
 |
~ @
(? @
@
F m
%< m
m
` D
#: D
 D
NrD   