
    fTh                     Z   S r SSKrSSKJrJrJr  SSKrSSKrSSKJr  SSK	J
r
JrJr  SSKJr  SSKJrJrJrJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  \R@                  " \!5      r" " S S\RF                  5      r$ " S S\RF                  5      r% " S S\RF                  5      r& " S S\RF                  5      r' " S S\RF                  5      r( " S S\RF                  5      r) " S S\RF                  5      r* " S S\RF                  5      r+ " S S\RF                  5      r, " S  S!\RF                  5      r-\ " S" S#\5      5       r.\ " S$ S%\.5      5       r/\" S&S'9 " S( S)\.5      5       r0\ " S* S+\.5      5       r1 " S, S-\RF                  5      r2\ " S. S/\.5      5       r3/ S0Qr4g)1zPyTorch LiLT model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputBaseModelOutputWithPoolingQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )
LiltConfigc                   F   ^  \ rS rSrU 4S jr    SS jrS rS rSrU =r	$ )LiltTextEmbeddings*   c                 .  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  [+        USS5      U l        UR                  U l        [        R                  " UR                  UR
                  U R.                  S9U l	        g )	Npadding_idxepsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandgetattrr$   r   selfconfig	__class__s     ^/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/lilt/modeling_lilt.pyr'   LiltTextEmbeddings.__init__+   s2   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$ "..#%<<**F,>,>DL\L\$
     c                 D   UcI  Ub5  U R                  XR                  5      R                  UR                  5      nOU R	                  U5      nUb  UR                  5       nOUR                  5       S S nUc8  [        R                  " U[        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      nXF-   nU R                  S:X  a  U R                  U5      nXx-  nU R                  U5      nU R                  U5      nXs4$ )Nr"   dtypedevicer%   )"create_position_ids_from_input_idsr   torE   &create_position_ids_from_inputs_embedssizer7   zeroslongr!   r,   r0   r$   r.   r1   r5   )	r<   	input_idstoken_type_idsr!   inputs_embedsinput_shaper0   
embeddingsr.   s	            r?   forwardLiltTextEmbeddings.forwardB   s    $#FFyRbRbcff$$   $JJ=Y #..*K',,.s3K!"[[EJJtO`O`OgOghN  00;M $ : :> J":
'':5"&":":<"H-J^^J/
\\*-
''rA   c                     UR                  U5      R                  5       n[        R                  " USS9R	                  U5      U-  nUR                  5       U-   $ )z
Args:
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding
symbols are ignored. This is modified from fairseq's `utils.make_positions`.
    x: torch.Tensor x:
Returns: torch.Tensor
r   dim)neintr7   cumsumtype_asrK   )r<   rL   r   maskincremental_indicess        r?   rF   5LiltTextEmbeddings.create_position_ids_from_input_idsf   sP     ||K(,,.$||Da8@@F$N"'')K77rA   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
Args:
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.:
    inputs_embeds: torch.Tensor
Returns: torch.Tensor
Nr"   r   rC   r   )rI   r7   r8   r   rK   rE   	unsqueezer9   )r<   rN   rO   sequence_lengthr!   s        r?   rH   9LiltTextEmbeddings.create_position_ids_from_inputs_embedss   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rA   )r1   r5   r   r$   r.   r0   r,   )NNNN)
__name__
__module____qualname____firstlineno__r'   rQ   rF   rH   __static_attributes____classcell__r>   s   @r?   r   r   *   s+    
2 "(H8= =rA   r   c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )LiltLayoutEmbeddings   c                   > [         TU ]  5         [        R                  " UR                  UR
                  S-  5      U l        [        R                  " UR                  UR
                  S-  5      U l        [        R                  " UR                  UR
                  S-  5      U l        [        R                  " UR                  UR
                  S-  5      U l	        UR                  U l        [        R                  " UR                  UR
                  UR                  -  U R                  S9U l        [        R                  " UR
                  UR
                  UR                  -  S9U l        [        R"                  " UR
                  UR                  -  UR$                  S9U l        [        R&                  " UR(                  5      U l        g )N   r   )in_featuresout_featuresr   )r&   r'   r   r(   max_2d_position_embeddingsr*   x_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingsr+   r   r-   channel_shrink_ratiobox_position_embeddingsLinearbox_linear_embeddingsr1   r2   r3   r4   r5   r;   s     r?   r'   LiltLayoutEmbeddings.__init__   s^    &(\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"!..')||**&"="==(((
$
 &(YY**9K9KvOjOj9j&
" f&8&8F<W<W&W]c]r]rszz&"<"<=rA   c                     U R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R	                  US S 2S S 2S4   US S 2S S 2S4   -
  5      n	[
        R                  " UUUUUU	/SS9n
U R                  U
5      n
U R                  U5      nX-   n
U R                  U
5      n
U R                  U
5      n
U
$ ! [         a  n[        S5      UeS nAff = f)Nr   r      r
   z;The `bbox` coordinate values should be within 0-1000 range.r"   rT   )rp   rq   
IndexErrorrr   rs   r7   catrw   ru   r1   r5   )r<   bboxr!   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingserr   rs   spatial_position_embeddingsru   s               r?   rQ   LiltLayoutEmbeddings.forward   st   	c'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y&+ii()))%% 
'
# '+&@&@A\&]#"&">">|"L&A&[#&*nn5P&Q#&*ll3N&O#**3  	cZ[abb	cs   A,D# #
D>-D99D>)	r1   rw   ru   r5   rr   r   rs   rp   rq   )NN)ra   rb   rc   rd   r'   rQ   re   rf   rg   s   @r?   ri   ri      s    >*+ +rA   ri   c                   F   ^  \ rS rSrSU 4S jjrSS jr   SS jrSrU =r$ )	LiltSelfAttention   c                   > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  UR                  -  U R                  UR                  -  5      U l        [        R                  " UR                  UR                  -  U R                  UR                  -  5      U l        [        R                  " UR                  UR                  -  U R                  UR                  -  5      U l        [        R$                  " UR&                  5      U l        U=(       d    [+        USS5      U l        U R,                  S:X  d  U R,                  S	:X  aG  UR.                  U l        [        R0                  " S
UR.                  -  S-
  U R                  5      U l        UR                  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r$   r%   relative_keyrelative_key_queryrz   r   )r&   r'   r*   num_attention_headshasattr
ValueErrorrW   attention_head_sizeall_head_sizer   rv   querykeyvaluert   layout_query
layout_keylayout_valuer3   attention_probs_dropout_probr5   r:   r$   r-   r(   distance_embedding)r<   r=   r$   r>   s      r?   r'   LiltSelfAttention.__init__   sQ    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
II&"="==t?Q?QU[UpUp?p
 ))&"="==t?Q?QU[UpUp?p
 II&"="==t?Q?QU[UpUp?p
 zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD#$*$?$?!rA   c                     UR                  5       S S U R                  U R                  U-  4-   nUR                  " U6 nUR	                  SSSS5      $ )Nr"   r   rz   r   r
   )rI   r   r   viewpermute)r<   xrnew_x_shapes       r?   transpose_for_scores&LiltSelfAttention.transpose_for_scores   sR    ffhsmt'?'?AYAY]^A^&__FFK yyAq!$$rA   c                    U R                  U R                  U5      U R                  S9nU R                  U R                  U5      U R                  S9nU R                  U R	                  U5      U R                  S9nU R                  U5      n	U R                  U R                  U5      5      n
U R                  U R                  U5      5      nU R                  U	5      n[        R                  " XR                  SS5      5      n[        R                  " XR                  SS5      5      nU R                  S:X  d  U R                  S:X  GaD  UR                  5       S   n[        R                  " U[        R                  UR                  S9R!                  SS5      n[        R                  " U[        R                  UR                  S9R!                  SS5      nUU-
  nU R#                  UU R$                  -   S-
  5      nUR'                  UR(                  S9nU R                  S:X  a  [        R*                  " S	UU5      nUU-   nOHU R                  S:X  a8  [        R*                  " S	UU5      n[        R*                  " S
U
U5      nUU-   U-   nU[,        R.                  " U R0                  5      -  nU[,        R.                  " U R0                  U R                  -  5      -  nUU-   nUU-   nUb  X-   n[2        R4                  " SS9" U5      nU R7                  U5      nUb  UU-  n[        R                  " UU5      nUR9                  SSSS5      R;                  5       nUR                  5       S S U R<                  U R                  -  4-   nUR                   " U6 nUb  X-   n[2        R4                  " SS9" U5      nU R7                  U5      nUb  UU-  n[        R                  " UU5      nUR9                  SSSS5      R;                  5       nUR                  5       S S U R<                  4-   nUR                   " U6 nU(       a  UU4U4nU$ UU44nU$ )N)r   r"   r   r   r   rC   )rD   zbhld,lrd->bhlrzbhrd,lrd->bhlrrT   r   rz   r
   )r   r   rt   r   r   r   r   r   r7   matmul	transposer$   rI   r8   rK   rE   r   r   r-   rG   rD   einsummathsqrtr   r   Softmaxr5   r   
contiguousr   )r<   hidden_stateslayout_inputsattention_mask	head_maskoutput_attentionslayout_value_layerlayout_key_layerlayout_query_layermixed_query_layer	key_layervalue_layerquery_layerattention_scoreslayout_attention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keytmp_attention_scorestmp_layout_attention_scoreslayout_attention_probslayout_context_layernew_context_layer_shapeattention_probscontext_layeroutputss                                  r?   rQ   LiltSelfAttention.forward   sR    "66t7H7H7W[_[t[t6u44T__]5SW[WpWp4q!66t7H7H7W[_[t[t6u JJ}5--dhh}.EF	//

=0IJ//0AB <<5H5HR5PQ"',,/AC]C]^`bdCe"f''>9T=Y=Y]q=q&++-a0J"\\*EJJ}OcOcdiijlnopN"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s /$))D<T<T2UU&=		$$(A(AAA
 '
# 02MM"=@T"T%&=&N# "$!34K!L "&.D!E  %;i%G"$||,BDVW3;;Aq!QGRRT"6";";"=cr"BdFXFX\`\u\uFuEw"w388:QR%/@ **,-=> ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**,CD ! 12OD 	  !"679 	 rA   )r   r   rt   r   r5   r   r   r   r   r-   r   r$   r   r   N)r   NNF)	ra   rb   rc   rd   r'   r   rQ   re   rf   rg   s   @r?   r   r      s&    "@H% \ \rA   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LiltSelfOutputiE  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr   )r&   r'   r   rv   r*   denser1   r2   r3   r4   r5   r;   s     r?   r'   LiltSelfOutput.__init__F  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rA   r   input_tensorreturnc                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r5   r1   r<   r   r   s      r?   rQ   LiltSelfOutput.forwardL  5    

=1]3}'CDrA   r1   r   r5   
ra   rb   rc   rd   r'   r7   TensorrQ   re   rf   rg   s   @r?   r   r   E  6    >U\\  RWR^R^  rA   r   c                      ^  \ rS rSrSU 4S jjrS r   SS\R                  S\R                  S\\R                     S\\R                     S\\
   S	\\R                     4S
 jjrSrU =r$ )LiltAttentioniS  c                    > [         TU ]  5         [        XS9U l        [	        U5      U l        [        5       U l        UR                  nUR                  UR                  -  Ul        [	        U5      U l
        X1l        g )N)r$   )r&   r'   r   r<   r   outputsetpruned_headsr*   rt   layout_output)r<   r=   r$   ori_hidden_sizer>   s       r?   r'   LiltAttention.__init__T  sg    %f^	$V,E ,,#//63N3NN+F3,rA   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rT   )lenr   r<   r   r   r   r   r   r   r   r   r   r   union)r<   headsindexs      r?   prune_headsLiltAttention.prune_heads`  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rA   r   r   r   r   r   r   c                     U R                  UUUUU5      nU R                  US   S   U5      nU R                  US   S   U5      nXx44USS  -   n	U	$ )Nr   r   )r<   r   r   )
r<   r   r   r   r   r   self_outputsattention_outputlayout_attention_outputr   s
             r?   rQ   LiltAttention.forwardr  sw     yy
  ;;|Aq'9=I"&"4"4\!_Q5G"W$>@<PQPRCSSrA   )r   r   r   r<   r   r   )ra   rb   rc   rd   r'   r   r7   r   r   FloatTensorboolr   rQ   re   rf   rg   s   @r?   r   r   S  s    	-;, 7;15,1|| || !!2!23	
 E--. $D> 
u||	 rA   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LiltIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r&   r'   r   rv   r*   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr;   s     r?   r'   LiltIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rA   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r<   r   s     r?   rQ   LiltIntermediate.forward  s&    

=100?rA   r   r   rg   s   @r?   r   r     s(    9U\\ ell  rA   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )
LiltOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r&   r'   r   rv   r   r*   r   r1   r2   r3   r4   r5   r;   s     r?   r'   LiltOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rA   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r?   rQ   LiltOutput.forward  r   rA   r   r   rg   s   @r?   r  r    r   rA   r  c                      ^  \ rS rSrU 4S jr   SS\R                  S\R                  S\\R                     S\\R                     S\\	   S\
\R                     4S	 jjrS
 rS rSrU =r$ )	LiltLayeri  c                   > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        UR                  nUR                  nUR                  UR                  -  Ul
        UR                  UR                  -  Ul        [        U5      U l        [        U5      U l        X!l
        X1l        g )Nr   )r&   r'   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater  r   r*   r   rt   layout_intermediater   )r<   r=   r   ori_intermediate_sizer>   s       r?   r'   LiltLayer.__init__  s    '-'E'E$&v.,V4 ( ,, & 8 8#//63N3NN#)#;#;v?Z?Z#Z #3F#; '/,#8 rA   r   r   r   r   r   r   c                    U R                  UUUUUS9nUS   S   nUS   S   nUSS  n	[        U R                  U R                  U R                  U5      n
[        U R
                  U R                  U R                  U5      nX44U	-   n	U	$ )N)r   r   r   )r  r   feed_forward_chunkr
  r  layout_feed_forward_chunk)r<   r   r   r   r   r   self_attention_outputsr   r   r   layer_outputlayout_layer_outputs               r?   rQ   LiltLayer.forward  s     "&/ "0 "
 2!4Q7"8";A">(,0##T%A%A4CSCSUe
 8**D,H,H$JZJZ\s
 !687BrA   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r  r   r<   r   intermediate_outputr  s       r?   r  LiltLayer.feed_forward_chunk  s)    "//0@A{{#6IrA   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r  r   r  s       r?   r  #LiltLayer.layout_feed_forward_chunk  s+    "667GH))*=PrA   )r  r
  r  r  r   r   r  r   )ra   rb   rc   rd   r'   r7   r   r   r   r   r   rQ   r  r  re   rf   rg   s   @r?   r  r    s    9* 7;15,1|| || !!2!23	
 E--. $D> 
u||	>
 rA   r  c                      ^  \ rS rSrU 4S jr     SS\R                  S\R                  S\\R                     S\\R                     S\\	   S\\	   S	\\	   S
\
\\R                     \4   4S jjrSrU =r$ )LiltEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r&   r'   r=   r   
ModuleListrangenum_hidden_layersr  layergradient_checkpointing)r<   r=   _r>   s      r?   r'   LiltEncoder.__init__  sR    ]]uVE]E]?^#_?^!If$5?^#_`
&+# $`s   A&r   r   r   r   r   output_hidden_statesreturn_dictr   c           	         U(       a  SOS nU(       a  SOS n	[        U R                  5       H  u  pU(       a  X4-   nUb  XJ   OS nU R                  (       a2  U R                  (       a!  U R	                  UR
                  UUUUU5      nOU" UUUUU5      nUS   S   nUS   S   nU(       d  M  XS   4-   n	M     U(       a  X4-   nU(       d  [        S UUU	4 5       5      $ [        UUU	S9$ )N r   r   c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r+  ).0vs     r?   	<genexpr>&LiltEncoder.forward.<locals>.<genexpr>  s"      A
  s   	)last_hidden_stater   
attentions)	enumerater$  r%  training_gradient_checkpointing_func__call__tupler   )r<   r   r   r   r   r   r(  r)  all_hidden_statesall_self_attentionsilayer_modulelayer_head_masklayer_outputss                 r?   rQ   LiltEncoder.forward  s&    #7BD$5b4(4OA#$58H$H!.7.CilO**t}} $ A A ))!!"#%! !-!!"#%! *!,Q/M)!,Q/M  &91=M<O&O#9  5<   14D D  "%'   ++*
 	
rA   )r=   r%  r$  )NNFFT)ra   rb   rc   rd   r'   r7   r   r   r   r   r   r   r   rQ   re   rf   rg   s   @r?   r  r    s    , 7;15,1/4&*<
||<
 ||<
 !!2!23	<

 E--.<
 $D><
 'tn<
 d^<
 
uU\\"O3	4<
 <
rA   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )
LiltPooleri*  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r&   r'   r   rv   r*   r   Tanh
activationr;   s     r?   r'   LiltPooler.__init__+  s9    YYv1163E3EF
'')rA   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ Nr   )r   rC  )r<   r   first_token_tensorpooled_outputs       r?   rQ   LiltPooler.forward0  s6     +1a40

#566rA   )rC  r   r   rg   s   @r?   r@  r@  *  s(    $
U\\ ell  rA   r@  c                   *    \ rS rSr\rSrSr/ rS r	Sr
g)LiltPreTrainedModeli9  liltTc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        gg)zInitialize the weightsg        )meanstdNg      ?)r   r   rv   weightdatanormal_r=   initializer_rangebiaszero_r(   r   r1   fill_)r<   modules     r?   _init_weights!LiltPreTrainedModel._init_weights@  s   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) .rA   r+  N)ra   rb   rc   rd   r   config_classbase_model_prefixsupports_gradient_checkpointing_no_split_modulesrX  re   r+  rA   r?   rK  rK  9  s    L&*#*rA   rK  c                     ^  \ rS rSrSU 4S jjrS rS rS r\          SS\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\
R                     \4   4S jj5       rSrU =r$ )	LiltModeliQ  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l
        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)r&   r'   r=   r   rP   ri   layout_embeddingsr  encoderr@  pooler	post_init)r<   r=   add_pooling_layerr>   s      r?   r'   LiltModel.__init__S  sX    
 	 ,V4!5f!="6*,=j(4 	rA   c                 .    U R                   R                  $ r   rP   r,   )r<   s    r?   get_input_embeddingsLiltModel.get_input_embeddingsd  s    ...rA   c                 $    XR                   l        g r   rh  )r<   r   s     r?   set_input_embeddingsLiltModel.set_input_embeddingsg  s    */'rA   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrb  r$  r  r   )r<   heads_to_pruner$  r   s       r?   _prune_headsLiltModel._prune_headsj  s<    
 +002LELLu%//;;EB 3rA   rL   r}   r   rM   r!   r   rN   r   r(  r)  r   c           
         Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUu  pUb  UR                  OUR                  nUc'  [        R                  " US-   [        R                  US9nUc  [        R                  " X4US9nUcr  [        U R                  S5      (       a3  U R                  R                  SS2SU24   nUR                  X5      nUnO$[        R                  " U[        R                  US9nU R!                  X;5      nU R#                  X`R                   R$                  5      nU R                  UUUUS	9u  nnU R'                  X%S
9nU R)                  UUUUUU	U
S9nUS   nU R*                  b  U R+                  U5      OSnU
(       d
  UU4USS -   $ [-        UUUR.                  UR0                  S9$ )aE  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Examples:

```python
>>> from transformers import AutoTokenizer, AutoModel
>>> from datasets import load_dataset

>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer"   z5You have to specify either input_ids or inputs_embeds)   rC   )rE   rM   )rL   r!   rM   rN   )r}   r!   )r   r   r   r(  r)  r   r   )r1  pooler_outputr   r2  )r=   r   r(  use_return_dictr   %warn_if_padding_and_no_attention_maskrI   rE   r7   rJ   rK   onesr   rP   rM   r9   get_extended_attention_maskget_head_maskr#  ra  rb  rc  r   r   r2  )r<   rL   r}   r   rM   r!   r   rN   r   r(  r)  rO   
batch_sizer   rE   buffered_token_type_ids buffered_token_type_ids_expandedextended_attention_maskembedding_outputlayout_embedding_outputencoder_outputssequence_outputrH  s                          r?   rQ   LiltModel.forwardr  se   P 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T<;;{T1FSD!"ZZ*)A6RN!t(899*.//*H*HKZK*X'3J3Q3QR\3i0!A!&[

SY!Z 150P0PQ_0m &&y++2O2OP	)-%)'	 *9 *
&, #'"8"8d"8"^,,#2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
rA   )r=   rP   rb  ra  rc  )T)
NNNNNNNNNN)ra   rb   rc   rd   r'   ri  rl  rq  r   r   r7   r   r   r   r   r   rQ   re   rf   rg   s   @r?   r_  r_  Q  s$   "/0C  -1'+1515/3,004,0/3&*p
ELL)p
 u||$p
 !.	p

 !.p
 u||,p
 ELL)p
  -p
 $D>p
 'tnp
 d^p
 
uU\\"$>>	?p
 p
rA   r_  z
    LiLT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                     ^  \ rS rSrU 4S jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\   S\\   S\\   S\\\R                     \4   4S jj5       rSrU =r$ )LiltForSequenceClassificationi  c                    > [         TU ]  U5        UR                  U l        Xl        [	        USS9U l        [        U5      U l        U R                  5         g NF)re  )	r&   r'   
num_labelsr=   r_  rL  LiltClassificationHead
classifierrd  r;   s     r?   r'   &LiltForSequenceClassification.__init__  sH      ++f>	08 	rA   rL   r}   r   rM   r!   r   rN   labelsr   r(  r)  r   c                 h   Ub  UOU R                   R                  nU R                  UUUUUUUU	U
US9
nUS   nU R                  U5      nSnUGb  UR	                  UR
                  5      nU R                   R                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [!        5       nU" X5      nU(       d  U4US	S -   nUb  U4U-   $ U$ [#        UUUR$                  UR&                  S
9$ )a$  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
>>> from datasets import load_dataset

>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

>>> outputs = model(**encoding)
>>> predicted_class_idx = outputs.logits.argmax(-1).item()
>>> predicted_class = model.config.id2label[predicted_class_idx]
```N	r}   r   rM   r!   r   rN   r   r(  r)  r   r   
regressionsingle_label_classificationmulti_label_classificationr"   rz   losslogitsr   r2  )r=   rv  rL  r  rG   rE   problem_typer  rD   r7   rK   rW   r	   squeezer   r   r   r   r   r2  r<   rL   r}   r   rM   r!   r   rN   r  r   r(  r)  r   r  r  r  loss_fctr   s                     r?   rQ   %LiltForSequenceClassification.forward  s   \ &1%<k$++B]B]))))%'/!5#  
 "!*1YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rA   )r  r=   rL  r  NNNNNNNNNNN)ra   rb   rc   rd   r'   r   r   r7   
LongTensorr   r   r   r   r   r   rQ   re   rf   rg   s   @r?   r  r    s9   	  15'+6:59371559-1,0/3&*`
E,,-`
 u||$`
 !!2!23	`

 !!1!12`
 u//0`
 E--.`
   1 12`
 ))*`
 $D>`
 'tn`
 d^`
 
uU\\"$<<	=`
 `
rA   r  c                     ^  \ rS rSrU 4S jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )LiltForTokenClassificationi]  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )r&   r'   r  r_  rL  classifier_dropoutr4   r   r3   r5   rv   r*   r  rd  r<   r=   r  r>   s      r?   r'   #LiltForTokenClassification.__init__`  s      ++f>	)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rA   rL   r}   r   rM   r!   r   rN   r  r   r(  r)  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU	U
US9
nUS   nU R                  U5      nU R	                  U5      nSnUbW  UR                  UR                  5      n[        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a8  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoTokenizer, AutoModelForTokenClassification
>>> from datasets import load_dataset

>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

>>> outputs = model(**encoding)
>>> predicted_class_indices = outputs.logits.argmax(-1)
```Nr  r   r"   rz   r  )r=   rv  rL  r5   r  rG   rE   r   r   r  r   r   r2  r  s                     r?   rQ   "LiltForTokenClassification.forwardn  s   V &1%<k$++B]B]))))%'/!5#  
 "!*,,71YYv}}-F')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
rA   )r  r5   rL  r  r  )ra   rb   rc   rd   r'   r   r   r7   r  r   r   r   r   r   r   rQ   re   rf   rg   s   @r?   r  r  ]  s;     15+/6:59371559-1,0/3&*N
E,,-N
 u''(N
 !!2!23	N

 !!1!12N
 u//0N
 E--.N
   1 12N
 ))*N
 $D>N
 'tnN
 d^N
 
uU\\"$99	:N
 N
rA   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r  i  z-Head for sentence-level classification tasks.c                 b  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        g r   )r&   r'   r   rv   r*   r   r  r4   r3   r5   r  out_projr  s      r?   r'   LiltClassificationHead.__init__  s    YYv1163E3EF
)/)B)B)NF%%TZTnTn 	 zz"45		&"4"4f6G6GHrA   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ rF  )r5   r   r7   tanhr  )r<   featureskwargsr   s       r?   rQ   LiltClassificationHead.forward  sY    Q1WLLOJJqMJJqMLLOMM!rA   )r   r5   r  )	ra   rb   rc   rd   __doc__r'   rQ   re   rf   rg   s   @r?   r  r    s    7I rA   r  c                     ^  \ rS rSrU 4S jr\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       rSrU =r$ )LiltForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  )
r&   r'   r  r_  rL  r   rv   r*   
qa_outputsrd  r;   s     r?   r'   !LiltForQuestionAnswering.__init__  sU      ++f>	))F$6$68I8IJ 	rA   rL   r}   r   rM   r!   r   rN   start_positionsend_positionsr   r(  r)  r   c                 (   Ub  UOU R                   R                  nU R                  UUUUUUUU
UUS9
nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnUb  U	b  [        UR                  5       5      S:  a  UR                  S5      n[        U	R                  5       5      S:  a  U	R                  S5      n	UR                  S5      nUR                  SU5      nU	R                  SU5      n	[        US9nU" UU5      nU" UU	5      nUU-   S-  nU(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S	9$ )
aB  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Examples:

```python
>>> from transformers import AutoTokenizer, AutoModelForQuestionAnswering
>>> from datasets import load_dataset

>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

>>> outputs = model(**encoding)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()

>>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
>>> predicted_answer = tokenizer.decode(predict_answer_tokens)
```Nr  r   r   r"   rT   )ignore_indexrz   )r  start_logits
end_logitsr   r2  )r=   rv  rL  r  splitr  r   r   rI   clampr   r   r   r2  )r<   rL   r}   r   rM   r!   r   rN   r  r  r   r(  r)  r   r  r  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                           r?   rQ    LiltForQuestionAnswering.forward  s   ^ &1%<k$++B]B]))))%'/!5#  
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rA   )rL  r  r  )NNNNNNNNNNNN)ra   rb   rc   rd   r'   r   r   r7   r  r   r   r   r   r   r   rQ   re   rf   rg   s   @r?   r  r    sT     15+/6:593715596:48,0/3&*_
E,,-_
 u''(_
 !!2!23	_

 !!1!12_
 u//0_
 E--._
   1 12_
 "%"2"23_
   0 01_
 $D>_
 'tn_
 d^_
 
uU\\"$@@	A_
 _
rA   r  )r  r  r  r_  rK  )5r  r   typingr   r   r   r7   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   configuration_liltr   
get_loggerra   loggerModuler   ri   r   r   r   r   r  r  r  r@  rK  r_  r  r  r  r  __all__r+  rA   r?   <module>r     s     ) )    A A !  . l l , * 
		H	%V= V=r5+299 5+pF		 FTRYY 1BII 1jryy   9		 9xD
")) D
P  */ * *. Q
# Q
 Q
h n
$7 n
n
b _
!4 _
 _
FRYY , l
2 l
 l
^rA   