
    fTh                     V   S r SSKrSSKJr  SSKJrJrJrJr  SSK	r
SSKrSSKJr  SSKJrJrJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJr  SSKJrJrJr  SSK J!r!  \RD                  " \#5      r$Sr%S r& " S S\RN                  5      r( " S S\RN                  5      r)S\RT                  S\+S\+S\RT                  4S jr, " S S\RN                  5      r- " S S\RN                  5      r. " S S\RN                  5      r/ " S S \RN                  5      r0 SFS!\RT                  S"\+S#\+S$\1S%\1S\RT                  4S& jjr2 " S' S(\RN                  5      r3 " S) S*\RN                  5      r4\ " S+ S,\5      5       r5 " S- S.\RN                  5      r6\ " S/ S0\5      5       r7\" S1S29 " S3 S4\55      5       r8\ " S5 S6\55      5       r9\" S7S29 " S8 S9\55      5       r:\ " S: S;\55      5       r;\" S<S29 " S= S>\55      5       r<\ " S? S@\55      5       r=\ " SA SB\55      5       r>\ " SC SD\55      5       r?/ SEQr@g)Gz!PyTorch Funnel Transformer model.    N)	dataclass)ListOptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                     SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     SSS	S
SSSSSSSSSSS.n[        X5       GH"  u  pU
R                  S5      n
[!        S U
 5       5      (       a)  [        R                  SSR#                  U
5       35        MW  U
S   S:X  a  Mb  U nSnU
SS  GH.  n[%        U[&        5      (       d  UR)                  SU5      (       a  [+        UR-                  SU5      R/                  5       S   5      nUUR0                  :  aU  SnUUR2                  U   :  a,  UUR2                  U   -  nUS-  nUUR2                  U   :  a  M,  UR4                  U   U   nM  UUR0                  -  nUR6                  U   nM  US:X  a#  [%        U[8        5      (       a  UR:                  n  O)UU;   a  [=        XU   5      nGM!   [=        UU5      nGM1     U(       a  GM  [E        URB                  5      [E        URB                  5      :w  a  URG                  URB                  5      nWS:X  a  URH                  " U5      n[J        RL                  " U5      Ul'        GM%     U $ ! [         a    [        R                  S5        e f = f! [>         a/    [A        SSR#                  U
5       3URB                  5        Sn   M  f = f)z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape k_headq_headv_head	post_projlinear_1linear_2	attentionffnweightbiasword_embeddings
embeddings)kqvolayer_1layer_2rel_attnffkernelgammabetalookup_tableword_embeddinginput/c              3   ,   #    U  H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     b/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/funnel/modeling_funnel.py	<genexpr>,load_tf_weights_in_funnel.<locals>.<genexpr>\   s      
 nns   z	Skipping 	generatorFr   z	layer_\d+zlayer_(\d+)rTr/   )(renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipsplitanyjoin
isinstanceFunnelPositionwiseFFN	fullmatchintsearchgroupsnum_hidden_layersblock_sizesblockslayersFunnelRelMultiheadAttentionr_kernelgetattrAttributeErrorprintshapelenreshape	transposetorch
from_numpydata)modelconfigtf_checkpoint_pathrD   nptftf_path	init_varsnamesarraysnamere   array
_layer_mappointerskippedm_namelayer_index	block_idxs                      r?   load_tf_weights_in_funnelr}   .   s>   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 !  +J" 5)zz#  

 
 
 KK)CHHTN#3457k!12hFg'<==",,|]cBdBd!"))NF"C"J"J"LQ"OP!9!99 !I%););I)FF#v'9'9)'DD!Q	 &););I)FF &nnY7DG6#;#;;K%nn[9G3:g7R#S#S!**:%!'f+=>%gv6G' 0 w7==!S%55gmm4!U+ ++E2GLW *Z La  Q	
 	J & Ichhtn%56D"Gs   L 0L0!L-04M)(M)c                      ^  \ rS rSrS\SS4U 4S jjr S
S\\R                     S\\R                     S\R                  4S jjr	S	r
U =r$ )FunnelEmbeddings   rm   returnNc                 :  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l        g )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idr%   	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfrm   	__class__s     r?   r   FunnelEmbeddings.__init__   sh    !||F,=,=v?Q?Q_e_r_rs,,v~~6;P;PQzz&"7"78    	input_idsinputs_embedsc                 r    Uc  U R                  U5      nU R                  U5      nU R                  U5      nU$ N)r%   r   r   )r   r   r   r&   s       r?   forwardFunnelEmbeddings.forward   s<       00;M__]3
\\*-
r   )r   r   r%   NN)__name__
__module____qualname____firstlineno__r   r   r   ri   Tensorr   __static_attributes____classcell__r   s   @r?   r   r      sX    9| 9 9 ae!%,,/GOPUP\P\G]	 r   r   c                     ^  \ rS rSr% SrSr\\S'   S\SS4U 4S jjr	  S"S	\
R                  S
\\
R                     S\\
R                     S\\
R                     4S jjrS\
R                  S\
R                  4S jrS\S\
R                   S\
R"                  S\\\
R                     \\\
R                        4   4S jrS\
R                  S\4S jrS#S\
R                  S\S\S\
R                  4S jjrS\\
R                  \\
R                     \\
R                     4   S\\\\   \\   4   S\
R                  4S jr S$S\\
R                  \\
R                     \\
R                     4   S\S\S\
R                  4S jjrS\\
R                     S\\
R                  \\
R                     4   4S jrS\\
R                     S\\
R                     4S  jrS!rU =r$ )%FunnelAttentionStructure   z6
Contains helpers for `FunnelRelMultiheadAttention `.
   cls_token_type_idrm   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  5      U l        [        R                  " UR
                  5      U l        S U l        g r   )	r   r   rm   r   r   r   sin_dropoutcos_dropoutpooling_multr   s     r?   r   !FunnelAttentionStructure.__init__   sK    ::f&;&;<::f&;&;< !r   r   attention_masktoken_type_idsc                 h   SU l         UR                  S5      =U l        nU R                  XAR                  UR
                  5      nUb  U R                  U5      OSnU R                  R                  (       a7  [        R                  R                  UR                  US-
  US-
  /5      S5      OSnXVX'4$ )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )r   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matrm   separate_clsr   
functionalpadnew_ones)r   r   r   r   r   position_embedstoken_type_matcls_masks           r?   init_attention_inputs.FunnelAttentionStructure.init_attention_inputs   s     !.!3!3A!66w227<O<OQ^QeQefGUGa33NCgk {{'' MMm44gk7Q;5OPR^_ 	
  JJr   c                 ~    USS2SS2S4   USS2S4   :H  nXR                   :H  nUSS2SS2S4   USS2S4   -  nXB-  $ )z-Convert `token_type_ids` to `token_type_mat`.N)r   )r   r   r   cls_idscls_mats        r?   r   .FunnelAttentionStructure.token_type_ids_to_mat   sU    '1d
3~ag7NN $:$::!Q*%4(88''r   r   r   r   c                 R   U R                   R                  nU R                   R                  S:X  Ga6  [        R                  " SUS[        R
                  US9R                  U5      n[        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  nUSS2S4   US   -  n[        R                  " U5      n	U R                  U	5      n
[        R                  " U5      nU R                  U5      n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " U	* U/S	S
9nXUU4$ [        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  n[        R                  " U* S-  US-  S[        R
                  US9R                  U5      nUS-  nUSS2S4   US   -  nU R                  [        R                  " U5      5      n	U R                  [        R                  " U5      5      n[        R                  " X/S	S
9n[        R                  " SU[        R
                  US9R                  U5      nUn/ n[        SU R                   R                  5       H  nUS:X  a  SnOqU R                  UU5      nSUS-
  -  nU R                  UUUSS9nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUnSU-  nU R                  UU5      nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUR'                  UU/5        M     U$ )a  
Create and cache inputs related to relative position encoding. Those are very different depending on whether we
are using the factorized or the relative shift attention:

For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
final formula.

For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
formula.

Paper link: https://arxiv.org/abs/2006.03236

factorizedr         ?r   r   r   r   i'  Ndim)shift)rm   r   attention_typeri   arangeint64tosinr   cosr   catrange
num_blocksstride_pool_posrelative_posexpandr   gatherrQ   )r   r   r   r   r   pos_seqfreq_seqinv_freqsinusoid	sin_embedsin_embed_d	cos_embedcos_embed_dphipsipiomega
rel_pos_idzero_offset	pos_embedpos
pooled_posposition_embeds_listblock_indexposition_embeds_poolingstriderel_posposition_embeds_no_poolings                               r?   r   ,FunnelAttentionStructure.get_position_embeds   sh    ++%%;;%%5 ll1gs%++fUXXY^_G||Aw!|STZ[^^_deHEhQ,&?@AHq$w'(4.8H		(+I**95K		(+I**95K))[6B?C))Y2;CK52>BII	z952>ES%(( ||Aw!|STZ[^^_deHEhQ,&?@AHwhlGaKEKK`fgjjkpqJ!A+K!!T'*Xd^;H((8)<=I((8)<=I		9"8bAI,,q'VLOOPUVCJ#% $Q(>(>? !#.2+!%!5!5c;!GJ ;?3F"//VZq/QG%ag.<G%nnW\\!_gFG.3ll9a.Q+ !K++C8!!T'*[8!..a'B-2\\)Q-P*$++-GI`,ab9  @: ('r   pos_idr   c                     U R                   R                  (       a\  UR                  SU-  * S-   /5      nU R                   R                  (       a  USS OUSS n[        R
                  " X4SSS2   /S5      $ USSS2   $ )zU
Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
r   r   r   Nr   )rm   r   
new_tensortruncate_seqri   r   )r   r   r   cls_pospooled_pos_ids        r?   r   (FunnelAttentionStructure.stride_pool_pos  s~     ;;##
 ''1k>):Q)>(?@G,0KK,D,DF1RL&QRQS*M99gSqS'9:A>>#A#;r   r   r   r   c                     Uc  UnUS   US   -
  nU[        U5      -  nXVU-  -   nUS   US   -
  n[        R                  " XxS-
  U* [        R                  UR                  S9$ )zF
Build the relative positional vector between `pos` and `pooled_pos`.
r   r   r   r   )rf   ri   r   longr   )	r   r   r   r   r   	ref_point
num_removemax_distmin_dists	            r?   r   %FunnelAttentionStructure.relative_pos$  st     JqMCF*	S_,
F22a=3r7*||HlVG5::VYV`V`aar   tensoraxisc                 x  ^ ^ Uc  g[        T[        [        45      (       a  T H  nT R                  X5      nM     U$ [        U[        [        45      (       a  [	        U5      " UU 4S jU 5       5      $ TUR
                  -  mT R                  R                  (       a(  T R                  R                  (       a  [        SSS5      O[        SSS5      n[        S5      /T-  U/-   nT R                  R                  (       a6  [        S5      /T-  [        SS5      /-   n[        R                  " X   U/TS9nX   $ )zD
Perform pooling by stride slicing the tensor along the given axis.
Nc              3   H   >#    U  H  nTR                  UT5      v   M     g 7fr   )stride_pool)r=   xr  r   s     r?   r@   7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>E  s!     J6a 0 0D 9 96s   "r   r   r   )r  )rV   listtupler  typendimrm   r   r   sliceri   r   )r   r  r  ax
axis_slice	enc_slice	cls_slices   ` `    r?   r  $FunnelAttentionStructure.stride_pool2  s    > dT5M**))&5 M fudm,,<J6JJJ 	 #'++":":t{{?W?WE$A]bcgimop]q 	 4[MD(J<7	;;##t,dA/??IYY 16:FF  r   modec                   ^ ^^^ Tc  g[        T[        [        45      (       a  [        T5      " UU UU4S jT 5       5      $ T R                  R
                  (       aH  T R                  R                  (       a  TSS2SS24   OTn[        R                  " TSS2SS24   U/SS9mTR                  nUS:X  a  TSS2SSS2S4   mOUS:X  a  TSS2SSS2SS24   mTS4mTS:X  a!  [        R                  R                  TTTS	S
9mO[TS:X  a!  [        R                  R                  TTTS	S
9mO4TS:X  a#  [        R                  R                  T* TTS	S
9* mO[        S5      eUS:X  a  TSS2SSS2S4   $ US:X  a	  TSS2S4   $ T$ )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc              3   F   >#    U  H  nTR                  TTTS 9v   M     g7f))r  r   N)pool_tensor)r=   r  r  r   r   r  s     r?   r@   7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>\  s&     c\bWX 0 0d6 0 R\bs   !r   r   r   r   r   meanT)r   	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )rV   r
  r	  r  rm   r   r   ri   r   r  r   r   
avg_pool2d
max_pool2dNotImplementedError)r   r  r  r   suffixr  s   ````  r?   r  $FunnelAttentionStructure.pool_tensorS  s|    > fudm,,<c\bccc;;##'+{{'?'?VAssF^VFYYq"1"uv6A>F{{19AtQ,-FQYAtQM*F!6>]]--ffVW[-\FU]]]--ffVW[-\FU]mm..wvY].^^F%&XYY19!Q1*%%QY!Q$<r   attention_inputsc                    Uu  p4pVU R                   R                  (       a}  U R                   R                  S:X  a  U R                  USS S5      USS -   nU R                  US5      nU R                  US5      nU R	                  XR                   R
                  S9nOU =R                  S-  sl        U R                   R                  S:X  a  U R                  US5      nU R                  USS/5      nU R                  USS/5      nU R	                  USS9nU R	                  XR                   R
                  S9nX4XV4nX4$ )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.r   Nr   r   r   r  r  )rm   pool_q_onlyr   r  r  pooling_typer   )r   outputr!  r   r   r   r   s          r?   pre_attention_pooling.FunnelAttentionStructure.pre_attention_poolingy  s>    EUA;;""{{))\9"&"2"2?2A3F"J_]^]_M`"`!--na@N''!4H%%f;;3K3K%LF"{{))\9"&"2"2?A"F!--nq!fEN''1a&9H!--n5-IN%%f;;3K3K%LF+^V''r   c                 J   Uu  p#pEU R                   R                  (       a~  U =R                  S-  sl        U R                   R                  S:X  a  USS U R	                  USS S5      -   nU R	                  US5      nU R	                  US5      nU R                  USS9nX#XE4nU$ )zFPool the proper parts of `attention_inputs` after the attention layer.r   r   Nr   r   r  r#  )rm   r$  r   r   r  r  )r   r!  r   r   r   r   s         r?   post_attention_pooling/FunnelAttentionStructure.post_attention_pooling  s    DTA;;"""{{))\9"1"1"58H8HYZY[I\^_8`"`!--na@N''!4H!--n5-IN+^Vr   )rm   r   r   r   r   r   Nr   )r  r   )r   r   r   r   __doc__r   rY   __annotations__r   r   ri   r   r   r   r   r   r   r   r   r   r   r   r   r  strr  r'  r*  r   r   r   s   @r?   r   r      sH    s!| ! ! 2615	K||K !.K !.	K
 
u||	K((ELL (U\\ (N(N(#(;;N(8=N(	uU\\"Dell);$<<	=N(`ell  b bc bSV b_d_k_k b!ellE%,,$7ell9KKL! CsT#Y./! 
	!D wx$ELL%*=tELL?QQR$Z]$ps$	$L((-ell(;(	u||U5<<00	1(, uU\\7J  uUZUaUaOb    r   r   positional_attncontext_lenr   r   c                     U R                   u  p4pV[        R                  " XXFU/5      n U S S 2S S 2US 2S S 24   n [        R                  " XXEXb-
  /5      n U SS U24   n U $ )N.)re   ri   rg   )r0  r1  r   
batch_sizen_headr   max_rel_lens          r?   _relative_shift_gatherr6    sp    />/D/D,J mmO&W^5_`O%aEFAo6OmmO&S^Sf5ghO%c<K<&78Or   c                      ^  \ rS rSrS\S\SS4U 4S jjrSS jrSS jr SS	\	R                  S
\	R                  S\	R                  S\\	R                     S\S\\	R                  S4   4S jjrSrU =r$ )r`   i  rm   r   r   Nc                 F  > [         TU ]  5         Xl        X l        UR                  UR
                  UR                  pTn[        R                  " UR                  5      U l	        [        R                  " UR                  5      U l
        [        R                  " X4U-  SS9U l        [        R                  " X4U-  5      U l        [        R                  " X4U-  5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " X4U/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " SXE/5      5      U l        [        R                  " XE-  U5      U l        [        R0                  " X1R2                  S9U l        SUS-  -  U l        g )NF)r$   r   r   r   g      ?)r   r   rm   r   r   r4  d_headr   r   r   attention_dropoutLinearr   r   r   	Parameterri   zerosr_w_biasr_r_biasra   r_s_bias	seg_embedr   r   r   r   scale)r   rm   r   r   r4  r9  r   s         r?   r   $FunnelRelMultiheadAttention.__init__  sf   &"(..&-- jj)>)>?!#F,D,D!Eii&uEii&9ii&9U[[&1A%BCU[[&1A%BCU[['61J%KLU[[&1A%BCekk1f2E&FG6?G<,,w4I4IJFCK(
r   c                 z   U R                   R                  S:X  a  Uu  pVpxU R                  U R                  -  n	U R                  n
[
        R                  " SX)-   U
5      nXSS2S4   -  nXSS2S4   -  n[
        R                  " SX5      [
        R                  " SX5      -   nOUR                  S   U:w  a  SOSnXR                     US-
     nU R                  U R                  -  nU R                  n
[
        R                  " SUU
5      n[
        R                  " SUU-   U5      n[        XU5      nUb  X-  nU$ )	z5Relative attention score for the positional encodingsr   zbinh,dnh->bindNzbind,jd->bnijr   r   ztd,dnh->tnhzbinh,tnh->bnit)
rm   r   r?  rB  ra   ri   einsumre   r   r6  )r   r   r   r1  r   r   r   r   r   uw_rq_r_attentionq_r_attention_1q_r_attention_2r0  r   rC   r)   r_heads                      r?   relative_positional_attention9FunnelRelMultiheadAttention.relative_positional_attention  s7    ;;%%5 #2CS

*A--C "LL)96:sKM+!T'l:O+Dk9O $ll?OQTYT`T`U O  aK7AQE   0 01%!)<A

*A--C \\-C8F#ll+;VaZPO4_SXYO'Or   c                    Uc  gUR                   u  pEnU R                  U R                  -  n[        R                  " SX'-   U R
                  5      nUSS2S4   R                  XBR                   S   XV/5      n[        R                  " USSS9u  p[        R                  " XR                  UR                   5      U	R                  UR                   5      5      nUb  X-  nU$ )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisr   r   r   r   )	re   r@  rB  ri   rE  rA  r   rS   where)r   r   r   r   r3  r   r1  r@  token_type_biasdiff_token_typesame_token_typetoken_type_attns               r?   relative_token_type_attention9FunnelRelMultiheadAttention.relative_token_type_attention  s    !+9+?+?(
[ ==4::-  ,,'79JDNN['4077\\RS_V]8kl+0;;r+R(++22>3G3GH/J`J`aoauauJv
 'Or   querykeyvaluer!  output_attentions.c                    Uu  pgpUR                   u  pnUR                   S   nU R                  R                  U R                  R                  pU R	                  U5      R                  XX5      nU R                  U5      R                  XX5      nU R                  U5      R                  XX5      nUU R                  -  nU R                  U R                  -  n[        R                  " SUU-   U5      nU R                  UUX5      nU R                  UUU	5      nUU-   U-   nUR                  nUR                  5       nUb%  U[         SUS S 2S S 4   R                  5       -
  -  -
  n[        R"                  " USUS9nU R%                  U5      n[        R                  " SUU5      nU R'                  UR)                  XX-  5      5      nU R+                  U5      nU R-                  UU-   5      nU(       a  UU4$ U4$ )Nr   zbind,bjnd->bnijr   )r   r   zbnij,bjnd->bind)re   rm   r4  r9  r   viewr   r   rB  r>  ri   rE  rL  rT  r   floatINFsoftmaxr:  r   rg   r   r   )r   rV  rW  rX  r!  rY  r   r   r   r   r3  r   _r1  r4  r9  r   r   r   r>  content_scorer0  rS  
attn_scorer   	attn_probattn_vecattn_outr&  s                                r?   r   #FunnelRelMultiheadAttention.forward  s    EUA!&
Qiil++T[[-?-? U#((fMS!&&zOU#((&Q$**$==4::-%68I6R<<_fVal<<^VU]^ #_4F
   %%'
%#cQ41N1T1T1V-V&WWJMM*"EB	**95	 << 19fE >>("2"2:"XY&&x0!12&7	"FfYFr   )r:  r   rm   r   r   r   r   r   ra   r?  r@  r>  rB  rA  r   r   F)r   r   r   r   r   rY   r   rL  rT  ri   r   r   boolr   r   r   r   s   @r?   r`   r`     s    )| )# )$ ).(T< #(3G||3G \\3G ||	3G
  -3G  3G 
u||S 	!3G 3Gr   r`   c                   n   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	rW   i=  rm   r   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        [        R                  " UR                  5      U l        [        R                  " UR
                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                   5      U l        g r   )r   r   r   r;  r   d_innerr   r   
hidden_actactivation_functionr   activation_dropoutr    r   r   r   r   r   r   s     r?   r   FunnelPositionwiseFFN.__init__>  s    		&..&..A#)&*;*;#< "$**V-F-F"G		&..&..Azz&"7"78,,v~~v7L7LMr   hiddenc                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  X-   5      $ r   )r   rl  rm  r    r   r   )r   ro  hs      r?   r   FunnelPositionwiseFFN.forwardG  s\    MM&!$$Q'##A&MM!LLOvz**r   )rm  rl  r   r   r   r    )r   r   r   r   r   r   ri   r   r   r   r   r   s   @r?   rW   rW   =  s9    N| N N+ell +u|| + +r   rW   c                      ^  \ rS rSrS\S\SS4U 4S jjr SS\R                  S\R                  S	\R                  S
\	S\
4
S jjrSrU =r$ )FunnelLayeriP  rm   r   r   Nc                 b   > [         TU ]  5         [        X5      U l        [	        U5      U l        g r   )r   r   r`   r!   rW   r"   )r   rm   r   r   s      r?   r   FunnelLayer.__init__Q  s&    4VI(0r   rV  rW  rX  rY  c                 l    U R                  XX4US9nU R                  US   5      nU(       a  XvS   4$ U4$ )NrY  r   r   r!   r"   )r   rV  rW  rX  r!  rY  attnr&  s           r?   r   FunnelLayer.forwardV  sA     ~~e%Uf~g$q'"$5Q DF9Dr   ry  rf  )r   r   r   r   r   rY   r   ri   r   rg  r   r   r   r   r   s   @r?   rt  rt  P  so    1| 1# 1$ 1 #(
E||
E \\
E ||	
E  
E 

E 
Er   rt  c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\\R                     S\\R                     S	\	S
\	S\	S\
\\4   4S jjrSrU =r$ )FunnelEncoderic  rm   r   Nc                 ^  > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       VVVs/ s H>  u  p#[
        R                  " [        U5       Vs/ s H  n[        X5      PM     sn5      PM@     snnn5      U l        g s  snf s  snnnf r   )r   r   rm   r   attention_structurer   
ModuleList	enumerater]   r   rt  r^   )r   rm   r   
block_sizer_  r   s        r?   r   FunnelEncoder.__init__d  s    #;F#C mm 099K9K/L/L+K zIZ[IZA{6?IZ[\/L
[s   %B(4B#	B(#B(r   r   r   rY  output_hidden_statesreturn_dictc                    UR                  U5      nU R                  R                  UUUS9nUnU(       a  U4OS n	U(       a  SOS n
[        U R                  5       GH?  u  pUR                  S5      U R                  R                  (       a  SOS:  nU=(       a    US:  nU(       a  U R                  R                  X5      u  p[        U5       H  u  nn[        U R                  R                  U   5       H  nUS:H  =(       a    US:H  =(       a    UnU(       a$  WnU R                  R                  (       a  UOU=nnOU=n=nnU" UUUXtS9nUS   nU(       a  U R                  R                  U5      nU(       a  U
USS  -   n
U(       d  M  X4-   n	M     M     GMB     U(       d  [        S XU
4 5       5      $ [        XU
S9$ )	Nr   r   r<   r   r   r   rx  c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r<   r=   r)   s     r?   r@   (FunnelEncoder.forward.<locals>.<genexpr>       a$Oq$O   	last_hidden_statehidden_states
attentions)type_asr  r   r  r^   r   rm   r   r'  r   block_repeatsr$  r*  r
  r   )r   r   r   r   rY  r  r  r!  ro  all_hidden_statesall_attentionsr   blockpooling_flagpooled_hiddenr{   layerrepeat_index
do_poolingrV  rW  rX  layer_outputs                          r?   r   FunnelEncoder.forwardo  s    (//>33II)) J 

 0D],$0d"+DKK"8K!;;q>$++2J2JQPQRL';K!OL262J2J2`2`3/ '0&6"U$)$++*C*CK*P$QL".!"3!\+:J!\P\J! -040G0Gf]Ze.444e#(U<L#rL)!_F!+/+C+C+Z+Z[k+l(()7,qr:J)J++,=	,I) %R '7 #92 aV$Oaaaesttr   )r  r^   rm   NNFFTr   r   r   r   r   r   ri   r   r   rg  r   r   r   r   r   r   r   s   @r?   r}  r}  c  s    	
| 	
 	
 2615"'%* 0u||0u !.0u !.	0u
  0u #0u 0u 
uo%	&0u 0ur   r}  r  r   
target_lenr   r   c           	      L   US:X  a  U $ U(       a  U SS2SS24   nU SS2SS24   n [         R                  " XSS9nU(       aW  U(       a)  [        R                  R	                  USSSUS-
  SS45      nUSS2SUS-
  24   n[         R
                  " WU/SS9nU$ USS2SU24   nU$ )zs
Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
r   N)repeatsr   r   r   )ri   repeat_interleaver   r   r   r   )r  r   r  r   r   clsr&  s          r?   upsampler    s     {2A2haeH$$QA>F]]&&v1a!Q/JKF+Z!^++,C=a0 M ;J;'Mr   c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\R                  S\\R                     S	\\R                     S
\	S\	S\	S\
\\4   4S jjrSrU =r$ )FunnelDecoderi  rm   r   Nc           	         > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[        US5      PM     sn5      U l
        g s  snf )Nr   )r   r   rm   r   r  r   r  r   num_decoder_layersrt  r_   )r   rm   r_  r   s      r?   r   FunnelDecoder.__init__  sV    #;F#C mmU6KdKdEe$fEe[%;Ee$fg$fs   A0final_hiddenfirst_block_hiddenr   r   rY  r  r  c           	         [        US[        U R                  R                  5      S-
  -  UR                  S   U R                  R
                  U R                  R                  S9nX-   n	U(       a  U	4OS n
U(       a  SOS nU R                  R                  U	UUS9nU R                   H,  nU" XXUS9nUS   n	U(       a  XSS  -   nU(       d  M'  X4-   n
M.     U(       d  [        S XU4 5       5      $ [        XUS	9$ )
Nr   r   )r   r  r   r   r<   r  rx  r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r<   r  s     r?   r@   (FunnelDecoder.forward.<locals>.<genexpr>  r  r  r  )r  rf   rm   r]   re   r   r   r  r   r_   r
  r   )r   r  r  r   r   rY  r  r  upsampled_hiddenro  r  r  r!  r  r  s                  r?   r   FunnelDecoder.forward  s    $T[[4459:)//21111
 "6)=VI40d33II)) J 
 [[E ]noL!!_F !/qr2B!B##$5	$A! ! aV$Oaaaesttr   )r  rm   r_   r  r  r   s   @r?   r  r    s    h| h h 2615"'%* 'ull'u "LL'u !.	'u
 !.'u  'u #'u 'u 
uo%	&'u 'ur   r  c                   r   ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelDiscriminatorPredictionsi  zEPrediction module for the discriminator, made up of two dense layers.rm   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  UR
                  5      U l        [        R                  " UR
                  S5      U l        g r,  )r   r   rm   r   r;  r   densedense_predictionr   s     r?   r   'FunnelDiscriminatorPredictions.__init__  sD    YYv~~v~~>
 "		&..! <r   discriminator_hidden_statesc                     U R                  U5      n[        U R                  R                     " U5      nU R	                  U5      R                  S5      nU$ )Nr   )r  r   rm   rk  r  squeeze)r   r  r  logitss       r?   r   &FunnelDiscriminatorPredictions.forward  sJ    

#>?t{{556}E&&}5==bAr   )rm   r  r  )r   r   r   r   r-  r   r   ri   r   r   r   r   r   s   @r?   r  r    s9    O=| = =5<< ELL  r   r  c                   &    \ rS rSr\r\rSrS r	Sr
g)FunnelPreTrainedModeli  funnelc                     UR                   R                  nUR                  S5      S:w  a  [        USS 5      b  U R                  R
                  c=  UR                  R                  u  p4[        R                  " S[        XC-   5      -  5      nOU R                  R
                  n[        R                  R                  UR                  US9  [        USS 5      b+  [        R                  R                  UR                  S5        g g US:X  Ga-  [        R                  R!                  UR"                  U R                  R$                  S	9  [        R                  R!                  UR&                  U R                  R$                  S	9  [        R                  R!                  UR(                  U R                  R$                  S	9  [        R                  R!                  UR*                  U R                  R$                  S	9  [        R                  R!                  UR,                  U R                  R$                  S	9  g US
:X  a  U R                  R
                  c  SOU R                  R
                  n[        R                  R                  UR.                  R                  US9  UR.                  R0                  bF  UR.                  R                  R2                  UR.                  R0                     R5                  5         g g g )Nr;  r   r#   r   )stdr$   g        r`   )br   )r   r   findrb   rm   initializer_stdr#   re   ro   sqrtr\  r   initnormal_	constant_r$   uniform_r>  initializer_ranger?  ra   r@  rA  r%   r   rk   zero_)r   module	classnamefan_outfan_inr  s         r?   _init_weights#FunnelPreTrainedModel._init_weights  s   $$--	>>(#r)vx.:;;..6&,mm&9&9OG''#f.>(?"?@C++55C37vvt,8!!&++s3 977GGV__0M0MNGGV__0M0MNGGV__0M0MNGGV__0M0MNGGV--1N1NO,,44<#$++B]B]CGGOOF2299sOC%%11=&&--2263I3I3U3UV\\^ > -r   r<   N)r   r   r   r   r   config_classr}   load_tf_weightsbase_model_prefixr  r   r<   r   r?   r  r    s    L/O _r   r  c                   r   ^  \ rS rSrS\S\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelClassificationHeadi  rm   n_labelsr   Nc                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  U5      U l	        g r   )
r   r   r   r;  r   linear_hiddenr   r   r   
linear_out)r   rm   r  r   s      r?   r   !FunnelClassificationHead.__init__  sU    YYv~~v~~Fzz&"7"78))FNNH=r   ro  c                     U R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      $ r   )r  ri   tanhr   r  )r   ro  s     r?   r    FunnelClassificationHead.forward  s=    ##F+F#f%v&&r   )r   r  r  )r   r   r   r   r   rY   r   ri   r   r   r   r   r   s   @r?   r  r    s=    >| >s >t >'ell 'u|| ' 'r   r  c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	FunnelForPreTrainingOutputi&  a  
Output type of [`FunnelForPreTraining`].

Args:
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss of the ELECTRA-style objective.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
        Prediction scores of the head (scores for each token before SoftMax).
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
        shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nlossr  r  r  r<   )r   r   r   r   r-  r  r   ri   FloatTensorr.  r  r  r   r  r   r<   r   r?   r  r  &  sg    * )-D(5$$
%,*.FHU&&'.8<M8E%"3"345<59Ju00129r   r  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    )custom_introc                     ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
         SS	\\R                     S
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelBaseModeliC  rm   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r   r   r   r&   r}  encoder	post_initr   s     r?   r   FunnelBaseModel.__init__J  s4     *62$V, 	r   c                 .    U R                   R                  $ r   r&   r%   r   s    r?   get_input_embeddings$FunnelBaseModel.get_input_embeddingsS      ...r   new_embeddingsc                 $    XR                   l        g r   r  r   r  s     r?   set_input_embeddings$FunnelBaseModel.set_input_embeddingsV      *8'r   r   r   r   position_ids	head_maskr   rY  r  r  c
           	      P   Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U
[        R                  US9nU R                  XS9nU R                  UUUUUU	S9nU$ )NDYou cannot specify both input_ids and inputs_embeds at the same timer   5You have to specify either input_ids or inputs_embedsr   r   r   r   r   rY  r  r  )rm   rY  r  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   ri   onesr=  r   r&   r  )r   r   r   r   r  r  r   rY  r  r  input_shaper   encoder_outputss                r?   r   FunnelBaseModel.forwardY  s7    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN 	O,,))/!5# ' 
 r   )r&   r  	NNNNNNNNNr   r   r   r   r   r   r   r   r  r  r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   r  r  C  s   |  /bll /92<< 9D 9  -11515/3,004,0/3&*/ELL)/ !./ !.	/
 u||,/ ELL)/  -/ $D>/ 'tn/ d^/ 
uo%	&/ /r   r  c                   F  ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
       SS	\\R                     S
\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelModeli  rm   r   Nc                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r   )
r   r   rm   r   r&   r}  r  r  decoderr  r   s     r?   r   FunnelModel.__init__  sE     *62$V,$V, 	r   c                 .    U R                   R                  $ r   r  r  s    r?   r   FunnelModel.get_input_embeddings  r  r   r  c                 $    XR                   l        g r   r  r  s     r?   r   FunnelModel.set_input_embeddings  r  r   r   r   r   r   rY  r  r  c           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       S S nO[	        S5      eUb  UR                  OUR                  n	Uc  [        R                  " XS9nUc$  [        R                  " U[        R                  U	S9nU R                  XS9nU R                  UUUUSUS9n
U R                  U
S	   U
S
   U R                   R                  S	      UUUUUS9nU(       d<  S	nUS	   4nU(       a  US
-  nXS
   X   -   4-   nU(       a  US
-  nXS   X   -   4-   nU$ [!        US	   U(       a  U
R"                  UR"                  -   OS U(       a  U
R$                  UR$                  -   S9$ S S9$ )Nr  r   r  r  r   r  Tr  r   r   )r  r  r   r   rY  r  r  r   r  )rm   rY  r  r  r  r  r   r   ri   r   r=  r   r&   r  r	  r]   r   r  r  )r   r   r   r   r   rY  r  r  r  r   r  decoder_outputsidxoutputss                 r?   r   FunnelModel.forward  s3    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN 	O,,))/!%# ' 
 ,,(+.q1$++2I2I!2LM))/!5# ' 
 C&q)+G#q!Q%7/:N%N$PP q!Q%7/:N%N$PPN-a0# +88?;X;XXTe22_5O5OO
 	

 lp
 	
r   )rm   r	  r&   r  )NNNNNNNr  r   s   @r?   r  r    s    |  /bll /92<< 9D 9  -1151504,0/3&*H
ELL)H
 !.H
 !.	H

  -H
 $D>H
 'tnH
 d^H
 
uo%	&H
 H
r   r  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForPreTrainingi  rm   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r   r   r  r  r  discriminator_predictionsr  r   s     r?   r   FunnelForPreTraining.__init__  s3     !&))G)O&r   r   r   r   r   labelsrY  r  r  c	           
      f   Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      nSnUb  [        R
                  " 5       nUb`  UR                  SU
R                  S   5      S:H  nUR                  SU
R                  S   5      U   nX^   nU" UUR                  5       5      nO4U" UR                  SU
R                  S   5      UR                  5       5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
    docstring) Indices should be in `[0, 1]`:

    - 0 indicates the token is an original token,
    - 1 indicates the token was replaced.

Examples:

```python
>>> from transformers import AutoTokenizer, FunnelForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
>>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> logits = model(**inputs).logits
```Nr   r   r   rY  r  r  r   r   r   r  r  r  r  )rm   r  r  r  r   r	   r[  re   r\  r  r  r  )r   r   r   r   r   r  rY  r  r  r  discriminator_sequence_outputr  r  loss_fctactive_lossactive_logitsactive_labelsr&  s                     r?   r   FunnelForPreTraining.forward  sh   @ &1%<k$++B]B]&*kk))'/!5# '2 '
# )DA(F%//0MN++-H),11"6S6Y6YZ[6\]abb &B0M0S0STU0V WXc d & 3}/B/B/DEB0M0S0STU0V WY_YeYeYghY!<QR!@@F)-)9TGf$EvE)5CC2==	
 	
r   )r  r  NNNNNNNN)r   r   r   r   r   r   r   r   ri   r   rg  r   r   r  r   r   r   r   s   @r?   r  r    s    |    -1151504)-,0/3&*B
ELL)B
 !.B
 !.	B

  -B
 &B
 $D>B
 'tnB
 d^B
 
u00	1B
 B
r   r  c                   l  ^  \ rS rSrS/rS\SS4U 4S jjrS\R                  4S jr	S\R                  SS4S	 jr\        SS
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelForMaskedLMi?  zlm_head.weightrm   r   Nc                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  5      U l        U R                  5         g r   )
r   r   r  r  r   r;  r   r   lm_headr  r   s     r?   r   FunnelForMaskedLM.__init__C  sD     !&)yy1B1BC 	r   c                     U R                   $ r   r'  r  s    r?   get_output_embeddings'FunnelForMaskedLM.get_output_embeddingsL  s    ||r   r  c                     Xl         g r   r*  r  s     r?   set_output_embeddings'FunnelForMaskedLM.set_output_embeddingsO  s    %r   r   r   r   r   r  rY  r  r  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr  r   r   r   r  )
rm   r  r  r'  r
   r[  r   r   r  r  )r   r   r   r   r   r  rY  r  r  r  r  prediction_logitsmasked_lm_lossr  r&  s                  r?   r   FunnelForMaskedLM.forwardR  s    $ &1%<k$++B]B]++))'/!5#  
 $AJ LL):;')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r   )r  r'  r#  )r   r   r   r   _tied_weights_keysr   r   r   r;  r+  r   r.  r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   r%  r%  ?  s   *+|  ryy &BLL &T &  -1151504)-,0/3&*.
ELL).
 !..
 !.	.

  -.
 &.
 $D>.
 'tn.
 d^.
 
un$	%.
 .
r   r%  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForSequenceClassificationi  rm   r   Nc                    > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        [        XR                  5      U l        U R                  5         g r   )	r   r   
num_labelsrm   r  r  r  
classifierr  r   s     r?   r   (FunnelForSequenceClassification.__init__  sJ      ++%f-26;L;LMr   r   r   r   r   r  rY  r  r  c	           
      >   Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U
SS2S4   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                   U	R"                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr   r  )rm   r  r  r9  problem_typer8  r   ri   r   rY   r   r  r
   r[  r	   r   r  r  )r   r   r   r   r   r  rY  r  r  r  r  pooled_outputr  r  r  r&  s                   r?   r   'FunnelForSequenceClassification.forward  s   $ &1%<k$++B]B]++))'/!5#  
 $AJ)!Q$//{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r   )r9  rm   r  r8  r#  )r   r   r   r   r   r   r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   r6  r6    s    |    -1151504)-,0/3&*A
ELL)A
 !.A
 !.	A

  -A
 &A
 $D>A
 'tnA
 d^A
 
u..	/A
 A
r   r6  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForMultipleChoicei  rm   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        US5      U l        U R                  5         g r,  )r   r   r  r  r  r9  r  r   s     r?   r    FunnelForMultipleChoice.__init__  s4     %f-261=r   r   r   r   r   r  rY  r  r  c	           
         Ub  UOU R                   R                  nUb  UR                  S   OUR                  S   n	Ub!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUS9n
U
S   nUSS2S4   nU R                  U5      nUR                  SU	5      nSnUb  [        5       nU" X5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )a"  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r   r  r   r  )rm   r  re   r[  r   r  r9  r
   r   r  r  )r   r   r   r   r   r  rY  r  r  num_choicesr  r  r@  r  reshaped_logitsr  r  r&  s                     r?   r   FunnelForMultipleChoice.forward  s   $ &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImq ( r=#5#5b#9=;M;Mb;QR 	 ++))'/!5#  
 $AJ)!Q$// ++b+6')HO4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r   )r9  r  r#  )r   r   r   r   r   r   r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   rC  rC    s    |    -1151504)-,0/3&*:
ELL):
 !.:
 !.	:

  -:
 &:
 $D>:
 'tn:
 d^:
 
u//	0:
 :
r   rC  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForTokenClassificationi"  rm   r   Nc                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )r   r   r8  r  r  r   r   r   r   r;  r   r9  r  r   s     r?   r   %FunnelForTokenClassification.__init__$  si      ++!&)zz&"7"78))F$6$68I8IJ 	r   r   r   r   r   r  rY  r  r  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      n
U R	                  U
5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   r   r   r  )rm   r  r  r   r9  r
   r[  r8  r   r  r  )r   r   r   r   r   r  rY  r  r  r  r  r  r  r  r&  s                  r?   r   $FunnelForTokenClassification.forward/  s      &1%<k$++B]B]++))'/!5#  
 $AJ LL):;!23')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r   )r9  r   r  r8  r#  )r   r   r   r   r   r   r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   rL  rL  "  s    	| 	 	  -1151504)-,0/3&*-
ELL)-
 !.-
 !.	-

  --
 &-
 $D>-
 'tn-
 d^-
 
u++	,-
 -
r   rL  c                   >  ^  \ rS rSrS\SS4U 4S jjr\         SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForQuestionAnsweringi`  rm   r   Nc                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )
r   r   r8  r  r  r   r;  r   
qa_outputsr  r   s     r?   r   #FunnelForQuestionAnswering.__init__b  sS      ++!&)))F$6$68I8IJ 	r   r   r   r   r   start_positionsend_positionsrY  r  r  c
           
         U	b  U	OU R                   R                  n	U R                  UUUUUUU	S9n
U
S   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU	(       d  X4U
SS  -   nUb  U4U-   $ U$ [        UUUU
R                  U
R                  S9$ )	Nr  r   r   r   r   )ignore_indexr   )r  start_logits
end_logitsr  r  )rm   r  r  rT  rS   r  
contiguousrf   r   squezeclampr
   r   r  r  )r   r   r   r   r   rV  rW  rY  r  r  r  r  r  rZ  r[  
total_lossignored_indexr  
start_lossend_lossr&  s                        r?   r   "FunnelForQuestionAnswering.forwardl  s    &1%<k$++B]B]++))'/!5#  
 $AJ!23#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"8"8"<=%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r   )r  r8  rT  r  )r   r   r   r   r   r   r   r   ri   r   rg  r   r   r   r   r   r   r   s   @r?   rR  rR  `  s    |    -11515042604,0/3&*:
ELL):
 !.:
 !.	:

  -:
 "%,,/:
  -:
 $D>:
 'tn:
 d^:
 
u22	3:
 :
r   rR  )
r  r%  rC  r  rR  r6  rL  r  r  r}   )TF)Ar-  rJ   dataclassesr   typingr   r   r   r   rE   ro   ri   r   torch.nnr	   r
   r   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   utilsr   r   r   configuration_funnelr   
get_loggerr   rH   r]  r}   Moduler   r   r   rY   r6  r`   rW   rt  r}  rg  r  r  r  r  r  r  r  r  r  r%  r6  rC  rL  rR  __all__r<   r   r?   <module>ro     s   ( 	 ! / /    A A !  . 9 9 . 
		H	% 
Wtryy "A ryy A HELL s SV [`[g[g  MG")) MG`+BII +&E")) E&<uBII <u@ di|| .1AE\`
\\,.uBII .ubRYY   _O _ _<'ryy ' : : :8 @+ @@F Z
' Z
 Z
z L
0 L
L
^ A
- A
 A
H M
&; M
M
` D
3 D
 D
N :
#8 :
 :
z F
!6 F
 F
Rr   