
"""PyTorch FNet model."""

import warnings
from dataclasses import dataclass
from functools import partial
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...utils import auto_docstring, is_scipy_available


if is_scipy_available():
    from scipy import linalg

from ...activations import ACT2FN
from ...modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPooling,
    MaskedLMOutput,
    ModelOutput,
    MultipleChoiceModelOutput,
    NextSentencePredictorOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward
from ...utils import logging
from .configuration_fnet import FNetConfig


logger = logging.get_logger(__name__)


def _two_dim_matmul(x, matrix_dim_one, matrix_dim_two):
    """Applies 2D matrix multiplication to 3D input arrays."""
    seq_length = x.shape[1]
    matrix_dim_one = matrix_dim_one[:seq_length, :seq_length]
    x = x.type(torch.complex64)
    return torch.einsum("bij,jk,ni->bnk", x, matrix_dim_two, matrix_dim_one)


def two_dim_matmul(x, matrix_dim_one, matrix_dim_two):
    return _two_dim_matmul(x, matrix_dim_one, matrix_dim_two)


def fftn(x):
    """
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    """
    out = x
    for axis in reversed(range(x.ndim)[1:]):
        out = torch.fft.fft(out, axis=axis)
    return out


class FNetEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        # Unlike BERT, FNet applies a projection on top of the summed embeddings.
        self.projection = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
        )
        self.register_buffer(
            "token_type_ids", torch.zeros(self.position_ids.size(), dtype=torch.long), persistent=False
        )

    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if token_type_ids is None:
            if hasattr(self, "token_type_ids"):
                buffered_token_type_ids = self.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = inputs_embeds + token_type_embeddings
        position_embeddings = self.position_embeddings(position_ids)
        embeddings += position_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.projection(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class FNetBasicFourierTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self._init_fourier_transform(config)

    def _init_fourier_transform(self, config):
        if not config.use_tpu_fourier_optimizations:
            self.fourier_transform = partial(torch.fft.fftn, dim=(1, 2))
        elif config.max_position_embeddings <= 4096:
            if is_scipy_available():
                self.register_buffer(
                    "dft_mat_hidden", torch.tensor(linalg.dft(config.hidden_size), dtype=torch.complex64)
                )
                self.register_buffer(
                    "dft_mat_seq", torch.tensor(linalg.dft(config.tpu_short_seq_length), dtype=torch.complex64)
                )
                self.fourier_transform = partial(
                    two_dim_matmul, matrix_dim_one=self.dft_mat_seq, matrix_dim_two=self.dft_mat_hidden
                )
            else:
                logging.warning(
                    "SciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier"
                    " transform instead."
                )
                self.fourier_transform = fftn
        else:
            self.fourier_transform = fftn

    def forward(self, hidden_states):
        # Token mixing: keep only the real part of the 2D Fourier transform over sequence and hidden dimensions.
        outputs = self.fourier_transform(hidden_states).real
        return (outputs,)


class FNetBasicOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.LayerNorm(input_tensor + hidden_states)
        return hidden_states


class FNetFourierTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = FNetBasicFourierTransform(config)
        self.output = FNetBasicOutput(config)

    def forward(self, hidden_states):
        self_outputs = self.self(hidden_states)
        fourier_output = self.output(self_outputs[0], hidden_states)
        outputs = (fourier_output,)
        return outputs


class FNetIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class FNetOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class FNetLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1  # The dimension which has the sequence length
        self.fourier = FNetFourierTransform(config)
        self.intermediate = FNetIntermediate(config)
        self.output = FNetOutput(config)

    def forward(self, hidden_states):
        self_fourier_outputs = self.fourier(hidden_states)
        fourier_output = self_fourier_outputs[0]

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, fourier_output
        )
        outputs = (layer_output,)
        return outputs

    def feed_forward_chunk(self, fourier_output):
        intermediate_output = self.intermediate(fourier_output)
        layer_output = self.output(intermediate_output, fourier_output)
        return layer_output


class FNetEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([FNetLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(self, hidden_states, output_hidden_states=False, return_dict=True):
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(layer_module.__call__, hidden_states)
            else:
                layer_outputs = layer_module(hidden_states)

            hidden_states = layer_outputs[0]

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutput(last_hidden_state=hidden_states, hidden_states=all_hidden_states)


class FNetPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # "Pool" the model by simply taking the hidden state corresponding to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class FNetPredictionHeadTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states


class FNetLMPredictionHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.transform = FNetPredictionHeadTransform(config)

        # The output weights are the same as the input embeddings, but there is an output-only bias for each token.
        self.decoder = nn.Linear(config.hidden_size, config.vocab_size)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.transform(hidden_states)
        hidden_states = self.decoder(hidden_states)
        return hidden_states

    def _tie_weights(self) -> None:
        if self.decoder.bias.device.type == "meta":
            self.decoder.bias = self.bias
        else:
            self.bias = self.decoder.bias


class FNetOnlyMLMHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.predictions = FNetLMPredictionHead(config)

    def forward(self, sequence_output):
        prediction_scores = self.predictions(sequence_output)
        return prediction_scores


class FNetOnlyNSPHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, pooled_output):
        seq_relationship_score = self.seq_relationship(pooled_output)
        return seq_relationship_score


class FNetPreTrainingHeads(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.predictions = FNetLMPredictionHead(config)
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, sequence_output, pooled_output):
        prediction_scores = self.predictions(sequence_output)
        seq_relationship_score = self.seq_relationship(pooled_output)
        return prediction_scores, seq_relationship_score


@auto_docstring
class FNetPreTrainedModel(PreTrainedModel):
    config_class = FNetConfig
    base_model_prefix = "fnet"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


@dataclass
class FNetForPreTrainingOutput(ModelOutput):
    """
    Output type of [`FNetForPreTraining`].

    Args:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            Total loss as the sum of the masked language modeling loss and the next sequence prediction
            (classification) loss.
        prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
            plus the initial embedding outputs.
    """

    loss: Optional[torch.FloatTensor] = None
    prediction_logits: Optional[torch.FloatTensor] = None
    seq_relationship_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None


@auto_docstring
class FNetModel(FNetPreTrainedModel):
    """

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://arxiv.org/abs/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    """

    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        self.embeddings = FNetEmbeddings(config)
        self.encoder = FNetEncoder(config)
        self.pooler = FNetPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPooling]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
            batch_size, seq_length = input_shape
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
            batch_size, seq_length = input_shape
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if (
            self.config.use_tpu_fourier_optimizations
            and seq_length <= 4096
            and self.config.tpu_short_seq_length != seq_length
        ):
            raise ValueError(
                "The `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to"
                " the model when using TPU optimizations."
            )

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if token_type_ids is None:
            if hasattr(self.embeddings, "token_type_ids"):
                buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        embedding_output = self.embeddings(
            input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
        )

        encoder_outputs = self.encoder(
            embedding_output, output_hidden_states=output_hidden_states, return_dict=return_dict
        )
        sequence_output = encoder_outputs[0]

        pooler_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooler_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooler_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    """
)
class FNetForPreTraining(FNetPreTrainedModel):
    _tied_weights_keys = ["cls.predictions.decoder.bias", "cls.predictions.decoder.weight"]

    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetPreTrainingHeads(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings
        self.cls.predictions.bias = new_embeddings.bias

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        next_sentence_label: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, FNetForPreTrainingOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        sequence_output, pooled_output = outputs[:2]
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

        total_loss = None
        if labels is not None and next_sentence_label is not None:
            loss_fct = CrossEntropyLoss()
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))
            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
            total_loss = masked_lm_loss + next_sentence_loss

        if not return_dict:
            output = (prediction_scores, seq_relationship_score) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return FNetForPreTrainingOutput(
            loss=total_loss,
            prediction_logits=prediction_scores,
            seq_relationship_logits=seq_relationship_score,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring
class FNetForMaskedLM(FNetPreTrainedModel):
    _tied_weights_keys = ["cls.predictions.decoder.bias", "cls.predictions.decoder.weight"]

    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetOnlyMLMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings
        self.cls.predictions.bias = new_embeddings.bias

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, MaskedLMOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        sequence_output = outputs[0]
        prediction_scores = self.cls(sequence_output)

        masked_lm_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()  # -100 index = padding token
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

        return MaskedLMOutput(loss=masked_lm_loss, logits=prediction_scores, hidden_states=outputs.hidden_states)


@auto_docstring(
    custom_intro="""
    FNet Model with a `next sentence prediction (classification)` head on top.
    """
)
class FNetForNextSentencePrediction(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetOnlyNSPHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs,
    ) -> Union[Tuple, NextSentencePredictorOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```"""
        if "next_sentence_label" in kwargs:
            warnings.warn(
                "The `next_sentence_label` argument is deprecated and will be removed in a future version, use"
                " `labels` instead.",
                FutureWarning,
            )
            labels = kwargs.pop("next_sentence_label")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        pooled_output = outputs[1]
        seq_relationship_scores = self.cls(pooled_output)

        next_sentence_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            next_sentence_loss = loss_fct(seq_relationship_scores.view(-1, 2), labels.view(-1))

        if not return_dict:
            output = (seq_relationship_scores,) + outputs[2:]
            return ((next_sentence_loss,) + output) if next_sentence_loss is not None else output

        return NextSentencePredictorOutput(
            loss=next_sentence_loss,
            logits=seq_relationship_scores,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    """
)
class FNetForSequenceClassification(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.fnet = FNetModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


@auto_docstring
class FNetForMultipleChoice(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, MultipleChoiceModelOutput]:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

        input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
        token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
        position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None
        inputs_embeds = (
            inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
            if inputs_embeds is not None
            else None
        )

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        reshaped_logits = logits.view(-1, num_choices)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(reshaped_logits, labels)

        if not return_dict:
            output = (reshaped_logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return MultipleChoiceModelOutput(loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states)


@auto_docstring
class FNetForTokenClassification(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.fnet = FNetModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, TokenClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


@auto_docstring
class FNetForQuestionAnswering(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.fnet = FNetModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        start_positions: Optional[torch.Tensor] = None,
        end_positions: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids, token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states, return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split adds a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # Sometimes the start/end positions are outside our model inputs; we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss, start_logits=start_logits, end_logits=end_logits, hidden_states=outputs.hidden_states
        )


__all__ = [
    "FNetForMaskedLM",
    "FNetForMultipleChoice",
    "FNetForNextSentencePrediction",
    "FNetForPreTraining",
    "FNetForQuestionAnswering",
    "FNetForSequenceClassification",
    "FNetForTokenClassification",
    "FNetLayer",
    "FNetModel",
    "FNetPreTrainedModel",
]
   r   utilsr   r   scipyr   activationsr   modeling_outputsr   r   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   configuration_fnetr   
get_loggerrm   loggerr*   r.   r6   Moduler8   rv   r   r   r   r   r   r   r   r   r   r  r  r"  r)  r;  rB  r]  r}  r  r  r  r  r  __all__r   r+   r)   <module>r     s     !  ) )    A A 7  !
 
 
 . 6  * 
		H	%M>
 :RYY :z#		 #Lbii 
299 
ryy   		 6a")) a>  ")) "*299 *4!bii !&bii &	9299 	9 */ * *. ={ = =2 b
# b
 b
J V
, V
V
r ;r) ;r ;r| 
R
$7 R

R
j Fg$7 FgFgR Xq/ Xq Xqv 6d!4 6d 6dr A
2 A
 A
Hr+   