
"""PyTorch TimesFM model."""

import math
from dataclasses import dataclass
from typing import Callable, Optional, Sequence, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import BaseModelOutput
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...processing_utils import Unpack
from ...utils import auto_docstring, can_return_tuple, logging
from ..llama.modeling_llama import LlamaRMSNorm
from ..phi4_multimodal.modeling_phi4_multimodal import simple_eager_attention_forward
from .configuration_timesfm import TimesFmConfig


logger = logging.get_logger(__name__)


@dataclass
class TimesFmOutput(BaseModelOutput):
    """
    Args:
        loc (`torch.Tensor` of shape `(batch_size, )`):
            The mean of the time series inputs.
        scale (`torch.Tensor` of shape `(batch_size,)`):
            The scale of the time series inputs.
    """

    loc: Optional[torch.Tensor] = None
    scale: Optional[torch.Tensor] = None

@dataclass
class TimesFmOutputForPrediction(BaseModelOutput):
    """
    Args:
        mean_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
            The mean predictions of the time series.
        full_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
            The full predictions of the time series including the mean and the quantiles.
        loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `future_values` is provided):
            The loss of the TimesFM model.
    """

    mean_predictions: Optional[torch.Tensor] = None
    full_predictions: Optional[torch.Tensor] = None
    loss: Optional[Union[torch.Tensor, float]] = None

class TimesFmMLP(nn.Module):
    """Pax MLP in pytorch."""

    def __init__(self, config: TimesFmConfig):
        super().__init__()
        hidden_size = config.hidden_size
        intermediate_size = config.intermediate_size

        self.gate_proj = nn.Linear(hidden_size, intermediate_size)
        self.down_proj = nn.Linear(intermediate_size, hidden_size)
        self.layer_norm = nn.LayerNorm(normalized_shape=hidden_size, eps=1e-6)

    def forward(self, x, paddings=None):
        gate_inp = self.layer_norm(x)
        gate = self.gate_proj(gate_inp)
        gate = F.relu(gate)
        outputs = self.down_proj(gate)
        if paddings is not None:
            outputs = outputs * (1.0 - paddings[:, :, None])
        return outputs + x


class TimesFmResidualBlock(nn.Module):
    """TimesFM residual block."""

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.input_dims = input_dims
        self.hidden_dims = hidden_dims
        self.output_dims = output_dims

        self.input_layer = nn.Linear(input_dims, hidden_dims)
        self.activation = nn.SiLU()
        self.output_layer = nn.Linear(hidden_dims, output_dims)
        self.residual_layer = nn.Linear(input_dims, output_dims)

    def forward(self, x):
        hidden = self.input_layer(x)
        hidden = self.activation(hidden)
        output = self.output_layer(hidden)
        residual = self.residual_layer(x)
        return output + residual


class TimesFmRMSNorm(LlamaRMSNorm):
    pass


class TimesFmPositionalEmbedding(nn.Module):
    """Generates position embedding for a given 1-d sequence."""

    def __init__(self, config: TimesFmConfig):
        super().__init__()
        min_timescale = config.min_timescale
        max_timescale = config.max_timescale
        self.embedding_dims = config.hidden_size
        num_timescales = self.embedding_dims // 2
        log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / max(num_timescales - 1, 1)
        self.register_buffer(
            "inv_timescales",
            min_timescale * torch.exp(torch.arange(num_timescales, dtype=torch.float32) * -log_timescale_increment),
        )

    def forward(self, seq_length=None, position=None):
        """Generates a Tensor of sinusoids with different frequencies.

        Args:
            seq_length: an optional Python int defining the output sequence length.
              if the `position` argument is specified.
            position: [B, seq_length], optional position for each token in the
              sequence, only required when the sequence is packed.

        Returns:
            [B, seqlen, D] if `position` is specified, else [1, seqlen, D]
        """
        if position is None and seq_length is None:
            raise ValueError("Either position or seq_length must be provided")

        if position is None:
            position = torch.arange(seq_length, dtype=torch.float32, device=self.inv_timescales.device).unsqueeze(0)
        elif position.ndim != 2:
            raise ValueError(f"position must be 2-dimensional, got shape {position.shape}")

        scaled_time = position.view(*position.shape, 1) * self.inv_timescales.view(1, 1, -1)
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
        # Pad with a zero column if the embedding dimension is odd.
        signal = F.pad(signal, (0, 0, 0, self.embedding_dims % 2))
        return signal


class TimesFmAttention(nn.Module):
    """Implements the attention used in TimesFM. One key difference is that there is _per_dim_scaling of the query."""

    def __init__(self, config: TimesFmConfig, layer_idx: int):
        super().__init__()
        self.config = config
        self.is_causal = True
        self.attention_dropout = config.attention_dropout
        self.layer_idx = layer_idx

        self.num_heads = config.num_attention_heads
        self.hidden_size = config.hidden_size
        self.head_dim = config.head_dim
        self.q_size = self.num_heads * self.head_dim
        self.kv_size = self.num_heads * self.head_dim

        self.scaling = nn.Parameter(torch.empty((self.head_dim,)))

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim)
        self.k_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim)
        self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size)

    def _scale_query(self, query: torch.Tensor) -> torch.Tensor:
        scale = F.softplus(self.scaling).mul(1.442695041 / math.sqrt(self.head_dim))
        return query * scale[None, None, None, :]

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        query_states = self._scale_query(query_states)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        attention_interface: Callable = simple_eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=1.0,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class TimesFmDecoderLayer(nn.Module):
    """Transformer layer."""

    def __init__(self, config: TimesFmConfig, layer_idx: int):
        super().__init__()
        self.self_attn = TimesFmAttention(config, layer_idx=layer_idx)
        self.mlp = TimesFmMLP(config)
        self.input_layernorm = TimesFmRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor,
        paddings: torch.Tensor,
        output_attentions: bool = False,
    ) -> tuple[Optional[torch.Tensor], torch.Tensor]:
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
        hidden_states, scores = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
        )
        hidden_states = residual + hidden_states

        hidden_states = self.mlp(hidden_states, paddings=paddings)

        return scores, hidden_states


@auto_docstring
class TimesFmPreTrainedModel(PreTrainedModel):
    config_class = TimesFmConfig
    base_model_prefix = "timesfm"
    _no_split_modules = ["TimesFmDecoderLayer"]
    main_input_name = "past_values"
    _supports_sdpa = True

    def _init_weights(self, module):
        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)
        elif isinstance(module, TimesFmRMSNorm):
            nn.init.ones_(module.weight)
        elif isinstance(module, TimesFmAttention):
            nn.init.ones_(module.scaling)


@auto_docstring
class TimesFmModel(TimesFmPreTrainedModel):
    def __init__(self, config: TimesFmConfig):
        super().__init__(config)
        self.config = config
        self.input_ff_layer = TimesFmResidualBlock(
            input_dims=2 * config.patch_length,
            output_dims=config.hidden_size,
            hidden_dims=config.intermediate_size,
        )
        self.freq_emb = nn.Embedding(num_embeddings=config.freq_size, embedding_dim=config.hidden_size)
        self.layers = nn.ModuleList(
            [TimesFmDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        if self.config.use_positional_embedding:
            self.position_emb = TimesFmPositionalEmbedding(config=config)

        # Initialize weights and apply final processing
        self.post_init()

    def _forward_transform(
        self, inputs: torch.Tensor, patched_pads: torch.Tensor
    ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
        """Input is of shape [B, N, P]."""
        mu, sigma = self._timesfm_masked_mean_std(inputs, patched_pads)
        sigma = torch.where(
            sigma < self.config.tolerance,
            torch.tensor(1.0, dtype=sigma.dtype, device=sigma.device),
            sigma,
        )

        # Normalize each patch.
        outputs = (inputs - mu[:, None, None]) / sigma[:, None, None]
        outputs = torch.where(
            torch.abs(inputs - self.config.pad_val) < self.config.tolerance,
            torch.tensor(self.config.pad_val, dtype=outputs.dtype, device=outputs.device),
            outputs,
        )
        return outputs, (mu, sigma)

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        past_values: torch.Tensor,
        past_values_padding: torch.LongTensor,
        freq: torch.Tensor,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
    ) -> TimesFmOutput:
        r"""
        past_values_padding (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            The padding indicator of the time series.
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.
        """
        bsize = past_values.shape[0]
        patched_inputs = past_values.view(bsize, -1, self.config.patch_length)
        patched_pads = past_values_padding.view(bsize, -1, self.config.patch_length)

        patched_inputs = torch.where(
            torch.abs(patched_pads - 1.0) < self.config.tolerance,
            torch.tensor(0.0, dtype=patched_inputs.dtype, device=patched_inputs.device),
            patched_inputs,
        )
        patched_pads = torch.where(
            torch.abs(patched_inputs - self.config.pad_val) < self.config.tolerance,
            torch.tensor(1.0, dtype=patched_pads.dtype, device=patched_pads.device),
            patched_pads,
        )
        patched_inputs, stats = self._forward_transform(patched_inputs, patched_pads)

        # B x N x D
        patched_inputs = patched_inputs * (1.0 - patched_pads)
        concat_inputs = torch.cat([patched_inputs, patched_pads], dim=-1)
        model_input = self.input_ff_layer(concat_inputs)

        # A patch is only treated as padded if all of its values are padded.
        patched_padding = torch.min(patched_pads, dim=-1)[0]
        if self.config.use_positional_embedding:
            pos_emb = self.position_emb(model_input.shape[1])
            pos_emb = torch.concat([pos_emb] * model_input.shape[0], dim=0)
            pos_emb = self._timesfm_shift_padded_seq(patched_padding, pos_emb)
            model_input += pos_emb

        f_emb = self.freq_emb(freq)  # B x 1 x D
        model_input += f_emb

        # Convert paddings to a 4D attention mask and combine with the causal mask.
        hidden_states = model_input
        attention_mask = self._prepare_4d_attention_mask(
            attention_mask=patched_padding,
            sequence_length=model_input.shape[1],
            dtype=model_input.dtype,
            device=model_input.device,
            is_causal=True,
        )

        all_attentions = []
        all_hidden_states = []

        for layer in self.layers[: self.config.num_hidden_layers]:
            scores, hidden_states = layer(
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                paddings=patched_padding,
                output_attentions=output_attentions,
            )
            if output_attentions:
                all_attentions.append(scores)
            if output_hidden_states:
                all_hidden_states.append(hidden_states)

        if output_hidden_states:
            all_hidden_states = [model_input] + all_hidden_states
        else:
            all_hidden_states = None

        return TimesFmOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_attentions if output_attentions else None,
            loc=stats[0],
            scale=stats[1],
        )

    @staticmethod
    def _prepare_4d_attention_mask(
        attention_mask: Optional[torch.Tensor],
        sequence_length: int,
        dtype: torch.dtype,
        device: torch.device,
        is_causal: bool = True,
    ) -> Optional[torch.Tensor]:
        """
        Creates 4D attention mask and combines causal and padding masks if needed.

        Args:
            attention_mask: Optional tensor of shape (batch_size, seq_length) containing padding mask
            sequence_length: Length of the sequence
            dtype: Data type of the mask
            device: Device of the mask
            is_causal: Whether to apply causal masking

        Returns:
            4D attention mask of shape (batch_size, 1, seq_length, seq_length)
        """
        # Most negative value representable in the given dtype.
        min_value = torch.finfo(dtype).min if dtype.is_floating_point else torch.iinfo(dtype).min

        # Convert the 2D padding mask to a 4D attention mask.
        if attention_mask is not None:
            attention_mask = attention_mask.view(attention_mask.shape[0], 1, 1, -1)
            attention_mask = attention_mask * min_value

        # Create the causal mask if needed and combine it with the padding mask.
        if is_causal:
            causal_mask = torch.triu(
                torch.ones((sequence_length, sequence_length), dtype=dtype, device=device) * min_value,
                diagonal=1,
            )
            causal_mask = causal_mask.view(1, 1, sequence_length, sequence_length)
            if attention_mask is not None:
                attention_mask = torch.minimum(attention_mask, causal_mask)
            else:
                attention_mask = causal_mask

        return attention_mask
    @staticmethod
    def _timesfm_masked_mean_std(inputs: torch.Tensor, padding: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Calculates mean and standard deviation of `inputs` across axis 1.

        It excludes values where `padding` is 1.

        Args:
            inputs: A PyTorch tensor of shape [b, n, p].
            padding: A PyTorch tensor of shape [b, n, p] with values 0 or 1.

        Returns:
            A tuple containing the mean and standard deviation.
            We return the statistics of the first patch with more than three non-padded values.
        """

        def _get_patch_index(arr: torch.Tensor):
            indices = torch.argmax((arr >= 3).to(torch.int32), dim=1)
            row_sum = (arr >= 3).to(torch.int32).sum(dim=1)
            return torch.where(row_sum == 0, arr.shape[1] - 1, indices)

        pad_sum = torch.sum(1 - padding, dim=2)
        patch_indices = _get_patch_index(pad_sum)
        bidxs = torch.arange(inputs.shape[0])

        arr = inputs[bidxs, patch_indices, :]
        pad = padding[bidxs, patch_indices, :]

        # Get the number of valid (non-padded) elements.
        mask = 1 - pad
        num_valid_elements = torch.sum(mask, dim=1)
        num_valid_elements = torch.where(
            num_valid_elements == 0,
            torch.tensor(1, dtype=num_valid_elements.dtype, device=num_valid_elements.device),
            num_valid_elements,
        )

        # Get the masked sum and squared sum.
        masked_sum = torch.sum(arr * mask, dim=1)
        masked_squared_sum = torch.sum((arr * mask) ** 2, dim=1)

        # Calculate the masked mean and standard deviation.
        masked_mean = masked_sum / num_valid_elements
        masked_var = masked_squared_sum / num_valid_elements - masked_mean**2
        masked_var = torch.where(
            masked_var < 0.0,
            torch.tensor(0.0, dtype=masked_var.dtype, device=masked_var.device),
            masked_var,
        )
        masked_std = torch.sqrt(masked_var)

        return masked_mean, masked_std

    @staticmethod
    def _timesfm_shift_padded_seq(mask: torch.Tensor, seq: torch.Tensor) -> torch.Tensor:
        """Shifts rows of seq based on the first 0 in each row of the mask.

        Args:
            mask: mask tensor of shape [B, N]
            seq: seq tensor of shape [B, N, P]

        Returns:
            The shifted sequence.
        """
        batch_size, num_seq, feature_dim = seq.shape

        new_mask: torch.BoolTensor = mask == 0

        # Use argmax to find the first True value in each row.
        indices = new_mask.to(torch.int32).argmax(dim=1)

        # Handle rows with all zeros.
        indices[~new_mask.any(dim=1)] = -1

        # Create index ranges for each sequence in the batch.
        idx_range = torch.arange(num_seq, device=seq.device).view(1, -1, 1).expand(batch_size, -1, feature_dim)

        # Calculate shifted indices for each element in each sequence.
        shifted_idx = (idx_range - indices[:, None, None]) % num_seq

        # Gather values from seq using the shifted indices.
        shifted_seq = seq.gather(1, shifted_idx)

        return shifted_seq


class TimesFmModelForPrediction(TimesFmPreTrainedModel):
    """TimesFM model for quantile and mean prediction."""

    def __init__(self, config: TimesFmConfig):
        super().__init__(config)

        self.config = config
        self.context_len = config.context_length
        self.horizon_len = config.horizon_length

        self.decoder = TimesFmModel(config)

        # quantile and mean output
        self.horizon_ff_layer = TimesFmResidualBlock(
            input_dims=config.hidden_size,
            output_dims=config.horizon_length * (1 + len(config.quantiles)),
            hidden_dims=config.intermediate_size,
        )

        # Initialize weights and apply final processing
        self.post_init()

    def _preprocess(
        self, inputs: Sequence[torch.Tensor], freq: Sequence[int]
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:

This function both pads each time series to match the context length, and
pads the inputs to meet the SPMD shape requirement.

Args:
  inputs: A list of 1d Tensors. Each Tensor is the context time series of
    a single forecast task.
  freq: list of frequencies

Returns:
A tuple of:
- the padded input time series to meet the model required context.
- the padding indicator.
- the number of padded examples for SPMD so that each core has the same
    number (a multiple of `batch_size`) of examples.
r   r{   r~   Nrk   r}   r   )	enumerater   r!   zerosr[  rl   r|   rY  r   r*  r  stackr  r5  r   )r?   r   r  input_tsinput_paddinginp_freqits	input_lenr/  num_front_pads              r&   _preprocess%TimesFmModelForPrediction._preprocess@  sw   ( -/Bv&EAIkk).>.>">bhhWYW`W`aG+++ $ 0 09 <YYMRTR[R[ \^`aghi))UZZXXV]VdVd%egn$ouvw---)))+,!$4$4t7G7G$G"H"JKOOB  )OODG$ '  KKa(KK1-LL5==b!D
 	
r%   model_outputr  c                 
   U R                  U5      nUR                  u  pEnUR                  XEU R                  R                  [        U R                  R                  5      S-   5      nUu  pxX8SS2SSS4   -  USS2SSS4   -   $ )z*Postprocess output of stacked transformer.r   N)r_  r   r   r1   rZ  r]  r^  )	r?   rn  r  	output_tsbn_r  r	  s	            r&   _postprocess_output-TimesFmModelForPrediction._postprocess_outputk  s     )),7	 //aNN1)C)CSI^I^E_bcEcd		D$!4551dD$;N8OOOr%   predictionstargetsc                 ,   / n[        U R                  R                  5       HL  u  pEX!SU4   -
  n[        R                  " US-
  U-  XV-  5      nUR                  UR                  5       5        MN     [        R                  " U5      R                  5       $ )N.r   )rb  r1   r^  r!   rr   r  r   rd  )r?   rv  rw  lossesrh  qerrorsr,   s           r&   _quantile_loss(TimesFmModelForPrediction._quantile_lossz  sz    dkk334DA3622F99a!ev-qz:DMM$))+& 5 {{6"''))r%   r   window_sizefuture_valuesforecast_context_lenreturn_forecast_on_contexttruncate_negativer   r  c
           
      	   Uc  U R                   n
OUn
US   R                  nU Vs/ s H  oU
* S PM
     nn[        R                  " [        R                  " U Vs/ s H  n[        R                  " U5      PM     sn5      5      nUb]  / n/ n[        U5       HC  u  nnUR                  U R                  X5      5        Uc  M+  UR                  UU   /S-  5        ME     UnUb  UnUc$  [        R                  S5        S/[        U5      -  nUc  U R                  R                  nU	c  U R                  R                  n	U R                  X5      u  nnnUR                  U5      nUR                  U5      nUR                  U5      nUnUR                   S   n/ nUR                   S   UR                   S   U R"                  -   :w  a8  [%        SUR                   S    SUR                   S    SU R"                   35      eU R                  R&                  nU R"                  U-   S-
  U-  n[)        U5       GH1  nUSS2SUR                   S   24   nUSS2U
* S24   nUSS2U
* S24   nU R+                  UUUUU	S	9nU R-                  UR.                  UR0                  UR2                  45      nU(       am  US:X  ag  USS2SS
2SU R                  R4                  2SS24   nUR7                  UR9                  S5      S
UR9                  S5      5      nUR;                  U5        USS2S
SU2S4   nUSS2S
SU2SS24   nUR;                  U5        [        R<                  " UU/S
S9nGM4     U(       aF  [        R<                  " USS9SS2SUU R                  R4                  -
  U R"                  -   2SS24   nO+[        R<                  " USS9SS2SU R"                  2SS24   nUSS2SS2S4   n Ub*  U SSS2S4   U SSS2S4   -   n USSS2S4   USSS2S4   -   nUS:  a5  U(       a.  [        R>                  " U S5      n [        R>                  " US5      nSn!Ub:  [@        RB                  " U U5      n"U RE                  USS2SS2SS24   U5      n#U"U#-   n![G        WR.                  U(       a  URH                  OSU	(       a  URJ                  OSU UU!S9$ s  snf s  snf )aa  
        window_size (`int`, *optional*):
            Window size of trend + residual decomposition. If None then we do not do decomposition.
        future_values (`torch.Tensor`, *optional*):
            Optional future time series values to be used for loss computation.
        forecast_context_len (`int`, *optional*):
            Optional max context length.
        return_forecast_on_context (`bool`, *optional*):
            True to return the forecast on the context when available, i.e. after the first input patch.
        truncate_negative (`bool`, *optional*):
            Truncate to only non-negative values if any of the contexts have non-negative values,
            otherwise do nothing.
        output_attentions (`bool`, *optional*):
            Whether to output the attentions.
        output_hidden_states (`bool`, *optional*):
            Whether to output the hidden states.
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.

        Example:

        ```python
        >>> from transformers import TimesFmModelForPrediction

        >>> model = TimesFmModelForPrediction.from_pretrained("google/timesfm-2.0-500m-pytorch")

        >>> forecast_input = [torch.linspace(0, 20, 100).sin(), torch.linspace(0, 20, 200).sin(), torch.linspace(0, 20, 400).sin()]
        >>> frequency_input = torch.tensor([0, 1, 2], dtype=torch.long)

        >>> # Generate
        >>> with torch.no_grad():
        >>>     outputs = model(past_values=forecast_input, freq=frequency_input, return_dict=True)
        >>>     point_forecast_conv = outputs.mean_predictions
        >>>     quantile_forecast_conv = outputs.full_predictions
        ```
        """
        if forecast_context_len is None:
            fcontext_len = self.context_len
        else:
            fcontext_len = forecast_context_len

        # Get the device from the first input tensor.
        device = past_values[0].device

        # Truncate inputs to forecast_context_len.
        inputs = [ts[-fcontext_len:] for ts in past_values]
        inp_min = torch.min(torch.stack([torch.min(ts) for ts in inputs]))

        if window_size is not None:
            new_inputs = []
            new_freqs = []
            for i, ts in enumerate(inputs):
                new_inputs.extend(self._timesfm_moving_average(ts, window_size))
                if freq is not None:
                    new_freqs.extend([freq[i]] * 2)
            inputs = new_inputs
            if freq is not None:
                freq = new_freqs

        if freq is None:
            logger.info("No frequency provided via `freq`. Default to high (0).")
            freq = [0] * len(inputs)

        if output_attentions is None:
            output_attentions = self.config.output_attentions
        if output_hidden_states is None:
            output_hidden_states = self.config.output_hidden_states

        input_ts, input_padding, inp_freq = self._preprocess(inputs, freq)
        # Move tensors to the same device as the inputs.
        input_ts = input_ts.to(device)
        input_padding = input_padding.to(device)
        inp_freq = inp_freq.to(device)

        final_out = input_ts
        context_len = final_out.shape[1]
        full_outputs = []

        if input_padding.shape[1] != final_out.shape[1] + self.horizon_len:
            raise ValueError(
                "Length of paddings must match length of input + horizon_len:"
                f" {input_padding.shape[1]} != {final_out.shape[1]} + {self.horizon_len}"
            )
        output_patch_len = self.config.horizon_length

        num_decode_patches = (self.horizon_len + output_patch_len - 1) // output_patch_len
        for step_index in range(num_decode_patches):
            current_padding = input_padding[:, 0 : final_out.shape[1]]
            input_ts = final_out[:, -fcontext_len:]
            input_padding = current_padding[:, -fcontext_len:]
            decoder_output = self.decoder(
                past_values=input_ts,
                past_values_padding=input_padding,
                freq=inp_freq,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
            )
            fprop_outputs = self._postprocess_output(
                decoder_output.last_hidden_state,
                (decoder_output.loc, decoder_output.scale),
            )

            if return_forecast_on_context and step_index == 0:
                # For the first decoding step, collect the model forecast on the
                # context except the unavailable first input batch forecast.
                new_full_ts = fprop_outputs[:, :-1, : self.config.patch_length, :]
                # We have to use reshape and not view for non-contiguous memory.
                new_full_ts = new_full_ts.reshape(new_full_ts.size(0), -1, new_full_ts.size(3))

                full_outputs.append(new_full_ts)

            # (full batch, last patch, output_patch_len, index of mean forecast = 0)
            new_ts = fprop_outputs[:, -1, :output_patch_len, 0]
            # (full batch, last patch, output_patch_len, all output indices)
            new_full_ts = fprop_outputs[:, -1, :output_patch_len, :]
            full_outputs.append(new_full_ts)
            final_out = torch.concatenate([final_out, new_ts], axis=-1)

        if return_forecast_on_context:
            # `full_outputs` indexing starts after the first input patch.
            full_outputs = torch.concatenate(full_outputs, axis=1)[
                :, : (context_len - self.config.patch_length + self.horizon_len), :
            ]
        else:
            # `full_outputs` indexing starts at the forecast horizon.
            full_outputs = torch.concatenate(full_outputs, axis=1)[:, 0 : self.horizon_len, :]

        mean_outputs = full_outputs[:, :, 0]
        if window_size is not None:
            mean_outputs = mean_outputs[0::2, ...] + mean_outputs[1::2, ...]
            full_outputs = full_outputs[0::2, ...] + full_outputs[1::2, ...]
        if inp_min >= 0 and truncate_negative:
            mean_outputs = torch.maximum(mean_outputs, torch.tensor(0.0, device=mean_outputs.device))
            full_outputs = torch.maximum(full_outputs, torch.tensor(0.0, device=full_outputs.device))

        loss = None
        if future_values is not None:
            mse_loss = F.mse_loss(mean_outputs, future_values)
            quantile_loss = self._quantile_loss(full_outputs[:, :, 1:], future_values)
            loss = mse_loss + quantile_loss

        return TimesFmOutputForPrediction(
            last_hidden_state=decoder_output.last_hidden_state,
            attentions=decoder_output.attentions if output_attentions else None,
            hidden_states=decoder_output.hidden_states if output_hidden_states else None,
            mean_predictions=mean_outputs,
            full_predictions=full_outputs,
            loss=loss,
        )

    @staticmethod
    def _timesfm_moving_average(arr: torch.Tensor, window_size: int) -> list[torch.Tensor]:
        """Calculates the moving average using PyTorch's convolution function."""
        # Pad with zeros to handle initial window positions.
        arr_padded = F.pad(arr, (window_size - 1, 0), "constant", 0)
        # Create a convolution kernel.
        kernel = torch.ones(window_size, dtype=arr.dtype, device=arr.device) / window_size
        # Apply convolution to calculate the moving average.
        smoothed_arr = F.conv1d(arr_padded.view(1, 1, -1), kernel.view(1, 1, -1)).squeeze()
        return [smoothed_arr, arr - smoothed_arr]


__all__ = ["TimesFmModelForPrediction", "TimesFmPreTrainedModel", "TimesFmModel"]