
"""PyTorch Pop2Piano model."""

import copy
import math
from typing import Optional, Tuple, Union

import torch
from torch import nn
from torch.nn import CrossEntropyLoss

from transformers.generation import GenerationConfig

from ...activations import ACT2FN
from ...cache_utils import Cache, DynamicCache, EncoderDecoderCache
from ...generation import GenerationMixin
from ...modeling_attn_mask_utils import AttentionMaskConverter
from ...modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPastAndCrossAttentions,
    Seq2SeqLMOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import ALL_LAYERNORM_LAYERS, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    auto_docstring,
    is_torch_flex_attn_available,
    is_torch_fx_proxy,
    is_torchdynamo_compiling,
    logging,
)
from .configuration_pop2piano import Pop2PianoConfig


if is_torch_flex_attn_available():
    from torch.nn.attention.flex_attention import BlockMask

    from ...integrations.flex_attention import make_flex_block_causal_mask


logger = logging.get_logger(__name__)

_load_pop2piano_layer_norm = True

try:
    from apex.normalization import FusedRMSNorm

    _load_pop2piano_layer_norm = False

    logger.info("Discovered apex.normalization.FusedRMSNorm - will use it instead of Pop2PianoLayerNorm")
except ImportError:
    # using the normal Pop2PianoLayerNorm
    pass
except Exception:
    logger.warning("Discovered apex but it failed to load, falling back to Pop2PianoLayerNorm")
    pass


class Pop2PianoLayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):
        """
        Construct a layernorm module in the Pop2Piano style. No bias and no subtraction of mean.
        """
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        # Pop2Piano uses a layer norm that only rescales (RMSNorm-style): the variance is computed without
        # subtracting the mean and there is no bias. The accumulation is done in fp32 for half-precision inputs.
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)

        # convert back into half-precision if necessary
        if self.weight.dtype in [torch.float16, torch.bfloat16]:
            hidden_states = hidden_states.to(self.weight.dtype)

        return self.weight * hidden_states


if not _load_pop2piano_layer_norm:
    Pop2PianoLayerNorm = FusedRMSNorm  # noqa

ALL_LAYERNORM_LAYERS.append(Pop2PianoLayerNorm)


class Pop2PianoDenseActDense(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        self.wi = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wo = nn.Linear(config.d_ff, config.d_model, bias=False)
        self.dropout = nn.Dropout(config.dropout_rate)
        self.act = ACT2FN[config.dense_act_fn]

    def forward(self, hidden_states):
        hidden_states = self.wi(hidden_states)
        hidden_states = self.act(hidden_states)
        hidden_states = self.dropout(hidden_states)
        if (
            isinstance(self.wo.weight, torch.Tensor)
            and hidden_states.dtype != self.wo.weight.dtype
            and self.wo.weight.dtype != torch.int8
        ):
            hidden_states = hidden_states.to(self.wo.weight.dtype)
        hidden_states = self.wo(hidden_states)
        return hidden_states


class Pop2PianoDenseGatedActDense(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        self.wi_0 = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wi_1 = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wo = nn.Linear(config.d_ff, config.d_model, bias=False)
        self.dropout = nn.Dropout(config.dropout_rate)
        self.act = ACT2FN[config.dense_act_fn]

    def forward(self, hidden_states):
        hidden_gelu = self.act(self.wi_0(hidden_states))
        hidden_linear = self.wi_1(hidden_states)
        hidden_states = hidden_gelu * hidden_linear
        hidden_states = self.dropout(hidden_states)

        # cast back to the dtype of `wo`, which may be kept in fp32 (unless it was quantized to int8)
        if (
            isinstance(self.wo.weight, torch.Tensor)
            and hidden_states.dtype != self.wo.weight.dtype
            and self.wo.weight.dtype != torch.int8
        ):
            hidden_states = hidden_states.to(self.wo.weight.dtype)

        hidden_states = self.wo(hidden_states)
        return hidden_states


class Pop2PianoLayerFF(nn.Module):
    def __init__(self, config: Pop2PianoConfig):
        super().__init__()
        if config.is_gated_act:
            self.DenseReluDense = Pop2PianoDenseGatedActDense(config)
        else:
            self.DenseReluDense = Pop2PianoDenseActDense(config)

        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(self, hidden_states):
        forwarded_states = self.layer_norm(hidden_states)
        forwarded_states = self.DenseReluDense(forwarded_states)
        hidden_states = hidden_states + self.dropout(forwarded_states)
        return hidden_states


class Pop2PianoAttention(nn.Module):
    def __init__(
        self,
        config: Pop2PianoConfig,
        has_relative_attention_bias=False,
        layer_idx: Optional[int] = None,
    ):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.has_relative_attention_bias = has_relative_attention_bias
        self.relative_attention_num_buckets = config.relative_attention_num_buckets
        self.relative_attention_max_distance = config.relative_attention_max_distance
        self.d_model = config.d_model
        self.key_value_proj_dim = config.d_kv
        self.n_heads = config.num_heads
        self.dropout = config.dropout_rate
        self.inner_dim = self.n_heads * self.key_value_proj_dim
        self.layer_idx = layer_idx
        if layer_idx is None and self.is_decoder:
            logger.warning_once(
                f"Instantiating a decoder {self.__class__.__name__} without passing `layer_idx` is not recommended "
                "and will lead to errors during the forward call, if caching is used. Please make sure to provide a "
                "`layer_idx` when creating this class."
            )

        # Mesh TensorFlow initialization to avoid scaling before softmax
        self.q = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.k = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.v = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.o = nn.Linear(self.inner_dim, self.d_model, bias=False)

        if self.has_relative_attention_bias:
            self.relative_attention_bias = nn.Embedding(self.relative_attention_num_buckets, self.n_heads)
        self.pruned_heads = set()
        self.gradient_checkpointing = False

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.n_heads, self.key_value_proj_dim, self.pruned_heads
        )
        # Prune linear layers
        self.q = prune_linear_layer(self.q, index)
        self.k = prune_linear_layer(self.k, index)
        self.v = prune_linear_layer(self.v, index)
        self.o = prune_linear_layer(self.o, index, dim=1)
        # Update hyper params
        self.n_heads = self.n_heads - len(heads)
        self.inner_dim = self.key_value_proj_dim * self.n_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    @staticmethod
    def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        """
        Adapted from Mesh Tensorflow:
        https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593

        Translate relative position to a bucket number for relative attention. The relative position is defined as
        memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to
        position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for
        small absolute relative_position and larger buckets for larger absolute relative_positions. All relative
        positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same
        bucket. This should allow for more graceful generalization to longer sequences than the model has been
        trained on.

        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer

        Returns:
            a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
        """
        relative_buckets = 0
        if bidirectional:
            num_buckets //= 2
            relative_buckets += (relative_position > 0).to(torch.long) * num_buckets
            relative_position = torch.abs(relative_position)
        else:
            relative_position = -torch.min(relative_position, torch.zeros_like(relative_position))
        # now relative_position is in the range [0, inf)

        # half of the buckets are for exact increments in positions
        max_exact = num_buckets // 2
        is_small = relative_position < max_exact

        # the other half of the buckets are for logarithmically bigger bins in positions up to max_distance
        relative_position_if_large = max_exact + (
            torch.log(relative_position.float() / max_exact)
            / math.log(max_distance / max_exact)
            * (num_buckets - max_exact)
        ).to(torch.long)
        relative_position_if_large = torch.min(
            relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
        )

        relative_buckets += torch.where(is_small, relative_position, relative_position_if_large)
        return relative_buckets

    def compute_bias(self, query_length, key_length, device=None, cache_position=None):
        """Compute binned relative position bias"""
        if device is None:
            device = self.relative_attention_bias.weight.device
        if cache_position is None:
            context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
        else:
            context_position = cache_position[:, None].to(device)
        memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :]
        relative_position = memory_position - context_position  # shape (query_length, key_length)
        relative_position_bucket = self._relative_position_bucket(
            relative_position,
            bidirectional=(not self.is_decoder),
            num_buckets=self.relative_attention_num_buckets,
            max_distance=self.relative_attention_max_distance,
        )
        values = self.relative_attention_bias(relative_position_bucket)  # shape (query_length, key_length, num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, query_length, key_length)
        return values

    def forward(
        self,
        hidden_states,
        mask=None,
        key_value_states=None,
        position_bias=None,
        past_key_value=None,
        layer_head_mask=None,
        query_length=None,
        use_cache=False,
        output_attentions=False,
        cache_position=None,
    ):
        """
        Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).
        """
        batch_size, seq_length = hidden_states.shape[:2]

        # if key_value_states are provided this layer is used as a cross-attention layer for the decoder
        is_cross_attention = key_value_states is not None

        query_states = self.q(hidden_states)
        query_states = query_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)

        if past_key_value is not None:
            is_updated = past_key_value.is_updated.get(self.layer_idx)
            if is_cross_attention:
                # after the first generated id, we can subsequently re-use all key/value_states from cache
                curr_past_key_value = past_key_value.cross_attention_cache
            else:
                curr_past_key_value = past_key_value.self_attention_cache

        current_states = key_value_states if is_cross_attention else hidden_states
        if is_cross_attention and past_key_value is not None and is_updated:
            # reuse key/value states from the cross-attention cache
            key_states = curr_past_key_value.key_cache[self.layer_idx]
            value_states = curr_past_key_value.value_cache[self.layer_idx]
        else:
            key_states = self.k(current_states)
            value_states = self.v(current_states)
            key_states = key_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)
            value_states = value_states.view(batch_size, -1, self.n_heads, self.key_value_proj_dim).transpose(1, 2)

            if past_key_value is not None:
                # save all key/value states to the cache to be re-used for fast auto-regressive generation
                cache_position = cache_position if not is_cross_attention else None
                key_states, value_states = curr_past_key_value.update(
                    key_states, value_states, self.layer_idx, {"cache_position": cache_position}
                )
                # flag that the cross-attention cache for this layer is filled, so it can be re-used later
                if is_cross_attention:
                    past_key_value.is_updated[self.layer_idx] = True

        # compute attention scores
        scores = torch.matmul(query_states, key_states.transpose(3, 2))

        if position_bias is None:
            key_length = key_states.shape[-2]
            # cache position is 0-indexed so we add 1 to get the real length of queries (aka with past)
            real_seq_length = query_length if query_length is not None else cache_position[-1] + 1
            if not self.has_relative_attention_bias:
                position_bias = torch.zeros(
                    (1, self.n_heads, seq_length, key_length), device=scores.device, dtype=scores.dtype
                )
                if self.gradient_checkpointing and self.training:
                    position_bias.requires_grad = True
            else:
                position_bias = self.compute_bias(
                    real_seq_length, key_length, device=scores.device, cache_position=cache_position
                )
                position_bias = position_bias[:, :, -seq_length:, :]

            if mask is not None:
                causal_mask = mask[:, :, :, : key_states.shape[-2]]
                position_bias = position_bias + causal_mask

        if self.pruned_heads:
            mask = torch.ones(position_bias.shape[1])
            mask[list(self.pruned_heads)] = 0
            position_bias_masked = position_bias[:, mask.bool()]
        else:
            position_bias_masked = position_bias

        scores += position_bias_masked

        # (batch_size, n_heads, seq_length, key_length)
        attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(scores)
        attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)

        # Mask heads if we want to
        if layer_head_mask is not None:
            attn_weights = attn_weights * layer_head_mask

        attn_output = torch.matmul(attn_weights, value_states)

        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.view(batch_size, -1, self.inner_dim)
        attn_output = self.o(attn_output)

        outputs = (attn_output, past_key_value, position_bias)

        if output_attentions:
            outputs = outputs + (attn_weights,)
        return outputs


class Pop2PianoLayerSelfAttention(nn.Module):
    def __init__(self, config, has_relative_attention_bias=False, layer_idx: Optional[int] = None):
        super().__init__()
        self.SelfAttention = Pop2PianoAttention(
            config, has_relative_attention_bias=has_relative_attention_bias, layer_idx=layer_idx
        )
        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        position_bias=None,
        layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        output_attentions=False,
        cache_position=None,
    ):
        normed_hidden_states = self.layer_norm(hidden_states)
        attention_output = self.SelfAttention(
            normed_hidden_states,
            mask=attention_mask,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        hidden_states = hidden_states + self.dropout(attention_output[0])
        outputs = (hidden_states,) + attention_output[1:]  # add attentions if we output them
        return outputs


class Pop2PianoLayerCrossAttention(nn.Module):
    def __init__(self, config, layer_idx: Optional[int] = None):
        super().__init__()
        self.EncDecAttention = Pop2PianoAttention(config, has_relative_attention_bias=False, layer_idx=layer_idx)
        self.layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(
        self,
        hidden_states,
        key_value_states,
        attention_mask=None,
        position_bias=None,
        layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        query_length=None,
        output_attentions=False,
        cache_position=None,
    ):
        normed_hidden_states = self.layer_norm(hidden_states)
        attention_output = self.EncDecAttention(
            normed_hidden_states,
            mask=attention_mask,
            key_value_states=key_value_states,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            query_length=query_length,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        layer_output = hidden_states + self.dropout(attention_output[0])
        outputs = (layer_output,) + attention_output[1:]  # add attentions if we output them
        return outputs


class Pop2PianoBlock(nn.Module):
    def __init__(self, config, has_relative_attention_bias=False, layer_idx: Optional[int] = None):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.layer = nn.ModuleList()
        self.layer.append(
            Pop2PianoLayerSelfAttention(
                config, has_relative_attention_bias=has_relative_attention_bias, layer_idx=layer_idx
            )
        )
        if self.is_decoder:
            self.layer.append(Pop2PianoLayerCrossAttention(config, layer_idx=layer_idx))

        self.layer.append(Pop2PianoLayerFF(config))

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        position_bias=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        encoder_decoder_position_bias=None,
        layer_head_mask=None,
        cross_attn_layer_head_mask=None,
        past_key_value=None,
        use_cache=False,
        output_attentions=False,
        return_dict=True,
        cache_position=None,
    ):
        self_attention_outputs = self.layer[0](
            hidden_states,
            attention_mask=attention_mask,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=past_key_value,
            use_cache=use_cache,
            output_attentions=output_attentions,
            cache_position=cache_position,
        )
        hidden_states, past_key_value = self_attention_outputs[:2]
        attention_outputs = self_attention_outputs[2:]  # keep self-attention outputs and relative position weights

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        do_cross_attention = self.is_decoder and encoder_hidden_states is not None
        if do_cross_attention:
            cross_attention_outputs = self.layer[1](
                hidden_states,
                key_value_states=encoder_hidden_states,
                attention_mask=encoder_attention_mask,
                position_bias=encoder_decoder_position_bias,
                layer_head_mask=cross_attn_layer_head_mask,
                past_key_value=past_key_value,
                query_length=cache_position[-1] + 1,
                use_cache=use_cache,
                output_attentions=output_attentions,
            )
            hidden_states, past_key_value = cross_attention_outputs[:2]

            # clamp inf values to enable fp16 training
            if hidden_states.dtype == torch.float16:
                clamp_value = torch.where(
                    torch.isinf(hidden_states).any(),
                    torch.finfo(hidden_states.dtype).max - 1000,
                    torch.finfo(hidden_states.dtype).max,
                )
                hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

            # keep cross-attention outputs and relative position weights
            attention_outputs = attention_outputs + cross_attention_outputs[2:]

        # Apply Feed Forward layer
        hidden_states = self.layer[-1](hidden_states)

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        outputs = (hidden_states,)

        if use_cache:
            outputs = outputs + (past_key_value,) + attention_outputs
        else:
            outputs = outputs + attention_outputs

        # hidden-states, (past_key_value), (self-attention position bias), (self-attention weights),
        # (cross-attention position bias), (cross-attention weights)
        return outputs


class Pop2PianoPreTrainedModel(PreTrainedModel):
    config_class = Pop2PianoConfig
    base_model_prefix = "transformer"
    is_parallelizable = False
    supports_gradient_checkpointing = True
    _supports_cache_class = True
    _supports_static_cache = False
    _no_split_modules = ["Pop2PianoBlock"]
    _keep_in_fp32_modules = ["wo"]

    def _init_weights(self, module):
        """Initialize the weights"""
        factor = self.config.initializer_factor  # Used for testing weights initialization
        if isinstance(module, Pop2PianoLayerNorm):
            module.weight.data.fill_(factor * 1.0)
        elif isinstance(module, Pop2PianoConcatEmbeddingToMel):
            module.embedding.weight.data.normal_(mean=0.0, std=factor * 1.0)
        elif isinstance(module, Pop2PianoForConditionalGeneration):
            # Mesh TensorFlow embeddings initialization
            module.shared.weight.data.normal_(mean=0.0, std=factor * 1.0)
            if hasattr(module, "lm_head") and not self.config.tie_word_embeddings:
                module.lm_head.weight.data.normal_(mean=0.0, std=factor * 1.0)
        elif isinstance(module, Pop2PianoDenseActDense):
            # Mesh TensorFlow FF initialization
            module.wi.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi, "bias") and module.wi.bias is not None:
                module.wi.bias.data.zero_()
            module.wo.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_ff) ** -0.5))
            if hasattr(module.wo, "bias") and module.wo.bias is not None:
                module.wo.bias.data.zero_()
        elif isinstance(module, Pop2PianoDenseGatedActDense):
            module.wi_0.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi_0, "bias") and module.wi_0.bias is not None:
                module.wi_0.bias.data.zero_()
            module.wi_1.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_model) ** -0.5))
            if hasattr(module.wi_1, "bias") and module.wi_1.bias is not None:
                module.wi_1.bias.data.zero_()
            module.wo.weight.data.normal_(mean=0.0, std=factor * ((self.config.d_ff) ** -0.5))
            if hasattr(module.wo, "bias") and module.wo.bias is not None:
                module.wo.bias.data.zero_()
        elif isinstance(module, Pop2PianoAttention):
            # Mesh TensorFlow attention initialization to avoid scaling before softmax
            d_model = self.config.d_model
            key_value_proj_dim = self.config.d_kv
            n_heads = self.config.num_heads
            module.q.weight.data.normal_(mean=0.0, std=factor * ((d_model * key_value_proj_dim) ** -0.5))
            module.k.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5))
            module.v.weight.data.normal_(mean=0.0, std=factor * (d_model**-0.5))
            module.o.weight.data.normal_(mean=0.0, std=factor * ((n_heads * key_value_proj_dim) ** -0.5))
            if module.has_relative_attention_bias:
                module.relative_attention_bias.weight.data.normal_(mean=0.0, std=factor * ((d_model) ** -0.5))

    def _shift_right(self, input_ids):
        decoder_start_token_id = self.config.decoder_start_token_id
        pad_token_id = self.config.pad_token_id

        if decoder_start_token_id is None:
            raise ValueError(
                "self.model.config.decoder_start_token_id has to be defined. In Pop2Piano it is usually set to the "
                "pad_token_id."
            )

        # shift inputs to the right
        if is_torch_fx_proxy(input_ids):
            # Item assignment is not supported natively for proxies.
            shifted_input_ids = torch.full(input_ids.shape[:-1] + (1,), decoder_start_token_id)
            shifted_input_ids = torch.cat([shifted_input_ids, input_ids[..., :-1]], dim=-1)
        else:
            shifted_input_ids = input_ids.new_zeros(input_ids.shape)
            shifted_input_ids[..., 1:] = input_ids[..., :-1].clone()
            shifted_input_ids[..., 0] = decoder_start_token_id

        if pad_token_id is None:
            raise ValueError("self.model.config.pad_token_id has to be defined.")
        # replace possible -100 values in labels by `pad_token_id`
        shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)

        return shifted_input_ids


class Pop2PianoStack(Pop2PianoPreTrainedModel):
    def __init__(self, config, embed_tokens=None):
        super().__init__(config)

        self.embed_tokens = embed_tokens
        self.is_decoder = config.is_decoder

        self.block = nn.ModuleList(
            [
                Pop2PianoBlock(config, has_relative_attention_bias=bool(i == 0), layer_idx=i)
                for i in range(config.num_layers)
            ]
        )
        self.final_layer_norm = Pop2PianoLayerNorm(config.d_model, eps=config.layer_norm_epsilon)
        self.dropout = nn.Dropout(config.dropout_rate)

        # Initialize weights and apply final processing
        self.post_init()
        # Model parallel
        self.model_parallel = False
        self.device_map = None
        self.gradient_checkpointing = False

    def get_input_embeddings(self):
        return self.embed_tokens

    def set_input_embeddings(self, new_embeddings):
        self.embed_tokens = new_embeddings

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        inputs_embeds=None,
        head_mask=None,
        cross_attn_head_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        cache_position=None,
    ):
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            err_msg_prefix = "decoder_" if self.is_decoder else ""
            raise ValueError(
                f"You cannot specify both {err_msg_prefix}input_ids and {err_msg_prefix}inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
            input_ids = input_ids.view(-1, input_shape[-1])
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            err_msg_prefix = "decoder_" if self.is_decoder else ""
            raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
            )
            use_cache = False

        if inputs_embeds is None:
            if self.embed_tokens is None:
                raise ValueError("You have to initialize the model with valid token embeddings")
            inputs_embeds = self.embed_tokens(input_ids)

        batch_size, seq_length = input_shape

        if use_cache is True and not self.is_decoder:
            raise ValueError(f"`use_cache` can only be set to `True` if {self} is used as a decoder")

        # initialize past_key_values
        return_legacy_cache = False
        return_self_attention_cache = False
        if self.is_decoder and (use_cache or past_key_values is not None):
            if isinstance(past_key_values, Cache) and not isinstance(past_key_values, EncoderDecoderCache):
                return_self_attention_cache = True
                past_key_values = EncoderDecoderCache(past_key_values, DynamicCache())
            elif not isinstance(past_key_values, EncoderDecoderCache):
                return_legacy_cache = True
                logger.warning_once(
                    "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. "
                    "You should pass an instance of `EncoderDecoderCache` instead, e.g. "
                    "`past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`."
                )
                past_key_values = EncoderDecoderCache.from_legacy_cache(past_key_values)
            elif past_key_values is None:
                past_key_values = EncoderDecoderCache(DynamicCache(), DynamicCache())
        elif not self.is_decoder:
            # do not pass a cache object down the line for the encoder stack
            past_key_values = None

        past_key_values_length = past_key_values.get_seq_length() if past_key_values is not None else 0
        if cache_position is None:
            cache_position = torch.arange(
                past_key_values_length, past_key_values_length + seq_length, device=inputs_embeds.device
            )

        if attention_mask is None and not is_torchdynamo_compiling():
            # required mask seq length can be calculated via length of past cache
            mask_seq_length = past_key_values_length + seq_length
            attention_mask = torch.ones(batch_size, mask_seq_length, device=inputs_embeds.device)

        if self.config.is_decoder:
            causal_mask = self._update_causal_mask(
                attention_mask,
                inputs_embeds,
                cache_position,
                past_key_values.self_attention_cache if past_key_values is not None else None,
                output_attentions,
            )
        else:
            causal_mask = attention_mask[:, None, None, :]
            causal_mask = causal_mask.to(dtype=inputs_embeds.dtype)
            causal_mask = (1.0 - causal_mask) * torch.finfo(inputs_embeds.dtype).min

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make it broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.is_decoder and encoder_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=inputs_embeds.device)
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        head_mask = self.get_head_mask(head_mask, self.config.num_layers)
        cross_attn_head_mask = self.get_head_mask(cross_attn_head_mask, self.config.num_layers)
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None
        all_cross_attentions = () if (output_attentions and self.is_decoder) else None
        position_bias = None
        encoder_decoder_position_bias = None

        hidden_states = self.dropout(inputs_embeds)

        for i, layer_module in enumerate(self.block):
            layer_head_mask = head_mask[i]
            cross_attn_layer_head_mask = cross_attn_head_mask[i]

            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.forward,
                    hidden_states,
                    causal_mask,
                    position_bias,
                    encoder_hidden_states,
                    encoder_extended_attention_mask,
                    encoder_decoder_position_bias,
                    layer_head_mask,
                    cross_attn_layer_head_mask,
                    None,  # past_key_value is always None with gradient checkpointing
                    use_cache,
                    output_attentions,
                    return_dict,
                    cache_position,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask=causal_mask,
                    position_bias=position_bias,
                    encoder_hidden_states=encoder_hidden_states,
                    encoder_attention_mask=encoder_extended_attention_mask,
                    encoder_decoder_position_bias=encoder_decoder_position_bias,
                    layer_head_mask=layer_head_mask,
                    cross_attn_layer_head_mask=cross_attn_layer_head_mask,
                    past_key_value=past_key_values,
                    use_cache=use_cache,
                    output_attentions=output_attentions,
                    return_dict=return_dict,
                    cache_position=cache_position,
                )

            # layer_outputs is a tuple with:
            # hidden-states, key-value-states, (self-attention position bias), (self-attention weights),
            # (cross-attention position bias), (cross-attention weights)
            if use_cache is False:
                layer_outputs = layer_outputs[:1] + (None,) + layer_outputs[1:]

            hidden_states, next_decoder_cache = layer_outputs[:2]

            # We share the position biases between the layers - the first layer stores them
            position_bias = layer_outputs[2]
            if self.is_decoder and encoder_hidden_states is not None:
                encoder_decoder_position_bias = layer_outputs[4 if output_attentions else 3]

            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[3],)
                if self.is_decoder:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[5],)

        hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.dropout(hidden_states)

        # Add last layer
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if return_self_attention_cache:
            next_cache = past_key_values.self_attention_cache
        if return_legacy_cache:
            next_cache = past_key_values.to_legacy_cache()

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_cache,
                    all_hidden_states,
                    all_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_attentions,
            cross_attentions=all_cross_attentions,
        )

    def _update_causal_mask(
        self,
        attention_mask: Union[torch.Tensor, "BlockMask"],
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values: Cache,
        output_attentions: bool = False,
    ):
        if self.config._attn_implementation == "flash_attention_2":
            if attention_mask is not None and (attention_mask == 0.0).any():
                return attention_mask
            return None
        if self.config._attn_implementation == "flex_attention":
            if isinstance(attention_mask, torch.Tensor):
                attention_mask = make_flex_block_causal_mask(attention_mask)
            return attention_mask

        # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
        # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
        # to infer the attention mask.
        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_compilable_cache = past_key_values.is_compileable if past_key_values is not None else False

        # When output_attentions is True, the sdpa implementation falls back to the eager implementation
        if self.config._attn_implementation == "sdpa" and not using_compilable_cache and not output_attentions:
            if AttentionMaskConverter._ignore_causal_mask_sdpa(
                attention_mask,
                inputs_embeds=input_tensor,
                past_key_values_length=past_seen_tokens,
                is_training=self.training,
            ):
                return None

        dtype = input_tensor.dtype
        sequence_length = input_tensor.shape[1]
        if using_compilable_cache:
            target_length = past_key_values.get_max_cache_shape()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, torch.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        # In case the provided `attention_mask` is 2D, we generate a causal mask here (4D).
        causal_mask = self._prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        if (
            self.config._attn_implementation == "sdpa"
            and attention_mask is not None
            and attention_mask.device.type in ["cuda", "xpu", "npu"]
            and not output_attentions
        ):
            # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when
            # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
            min_dtype = torch.finfo(dtype).min
            causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)

        return causal_mask

    @staticmethod
    def _prepare_4d_causal_attention_mask_with_cache_position(
        attention_mask: torch.Tensor,
        sequence_length: int,
        target_length: int,
        dtype: torch.dtype,
        cache_position: torch.Tensor,
        batch_size: int,
        **kwargs,
    ):
        """
        Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
        `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

        Args:
            attention_mask (`torch.Tensor`):
                A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
                `(batch_size, 1, query_length, key_value_length)`.
            sequence_length (`int`):
                The sequence length being processed.
            target_length (`int`):
                The target length: when generating with static cache, the mask should be as long as the static cache,
                to account for the 0 padding, the part of the cache that is not filled yet.
            dtype (`torch.dtype`):
                The dtype to use for the 4D attention mask.
            cache_position (`torch.Tensor`):
                Indices depicting the position of the input sequence tokens in the sequence.
            batch_size (`torch.Tensor`):
                Batch size.
        """
        if attention_mask is not None and attention_mask.dim() == 4:
            # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
            causal_mask = attention_mask
        else:
            min_dtype = torch.finfo(dtype).min
            causal_mask = torch.full(
                (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
            )
            if sequence_length != 1:
                causal_mask = torch.triu(causal_mask, diagonal=1)
            causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
            causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
            if attention_mask is not None:
                causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
                mask_length = attention_mask.shape[-1]
                padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
                    causal_mask.device
                )
                padding_mask = padding_mask == 0
                causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
                    padding_mask, min_dtype
                )

        return causal_mask


class Pop2PianoConcatEmbeddingToMel(nn.Module):
    """Embedding Matrix for `composer` tokens."""

    def __init__(self, config):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=config.composer_vocab_size, embedding_dim=config.d_model)

    def forward(self, feature, index_value, embedding_offset):
        index_shifted = index_value - embedding_offset
        composer_embedding = self.embedding(index_shifted).unsqueeze(1)
        inputs_embeds = torch.concatenate([composer_embedding, feature], dim=1)
        return inputs_embeds


@auto_docstring(
    custom_intro="""
    Pop2Piano Model with a `language modeling` head on top.
    """
)
class Pop2PianoForConditionalGeneration(Pop2PianoPreTrainedModel, GenerationMixin):
    _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight", "lm_head.weight"]

    def __init__(self, config: Pop2PianoConfig):
        super().__init__(config)
        self.config = config
        self.model_dim = config.d_model

        self.shared = nn.Embedding(config.vocab_size, config.d_model)

        self.mel_conditioner = Pop2PianoConcatEmbeddingToMel(config)

        encoder_config = copy.deepcopy(config)
        encoder_config.is_decoder = False
        encoder_config.use_cache = False
        encoder_config.is_encoder_decoder = False

        self.encoder = Pop2PianoStack(encoder_config, self.shared)

        decoder_config = copy.deepcopy(config)
        decoder_config.is_decoder = True
        decoder_config.is_encoder_decoder = False
        decoder_config.num_layers = config.num_decoder_layers
        self.decoder = Pop2PianoStack(decoder_config, self.shared)

        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.shared

    def set_input_embeddings(self, new_embeddings):
        self.shared = new_embeddings
        self.encoder.set_input_embeddings(new_embeddings)
        self.decoder.set_input_embeddings(new_embeddings)

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def get_output_embeddings(self):
        return self.lm_head

    def get_encoder(self):
        return self.encoder

    def get_decoder(self):
        return self.decoder

    def get_mel_conditioner_outputs(
        self,
        input_features: torch.FloatTensor,
        composer: str,
        generation_config: GenerationConfig,
        attention_mask: Optional[torch.FloatTensor] = None,
    ):
        """
        This method is used to concatenate mel conditioner tokens at the front of the input_features in order to
        control the type of MIDI token generated by the model.

        Args:
            input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
                input features extracted from the feature extractor.
            composer (`str`):
                composer token which determines the type of MIDI tokens to be generated.
            generation_config (`~generation.GenerationConfig`):
                The generation config is used to get the composer-feature_token pair.
            attention_mask (``, *optional*):
                For batched generation `input_features` are padded to have the same shape across all examples.
                `attention_mask` helps to determine which areas were padded and which were not.
                - 1 for tokens that are **not padded**,
                - 0 for tokens that are **padded**.
        """
        composer_to_feature_token = generation_config.composer_to_feature_token
        if composer not in composer_to_feature_token.keys():
            raise ValueError(
                f"Please choose a composer from {list(composer_to_feature_token.keys())}. Composer received - {composer}"
            )
        composer_value = composer_to_feature_token[composer]
        composer_value = torch.tensor(composer_value, device=self.device)
        composer_value = composer_value.repeat(input_features.shape[0])

        embedding_offset = min(composer_to_feature_token.values())

        input_features = self.mel_conditioner(
            feature=input_features,
            index_value=composer_value,
            embedding_offset=embedding_offset,
        )
        if attention_mask is not None:
            input_features[~attention_mask[:, 0].bool()] = 0.0

            # since self.mel_conditioner adds a new array at the front of inputs_embeds we need to do the same
            # for attention_mask to keep the shapes the same
            attention_mask = torch.concatenate([attention_mask[:, 0].view(-1, 1), attention_mask], axis=1)
            return input_features, attention_mask

        return input_features, None

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.BoolTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        decoder_head_mask: Optional[torch.FloatTensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        input_features: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
    ) -> Union[Tuple[torch.FloatTensor], Seq2SeqLMOutput]:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. Pop2Piano is a model with relative position embeddings
            so you should be able to pad the inputs on both the right and the left. Indices can be obtained using
            [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for detail.
            [What are input IDs?](../glossary#input-ids) To know more on how to prepare `input_ids` for pretraining
            take a look at [Pop2Piano Training](./Pop2Piano#training).
        decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using
            [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.
            [What are decoder input IDs?](../glossary#decoder-input-ids) Pop2Piano uses the `pad_token_id` as the
            starting token for `decoder_input_ids` generation. If `past_key_values` is used, optionally only the last
            `decoder_input_ids` have to be input (see `past_key_values`). To know more on how to prepare
            `decoder_input_ids` for pretraining take a look at [Pop2Piano Training](./Pop2Piano#training).
        decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
            Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
            be used by default.
        decoder_head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules in the decoder. Mask values selected in `[0,
            1]`:
            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        cross_attn_head_mask (`torch.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in
            `[0, 1]`:
            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Does the same task as `inputs_embeds`. If `inputs_embeds` is not present but `input_features` is present
            then `input_features` will be considered as `inputs_embeds`.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[-100, 0, ...,
            config.vocab_size - 1]`. All labels set to `-100` are ignored (masked), the loss is only computed for
            labels in `[0, ..., config.vocab_size]`
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        if inputs_embeds is not None and input_features is not None:
            raise ValueError("Both `inputs_embeds` and `input_features` received! Please provide only one of them")
        elif input_features is not None and inputs_embeds is None:
            inputs_embeds = input_features

        # Encode if needed (training, first prediction pass)
        if encoder_outputs is None:
            encoder_outputs = self.encoder(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                head_mask=head_mask,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
        elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
            encoder_outputs = BaseModelOutput(
                last_hidden_state=encoder_outputs[0],
                hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
                attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
            )

        hidden_states = encoder_outputs[0]

        if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None:
            # get decoder inputs from shifting lm labels to the right
            decoder_input_ids = self._shift_right(labels)

        # Decode
        decoder_outputs = self.decoder(
            input_ids=decoder_input_ids,
            attention_mask=decoder_attention_mask,
            inputs_embeds=decoder_inputs_embeds,
            past_key_values=past_key_values,
            encoder_hidden_states=hidden_states,
            encoder_attention_mask=attention_mask,
            head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )

        sequence_output = decoder_outputs[0]

        if self.config.tie_word_embeddings:
            # Rescale output before projecting on vocab
            sequence_output = sequence_output * (self.model_dim**-0.5)

        lm_logits = self.lm_head(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss(ignore_index=-100)
            loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), labels.view(-1))

        if not return_dict:
            output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs
            return ((loss,) + output) if loss is not None else output

        return Seq2SeqLMOutput(
            loss=loss,
            logits=lm_logits,
            past_key_values=decoder_outputs.past_key_values,
            decoder_hidden_states=decoder_outputs.hidden_states,
            decoder_attentions=decoder_outputs.attentions,
            cross_attentions=decoder_outputs.cross_attentions,
            encoder_last_hidden_state=encoder_outputs.last_hidden_state,
            encoder_hidden_states=encoder_outputs.hidden_states,
            encoder_attentions=encoder_outputs.attentions,
        )

    @torch.no_grad()
    def generate(
        self,
        input_features,
        attention_mask=None,
        composer="composer1",
        generation_config=None,
        **kwargs,
    ):
        """
        Generates token ids for midi outputs.

        <Tip warning={true}>

        Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
        model's default generation configuration. You can override any `generation_config` by passing the corresponding
        parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`. For an overview of generation
        strategies and code examples, check out the [following guide](./generation_strategies).

        </Tip>

        Parameters:
            input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
                This is the featurized version of audio generated by `Pop2PianoFeatureExtractor`.
            attention_mask:
                For batched generation `input_features` are padded to have the same shape across all examples.
                `attention_mask` helps to determine which areas were padded and which were not.
                - 1 for tokens that are **not padded**,
                - 0 for tokens that are **padded**.
            composer (`str`, *optional*, defaults to `"composer1"`):
                This value is passed to `Pop2PianoConcatEmbeddingToMel` to generate different embeddings for each
                `"composer"`. Please make sure that the composer value is present in `composer_to_feature_token` in
                `generation_config`. For an example please see
                https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json .
            generation_config (`~generation.GenerationConfig`, *optional*):
                The generation configuration to be used as base parametrization for the generation call. `**kwargs`
                passed to generate matching the attributes of `generation_config` will override them. If
                `generation_config` is not provided, the default will be used, which has the following loading
                priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
                configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
                default values, whose documentation should be checked to parameterize generation.
            kwargs:
                Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
                forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
                specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.

        Return:
            [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
            or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
                Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
                [`~utils.ModelOutput`] types are:
                    - [`~generation.GenerateEncoderDecoderOutput`],
                    - [`~generation.GenerateBeamEncoderDecoderOutput`]
        """
        if generation_config is None:
            generation_config = self.generation_config
        generation_config.update(**kwargs)

        # check for composer_to_feature_token
        if not hasattr(generation_config, "composer_to_feature_token"):
            raise ValueError(
                "`composer_to_feature_token` was not found! Please refer to "
                "https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json"
                "and parse a dict like that."
            )

        if len(generation_config.composer_to_feature_token) != self.config.composer_vocab_size:
            raise ValueError(
                "config.composer_vocab_size must be same as the number of keys in "
                "generation_config.composer_to_feature_token! "
                f"Found {self.config.composer_vocab_size} vs {len(generation_config.composer_to_feature_token)}."
            )

        # to control the variation of generated MIDI tokens we concatenate mel-conditioner tokens
        # (which depend on composer_token) at the front of input_features.
        input_features, attention_mask = self.get_mel_conditioner_outputs(
            input_features=input_features,
            attention_mask=attention_mask,
            composer=composer,
            generation_config=generation_config,
        )

        return super().generate(
            inputs=None,
            inputs_embeds=input_features,
            attention_mask=attention_mask,
            generation_config=generation_config,
            **kwargs,
        )

    def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor):
        return self._shift_right(labels)

    def _reorder_cache(self, past_key_values, beam_idx):
        # if decoder past is not included in output
        # speedy decoding is disabled and no need to reorder
        if past_key_values is None:
            logger.warning("You might want to consider setting `use_cache=True` to speed up decoding")
            return past_key_values

        reordered_decoder_past = ()
        for layer_past_states in past_key_values:
            # get the correct batch idx from layer past batch dim
            reordered_layer_past_states = ()
            for layer_past_state in layer_past_states:
                # need to set the correct `past` for each of the four key / value states
                reordered_layer_past_states = reordered_layer_past_states + (
                    layer_past_state.index_select(0, beam_idx.to(layer_past_state.device)),
                )

            if reordered_layer_past_states[0].shape != layer_past_states[0].shape:
                raise ValueError(
                    f"reordered_layer_past_states[0] shape {reordered_layer_past_states[0].shape} and "
                    f"layer_past_states[0] shape {layer_past_states[0].shape} mismatched"
                )
            if len(reordered_layer_past_states) != len(layer_past_states):
                raise ValueError(
                    f"length of reordered_layer_past_states {len(reordered_layer_past_states)} and "
                    f"length of layer_past_states {len(layer_past_states)} mismatched"
                )

            reordered_decoder_past = reordered_decoder_past + (reordered_layer_past_states,)
        return reordered_decoder_past


__all__ = ["Pop2PianoForConditionalGeneration", "Pop2PianoPreTrainedModel"]