
    fThJ                    2   S r SSKrSSKrSSKJrJrJrJr  SSKrSSK	rSSKJ
r
  SSKJrJrJr  SSKJr  SSKJr  SS	KJrJr  SS
KJrJrJrJrJrJrJr  SSKJr  SSK J!r!J"r"  SSK#J$r$  \"RJ                  " \&5      r'S\RP                  S\)S\)4S jr* " S S\
RV                  5      r, " S S\
RZ                  5      r. " S S\
RZ                  5      r/ " S S\
RZ                  5      r0 " S S\
RZ                  5      r1 " S S\
RZ                  5      r2\! " S S \5      5       r3 " S! S"\35      r4 " S# S$\35      r5\! " S% S&\35      5       r6\!" S'S(9 " S) S*\3\5      5       r7\!" S+S(9 " S, S-\35      5       r8\! " S. S/\35      5       r9 " S0 S1\35      r: " S2 S3\3\5      r;/ S4Qr<g)5zPyTorch MVP model.    N)ListOptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GenerationMixin)_prepare_4d_attention_mask!_prepare_4d_causal_attention_mask)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentions!CausalLMOutputWithCrossAttentionsSeq2SeqLMOutputSeq2SeqModelOutput#Seq2SeqQuestionAnsweringModelOutputSeq2SeqSequenceClassifierOutput)PreTrainedModel)auto_docstringlogging   )	MvpConfig	input_idspad_token_iddecoder_start_token_idc                     U R                  U R                  5      nU SS2SS24   R                  5       USS2SS24'   X#SS2S4'   Uc  [        S5      eUR	                  US:H  U5        U$ )z)
Shift input ids one token to the right.
Nr   r   z1self.model.config.pad_token_id has to be defined.i)	new_zerosshapeclone
ValueErrormasked_fill_)r   r   r   shifted_input_idss       \/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/mvp/modeling_mvp.pyshift_tokens_rightr(   2   sz     "++IOO<(CRC0668ae4adLMM""#4#<lK    c                      ^  \ rS rSrSrS\S\4U 4S jjrSS\R                  S\S\R                  4U 4S	 jjjr	S
r
U =r$ )MvpLearnedPositionalEmbeddingC   zF
This module learns positional embeddings up to a fixed maximum size.
num_embeddingsembedding_dimc                 L   > SU l         [        TU ]	  XR                   -   U5        g N   )offsetsuper__init__)selfr-   r.   	__class__s      r'   r4   &MvpLearnedPositionalEmbedding.__init__H   s"     ++5}Er)   r   past_key_values_lengthposition_idsc                   > Uc]  UR                   SS u  pE[        R                  " X"U-   [        R                  U R                  R
                  S9R                  US5      nOUR                  S5      n[        TU ]%  X0R                  -   5      $ )z3`input_ids' shape is expected to be [bsz x seqlen].Nr1   )dtypedevicer    r   )r"   torcharangelongweightr<   expand	unsqueezer3   forwardr2   )r5   r   r8   r9   bszseq_lenr6   s         r'   rC   %MvpLearnedPositionalEmbedding.forwardN   s     $??2A.LC <<&(HPUPZPZcgcncncucufS"o  (11!4Lw|kk9::r)   )r2   r   N)__name__
__module____qualname____firstlineno____doc__intr4   r=   TensorrC   __static_attributes____classcell__r6   s   @r'   r+   r+   C   sH    Fs F3 F; ;s ;^c^j^j ; ;r)   r+   c                     ^  \ rS rSrSr   SS\S\S\S\S\4
U 4S jjjrS	\	R                  S
\S\4S jr      SS\	R                  S\\	R                     S\\\	R                        S\\	R                     S\\	R                     S\\	R                     S\S\\	R                  \\	R                     \\\	R                        4   4S jjrSrU =r$ )MvpAttention\   z=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropout
is_decoderbiasc                   > [         TU ]  5         Xl        X l        X0l        X-  U l        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        X@l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      ࿩rY   )r3   r4   rU   rV   rW   head_dimr$   scalingrX   r   Lineark_projv_projq_projout_proj)r5   rU   rV   rW   rX   rY   r6   s         r'   r4   MvpAttention.__init___   s     	""!.MMI%$..8MdnnM]$YKr3  }}d*$ii	4@ii	4@ii	4@		)TBr)   tensorrE   rD   c                     UR                  X2U R                  U R                  5      R                  SS5      R	                  5       $ )Nr   r1   )viewrV   r\   	transpose
contiguous)r5   rd   rE   rD   s       r'   _shapeMvpAttention._shapez   s5    {{3GQQRSUVWbbddr)   hidden_stateskey_value_statespast_key_valueattention_masklayer_head_maskattn_promptoutput_attentionsreturnc                 ,   USLnUR                  5       u  pnU R                  U5      U R                  -  nU(       a  Ub  US   nUS   nGOU(       aE  U R                  U R	                  U5      SU	5      nU R                  U R                  U5      SU	5      nOUby  U R                  U R	                  U5      SU	5      nU R                  U R                  U5      SU	5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R                  U R	                  U5      SU	5      nU R                  U R                  U5      SU	5      nU R                  (       a  X4nUb  [        R                  " US   R                  U	SSS5      U/SS9n[        R                  " US   R                  U	SSS5      U/SS9nUbY  [        R                  " U	SXS   R                  S5      5      R                  UR                  5      n[        R                  " X/SS9nXR                  -  SU R                  4nU R                  XU	5      R                  " U6 nUR                  " U6 nUR                  " U6 nUR                  S5      n[        R                   " XR#                  SS5      5      nUR                  5       XR                  -  U
U4:w  a.  [%        SXR                  -  U
U4 SUR                  5        35      eUbz  UR                  5       U	SU
U4:w  a#  [%        S	U	SU
U4 SUR                  5        35      eUR                  XR                  U
U5      U-   nUR                  XR                  -  U
U5      n[&        R(                  R+                  USS9nUb  UR                  5       U R                  4:w  a*  [%        S
U R                  4 SUR                  5        35      eUR                  SSSS5      UR                  XR                  U
U5      -  nUR                  XR                  -  U
U5      nU(       a=  UR                  XR                  U
U5      nUR                  XR                  -  U
U5      nOSn[&        R(                  R-                  UU R,                  U R.                  S9n[        R                   " UU5      nUR                  5       XR                  -  XR                  4:w  a5  [%        SXR                  XR                  4 SUR                  5        35      eUR                  XR                  XR                  5      nUR#                  SS5      nUR1                  XU R2                  5      nU R5                  U5      nUUU4$ )z#Input shape: Batch x Time x ChannelNr   r   r    r1   dimz$Attention weights should be of size z	, but is z!Attention mask should be of size z/Head mask for a single layer should be of size ptrainingz `attn_output` should be of size )sizera   r]   ri   r_   r`   r=   catrX   rA   zerostor<   rV   r\   rf   bmmrg   r$   r   
functionalsoftmaxrW   rx   reshaperU   rb   )r5   rk   rl   rm   rn   ro   rp   rq   is_cross_attentionrD   tgt_len_query_states
key_statesvalue_statesprompt_mask
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                         r'   rC   MvpAttention.forward}   sM    .T9',,.a {{=1DLL@."<'*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BJJ 99nQ&7%FANL T[[%?SIJ;;t{{='A2sKL?? )7N"KN$9$9#r2r$JJ#W]^_J 99k!n&;&;CR&Ll%[abcL)#kk#q'q>;N;Nq;QRUUVdVkVkl!&K+Hr!SNN*B>
{{<#>CCZP__j1
#((*5//!$yy/C/CAq/IJ3#7'"JJ6nn8LgW^7_6` a %%'(* 
 %""$a'(BB 7a'8R7SS\]k]p]p]r\st  (,,S..'7SVddL',,S>>-A7GTL}},,\r,B&##%$..):: Et~~FWEX Y',,./1  +//2q!<|?P?PQTVdVdfmov?wwL',,S>>-A7GTL
 %1$5$5c>>7T[$\!055cNN6JGU\]L$(!]]**<4<<RVR_R_*`
ii
L9#"6!OO2CR_R_3`2a b$$&') 
 "&&sNNG]]S!++Aq1 "))#GmmK01>AAr)   )
rW   rU   r\   rX   r_   rV   rb   ra   r]   r`   )        FT)NNNNNF)rH   rI   rJ   rK   rL   rM   floatboolr4   r=   rN   ri   r   r   rC   rO   rP   rQ   s   @r'   rS   rS   \   sQ   G  CC C 	C
 C C C6eU\\ eC ec e 488<1526.2"'wB||wB #5<<0wB !u||!45	wB
 !.wB "%,,/wB ell+wB  wB 
u||Xell3XeELL>Q5RR	SwB wBr)   rS   c                      ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S\R                  S\R                  S\\	   S	\
\R                  \\R                     4   4S
 jjrSrU =r$ )MvpEncoderLayer   configc                 h  > [         TU ]  5         UR                  U l        [	        U R                  UR
                  UR                  S9U l        [        R                  " U R                  5      U l
        UR                  U l        [        UR                     U l        UR                  U l        [        R                   " U R                  UR"                  5      U l        [        R                   " UR"                  U R                  5      U l        [        R                  " U R                  5      U l        g )N)rU   rV   rW   )r3   r4   d_modelrU   rS   encoder_attention_headsattention_dropout	self_attnr   	LayerNormself_attn_layer_normrW   r   activation_functionactivation_fnactivation_dropoutr^   encoder_ffn_dimfc1fc2final_layer_normr5   r   r6   s     r'   r4   MvpEncoderLayer.__init__   s    %nn44,,

 %'LL$@!~~#F$>$>?"(";";99T^^V-C-CD99V33T^^D "T^^ <r)   rk   rn   ro   self_attn_promptrq   rr   c                    UnU R                  UUUUUS9u  pn[        R                  R                  XR                  U R                  S9nXa-   nU R                  U5      nUnU R                  U R                  U5      5      n[        R                  R                  XR                  U R                  S9nU R                  U5      n[        R                  R                  XR                  U R                  S9nXa-   nU R                  U5      nUR                  [        R                  :X  a  [        R                  " U5      R                  5       (       d)  [        R                   " U5      R                  5       (       aC  [        R"                  " UR                  5      R$                  S-
  n	[        R&                  " X* U	S9nU4n
U(       a  X4-  n
U
$ )a  
Args:
    hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
    attention_mask (`torch.FloatTensor`): attention mask of size
        `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
    layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
        `(encoder_attention_heads,)`.
    self_attn_prompt (`torch.FloatTensor`): prompt of self attention of shape
        `(2, encoder_attention_heads, pro_len, head_dim)`.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
)rk   rn   ro   rp   rq   rv     )minmax)r   r   r~   rW   rx   r   r   r   r   r   r   r;   r=   float16isinfanyisnanfinfor   clamp)r5   rk   rn   ro   r   rq   residualr   r   clamp_valueoutputss              r'   rC   MvpEncoderLayer.forward  s   * !)-')+(/ *8 *
&Q --m||VZVcVc-d 011-@ **488M+BC--m?V?Vaeanan-o/--m||VZVcVc-d 0--m<%--/KK&**,,M0J0N0N0P0P++m&9&9:>>EK!KK<[YM "&Gr)   )	r   r   rW   rU   r   r   r   r   r   )F)rH   rI   rJ   rK   r   r4   r=   FloatTensorr   r   r   rC   rO   rP   rQ   s   @r'   r   r      s    =y =, -24((4 ))4 **	4
  ++4 $D>4 
u  (5+<+<"==	>4 4r)   r   c                     ^  \ rS rSrS\4U 4S jjr          SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\	\R                        S\\
   S\\
   S\	\R                  \\	\R                  \R                  4      4   4S jjrSrU =r$ )MvpDecoderLayeri?  r   c                   > [         TU ]  5         UR                  U l        [	        U R                  UR
                  UR                  SS9U l        UR                  U l        [        UR                     U l        UR                  U l        [        R                  " U R                  5      U l        [	        U R                  UR
                  UR                  SS9U l        [        R                  " U R                  5      U l        [        R$                  " U R                  UR&                  5      U l        [        R$                  " UR&                  U R                  5      U l        [        R                  " U R                  5      U l        g )NT)rU   rV   rW   rX   )rW   rX   )r3   r4   r   rU   rS   decoder_attention_headsr   r   rW   r   r   r   r   r   r   r   encoder_attnencoder_attn_layer_normr^   decoder_ffn_dimr   r   r   r   s     r'   r4   MvpDecoderLayer.__init__@  s   %nn44,,	
 ~~#F$>$>?"(";";$&LL$@!(NN**,,	
 (*||DNN'C$99T^^V-C-CD99V33T^^D "T^^ <r)   rk   rn   encoder_hidden_statesencoder_attention_maskro   cross_attn_layer_head_maskr   cross_attn_promptrm   rq   	use_cacherr   c           
      ^   UnU	b  U	SS OSnU R                  UUUUUU
S9u  pn[        R                  R                  XR                  U R                  S9nX-   nU R                  U5      nSnSnUbq  UnU	b  U	SS OSnU R                  UUUUUUU
S9u  nnn[        R                  R                  XR                  U R                  S9nX-   nU R                  U5      nUU-   nUnU R                  U R                  U5      5      n[        R                  R                  XR                  U R                  S9nU R                  U5      n[        R                  R                  XR                  U R                  S9nX-   nU R                  U5      nU4nU
(       a  UUU4-  nU(       a  UU4-  nU$ )a  
Args:
    hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
    attention_mask (`torch.FloatTensor`): attention mask of size
        `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
    encoder_hidden_states (`torch.FloatTensor`):
        cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
    encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
        `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
    layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
        `(encoder_attention_heads,)`.
    cross_attn_layer_head_mask (`torch.FloatTensor`): mask for cross-attention heads in a given layer of
        size `(decoder_attention_heads,)`.
    self_attn_prompt (`torch.FloatTensor`): prompt of self attention of shape
        `(2, decoder_attention_heads, pro_len, head_dim)`.
    cross_attn_prompt (`torch.FloatTensor`): prompt of cross attention of shape
        `(2, decoder_attention_heads, pro_len, head_dim)`.
    past_key_value (`Tuple(torch.FloatTensor)`): cached past key and value projection states
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
Nr1   )rk   rm   rn   ro   rp   rq   rv   )rk   rl   rn   ro   rp   rm   rq   )r   r   r~   rW   rx   r   r   r   r   r   r   r   r   )r5   rk   rn   r   r   ro   r   r   r   rm   rq   r   r   self_attn_past_key_valueself_attn_weightspresent_key_valuecross_attn_present_key_valuecross_attn_weightscross_attn_past_key_valuer   s                       r'   rC   MvpDecoderLayer.forwardZ  s   H ! :H9S>"1#5Y] >Bnn'3)+(/ ?M ?
;*; --m||VZVcVc-d 011-@ (,$! ,$H @N?Yrs(;_c%NRN_N_+!65 :-8"3 O` OKM-/K MM11-<<Z^ZgZg1hM$4M 88GM !24P P !**488M+BC--m?V?Vaeanan-o/--m||VZVcVc-d 0--m< ")+=>>G)++Gr)   )r   r   rW   rU   r   r   r   r   r   r   r   )
NNNNNNNNFT)rH   rI   rJ   rK   r   r4   r=   rN   r   r   r   r   rC   rO   rP   rQ   s   @r'   r   r   ?  sA   =y =: 268<9=26=A37488<,1$(_||_ !._  (5	_
 !) 6_ "%,,/_ %-U\\$:_ #5<<0_ $ELL1_ !u||!45_ $D>_ D>_ 
u  (51B1BEDUDU1U+V"WW	X_ _r)   r   c                   z   ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\R                  S	\R                  4S
 jr
SrU =r$ )MvpClassificationHeadi  z-Head for sentence-level classification tasks.	input_dim	inner_dimnum_classespooler_dropoutc                    > [         TU ]  5         [        R                  " X5      U l        [        R
                  " US9U l        [        R                  " X#5      U l        g )Nrw   )r3   r4   r   r^   denseDropoutrW   rb   )r5   r   r   r   r   r6   s        r'   r4   MvpClassificationHead.__init__  s@     	YYy4
zzN3		)9r)   rk   rr   c                     U R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ N)rW   r   r=   tanhrb   )r5   rk   s     r'   rC   MvpClassificationHead.forward  sN    ]3

=1

=1]3m4r)   )r   rW   rb   )rH   rI   rJ   rK   rL   rM   r   r4   r=   rN   rC   rO   rP   rQ   s   @r'   r   r     sQ    7
:
: 
: 	
:
 
:U\\ ell  r)   r   c                   l   ^  \ rS rSrSrU 4S jrS\R                  S\\R                     4S jr	Sr
U =r$ )	MvpPrompti  z)Layer-wise prompt for encoder or decoder.c           	      :  > [         TU ]  5         UR                  U l        X l        X0l        UR
                  U-  U l        [        R                  " UR                  S9U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " [        R                  " UR
                  UR                  5      [        R                  " 5       [        R                  " UR                  US-  UR
                  -  5      5      U l        g )Nr   r1   )r3   r4   prompt_length
num_layersrV   r   r\   r   r   rW   	Embeddingprompt_embedding
Sequentialr^   prompt_mid_dimGELUprompt_trans)r5   r   r   rV   r6   s       r'   r4   MvpPrompt.__init__  s    #11$")3zzFNN3 "V-A-A6>> RMMIIfnnf&;&;<GGIIIf++Z!^fnn-LM
r)   
prompt_idsrr   c                 *   U R                  U R                  U5      5      nUR                  U R                  U R                  S-  U R
                  U R                  5      nU R                  U5      nUR                  / SQ5      R                  S5      nU$ )Nr1   )r   r1   r   r   )
r   r   rf   r   r   rV   r\   rW   permutesplit)r5   r   prompts      r'   rC   MvpPrompt.forward  sw    ""4#8#8#DET//11DdnnVZVcVcdf%-33A6r)   )rW   r\   rV   r   r   r   r   )rH   rI   rJ   rK   rL   r4   r=   rN   r   rC   rO   rP   rQ   s   @r'   r   r     s0    3
%,, 53F  r)   r   c                   6    \ rS rSr\rSrSrS r\	S 5       r
Srg)MvpPreTrainedModeli  modelTc                 "   U R                   R                  n[        U[        R                  5      (       aW  UR
                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         g g [        U[        R                  5      (       ad  UR
                  R                  R                  SUS9  UR                  b2  UR
                  R                  UR                     R                  5         g g g )Nr   )meanstd)r   init_std
isinstancer   r^   r@   datanormal_rY   zero_r   padding_idx)r5   moduler   s      r'   _init_weights MvpPreTrainedModel._init_weights  s    kk""fbii((MM&&CS&9{{&  &&( '--MM&&CS&9!!-""6#5#56<<> . .r)   c                     U R                   R                  n[        R                  " / SQSSSSU//U R                  S9nUR                  U5      US.nU$ )N)r      
      r1   r         r1   r<   )rn   r   )r   r   r=   rd   r<   ne)r5   	pad_tokenr   dummy_inputss       r'   r  MvpPreTrainedModel.dummy_inputs  sW    KK,,	LL"2Q2q)4L!MVZVaVab	'll95"
 r)    N)rH   rI   rJ   rK   r   config_classbase_model_prefixsupports_gradient_checkpointingr   propertyr  rO   r  r)   r'   r   r     s,    L&*#	?  r)   r   c                   .  ^  \ rS rSrSr SS\S\\R                     S\\	   4U 4S jjjr
S rS r       SS	\\R                     S
\\R                     S\\R                     S\\R                      S\\	   S\\	   S\\	   S\\\4   4S jjrSrU =r$ )
MvpEncoderi
  z
Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
[`MvpEncoderLayer`].

Args:
    config: MvpConfig
    embed_tokens (nn.Embedding): output embedding
    use_prompt (bool): whether to use prompt
r   embed_tokens
use_promptc                 P  > [         TU ]  U5        UR                  U l        UR                  U l        UR
                  nUR                  U l        UR                  U l	        UR                  (       a  [        R                  " U5      OSU l        Ub  X l        O0[        R                   " UR"                  X@R                  5      U l        [%        UR                  U5      U l        [        R(                  " [+        UR,                  5       Vs/ s H  n[/        U5      PM     sn5      U l        [        R2                  " U5      U l        X0l        U(       a7  UR8                  U l        [;        UUR,                  UR<                  5      U l        SU l         U RC                  5         g s  snf Ng      ?F)"r3   r4   rW   encoder_layerdrop	layerdropr   r   r   max_position_embeddingsmax_source_positionsscale_embeddingmathsqrtembed_scaler  r   r   
vocab_sizer+   embed_positions
ModuleListrangeencoder_layersr   layersr   layernorm_embeddingr  r   r   r   r   gradient_checkpointing	post_init)r5   r   r  r  rU   r   r6   s         r'   r4   MvpEncoder.__init__  sD    	 ~~11NN	!..$*$B$B!393I3I499Y/s# , "V->->	K[K[ \D<** 
 mmeFLaLaFb$cFb_V%<Fb$cd#%<<	#: $!'!5!5D$-%%..%D! ',# %ds   F#c                     U R                   $ r   r  r5   s    r'   get_input_embeddingsMvpEncoder.get_input_embeddings;         r)   c                     Xl         g r   r%  r5   values     r'   set_input_embeddingsMvpEncoder.set_input_embeddings>      !r)   r   rn   	head_maskinputs_embedsrq   output_hidden_statesreturn_dictrr   c           	         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb$  UnUR
                  n	UR                  SU	S   5      nO.Ub   UR                  5       SS n	USS2SS2S4   nO[	        S5      eUc  U R                  U5      U R                  -  nU R                  U5      n
XJ-   nU R                  U5      n[        R                  R                  XR                  U R                  S9nU R                   (       aJ  ["        R$                  " U R&                  5      R)                  U R*                  5      nU R-                  U5      nUb  [/        X$R0                  5      nU(       a  SOSnU(       a  SOSnUb`  UR                  5       S   [3        U R4                  5      :w  a6  [	        S[3        U R4                  5       S	UR                  5       S    S
35      e[7        U R4                  5       H  u  nnU(       a  X4-   nSnU R                  (       a(  ["        R8                  " / 5      nUU R:                  :  a  SnU(       a  SnOU R<                  (       aP  U R                  (       a?  U R?                  UR@                  UUUb  UU   OSU R                   (       a  WU   OSU5      nO(U" UUUb  UU   OSU R                   (       a  WU   OSUS9nUS   nU(       d  M  UUS   4-   nM     U(       a  X4-   nU(       d  [C        S XU4 5       5      $ [E        XUS9$ )a  
Args:
    input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
        provide it.

        Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
        [`PreTrainedTokenizer.__call__`] for details.

        [What are input IDs?](../glossary#input-ids)
    attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):
        Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:

        - 1 indicates the head is **not masked**,
        - 0 indicates the head is **masked**.

    inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
        This is useful if you want more control over how to convert `input_ids` indices into associated vectors
        than the model's internal embedding lookup matrix.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more detail.
    return_dict (`bool`, *optional*):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
NzDYou cannot specify both input_ids and inputs_embeds at the same timer    z5You have to specify either input_ids or inputs_embedsrv   r  r   z&The head_mask should be specified for  layers, but it is for .FT)NN)ro   r   rq   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r  .0vs     r'   	<genexpr>%MvpEncoder.forward.<locals>.<genexpr>  s     e$Sq$Ss   	last_hidden_staterk   
attentions)#r   rq   r2  use_return_dictr$   r"   rf   ry   r  r  r  r   r   r~   rW   rx   r  r=   r>   r   r|   r<   r   r   r;   lenr  	enumeraterandr  r!  _gradient_checkpointing_func__call__tupler   )r5   r   rn   r0  r1  rq   r2  r3  inputinput_shape	embed_posrk   r   r   encoder_statesall_attentionsidxencoder_layerto_dropdropout_probabilitylayer_outputss                        r'   rC   MvpEncoder.forwardA  sD   \ 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]  ]%>cdd"E++K!r;r?;I&',,.s3K!!Q(+ETUU  --i84;K;KKM((/	%100?--m||VZVcVc-d ??d&8&89<<T[[IJ#44Z@ %7H[H[\N30d  ~~"s4;;'78 <S=M<N O!(+,A/ 
 #,DKK"8C#!/2B!BG}}&+jjn#&7"G ,..4==$($E$E%..%&+4+@3d26//)#.t)%M %2%&;D;P3VZCG??*:3*?X\*;%M !.a 0  !/=3C2E!EE #9H  +.>>Ne]N$Seee+Vd
 	
r)   )rW   r  r  r  r!  r  r   r  r  r   r   r   r  NF)NNNNNNN)rH   rI   rJ   rK   rL   r   r   r   r   r   r4   r'  r-  r=   
LongTensorrN   r   r   r   r   rC   rO   rP   rQ   s   @r'   r  r  
  s    lq$$/7/E$ZbcgZh$ $L!"
 1515,059,0/3&*J
E,,-J
 !.J
 ELL)	J

   1 12J
 $D>J
 'tnJ
 d^J
 
uo%	&J
 J
r)   r  c                     ^  \ rS rSrSr SS\S\\R                     S\\	   4U 4S jjjr
S rS r            SS	\\R                     S
\\R                     S\\R                      S\\R                     S\\R                     S\\R                     S\\\R                         S\\R                      S\\	   S\\	   S\\	   S\\	   S\\\4   4S jjrSrU =r$ )
MvpDecoderi  z
Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`MvpDecoderLayer`]

Args:
    config: MvpConfig
    embed_tokens (nn.Embedding): output embedding
    use_prompt (bool): whether to use prompt
r   r  r  c                   > [         TU ]  U5        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  (       a   [        R                  " UR                  5      OSU l        Ub  X l        O;[        R                   " UR"                  UR                  U R                  5      U l        [%        UR                  UR                  5      U l        [        R(                  " [+        UR,                  5       Vs/ s H  n[/        U5      PM     sn5      U l        [        R2                  " UR                  5      U l        X0l        U(       a]  UR8                  U l        [;        UUR,                  UR<                  5      U l        [;        UUR,                  UR<                  5      U l         SU l!        U RE                  5         g s  snf r  )#r3   r4   rW   decoder_layerdropr  r   r   r  max_target_positionsr  r  r  r   r  r  r   r   r  r+   r  r  r  decoder_layersr   r  r   r   r  r   r   r   r   r   r!  r"  )r5   r   r  r  r   r6   s        r'   r4   MvpDecoder.__init__  ss    	 ~~11!..$*$B$B!8>8N8N499V^^4TW# , "V->->PTP`P` aD<**NN 
 mmeFLaLaFb$cFb_V%<Fb$cd#%<<#? $!'!5!5D$-%%..%D!
 &/%%..&D" ',#' %ds   G&c                     U R                   $ r   r%  r&  s    r'   r'  MvpDecoder.get_input_embeddings   r)  r)   c                     Xl         g r   r%  r+  s     r'   r-  MvpDecoder.set_input_embeddings  r/  r)   r   rn   r   r   r0  cross_attn_head_maskpast_key_valuesr1  r   rq   r2  r3  rr   c                 d	   U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  UOU R                   R                  nUb  Ub  [        S5      eUb$  UnUR                  nUR                  SUS   5      nO.Ub   UR                  5       SS nUSS2SS2S4   nO[        S5      eUb  US   S   R                  S   OSnUc  U R                  U5      U R                  -  n[        X.X5      nUb  Ub  [        XHR                  US   S9nU R                  X5      nUU-   nU R                  U5      n[         R"                  R%                  UU R$                  U R&                  S9nU R(                  (       a[  [*        R,                  " U R.                  5      R1                  U R2                  5      nU R5                  U5      nU R7                  U5      nU R8                  (       a/  U R&                  (       a  U	(       a  [:        R=                  S	5        S
n	U(       a  SOSnU
(       a  SOSnU
(       a  Ub  SOSnU	(       a  SOSn[?        XV/SS/5       Hn  u  nnUc  M  UR                  5       S   [A        U RB                  5      :w  d  M7  [        SU S[A        U RB                  5       SUR                  5       S    S35      e   [E        U RB                  5       GHh  u  nnU(       a  UU4-  nU R&                  (       a(  [*        RF                  " / 5      nUU RH                  :  a  MM  Ub  UU   OSnU R8                  (       at  U R&                  (       ac  U RK                  URL                  UUUUUb  UU   OSUb  UU   OSU R(                  (       a  WU   OSU R(                  (       a  WU   OSSU
U	5      nOLU" UUUUUb  UU   OSUb  UU   OSU R(                  (       a  WU   OSU R(                  (       a  WU   OSUU
U	S9nUS   nU	(       a  UUU
(       a  SOS   4-  nU
(       d  GMP  UUS   4-  nUc  GM_  UUS   4-  nGMk     U(       a  UU4-  nU	(       a  UOSn U(       d  [O        S UU UUU4 5       5      $ [Q        UU UUUS9$ )ak  
Args:
    input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
        provide it.

        Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
        [`PreTrainedTokenizer.__call__`] for details.

        [What are input IDs?](../glossary#input-ids)
    attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):
        Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
        of the decoder.
    encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*):
        Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values
        selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
        Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:

        - 1 indicates the head is **not masked**,
        - 0 indicates the head is **masked**.

    cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
        Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing
        cross-attention on hidden heads. Mask values selected in `[0, 1]`:

        - 1 indicates the head is **not masked**,
        - 0 indicates the head is **masked**.

    past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
        shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
        shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.

        Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
        cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.

        If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
        that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
        all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
    inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
        This is useful if you want more control over how to convert `input_ids` indices into associated vectors
        than the model's internal embedding lookup matrix.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more detail.
    return_dict (`bool`, *optional*):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
NzTYou cannot specify both decoder_input_ids and decoder_inputs_embeds at the same timer    zEYou have to specify either decoder_input_ids or decoder_inputs_embedsr   r1   )r   rv   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr  r0  r_  zThe `z` should be specified for r5  r6  )
rn   r   r   ro   r   r   r   rm   rq   r   r   r   c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r  r8  s     r'   r;  %MvpDecoder.forward.<locals>.<genexpr>  s      mA ms   	)r>  r`  rk   r?  cross_attentions))r   rq   r2  r   r@  r$   r"   rf   ry   r  r  r   r   r;   r  r   r   r~   rW   rx   r  r=   r>   r   r|   r<   r   r   r!  loggerwarning_onceziprA  r  rB  rC  r  rD  rE  rF  r   )!r5   r   rn   r   r   r0  r_  r`  r1  r   rq   r2  r3  rG  rH  r8   	positionsrk   r   r   r   all_hidden_statesall_self_attnsall_cross_attentionsnext_decoder_cache	attn_mask	mask_namerL  decoder_layerrO  rm   rP  
next_caches!                                    r'   rC   MvpDecoder.forward  s   ` 2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	%0%<k$++B]B]  ]%>stt"E#//K!r;r?;I&',,.s3K!!Q(+Edee DSC^!3A!6!<!<Q!?de  --i84;K;KKM:

 !,1G1S%?&(;(;[QS_&"
 ((G	%	100?--mt||VZVcVc-d ??d&8&89<<T[[IJ#44Z@ $ 6 6z B&&4==##p "	 #7BD0d&7<Q<]rdh#,R$ %((IKYoKp$q Iy$>>#A&3t{{+;<$	{*DSEUDV W%NN,Q/03  %r #,DKK"8C#!m%55!}}&+jjn#&75D5P_S1VZN**t}} $ A A!**!")*&/&;IcN1E1Q(-W[-1__$S)$.2oo%c*4%! !.!#1*?+A7@7LYs^RV5I5U,S1[_?C&6s&;TXAE'8'=VZ#1&7'! *!,M"}:KQQR'S&UU"  =#3"55(4(]1-=,??(g #9l  -!11+4'$
 '5FXlm  
 9+&+%1
 	
r)   )r   rW   r  r  r  r!  r  r   r  rX  r   r   r   r  rR  )NNNNNNNNNNNN)rH   rI   rJ   rK   rL   r   r   r   r   r   r4   r'  r-  r=   rS  rN   r   r   r   r   r   rC   rO   rP   rQ   s   @r'   rU  rU    s|    lq&&/7/E&ZbcgZh& &P!"
 1515=A=A,07;=A59$(,0/3&*_
E,,-_
 !._
  ((9(9:	_

 !))9)9 :_
 ELL)_
 'u||4_
 "$u'8'8"9:_
   1 12_
 D>_
 $D>_
 'tn_
 d^_
 
u??	@_
 _
r)   rU  c            $         ^  \ rS rSrS/rSS/rS\4U 4S jjrS rS r	S	 r
S
 rS r\               SS\\R                      S\\R"                     S\\R                      S\\R                      S\\R"                     S\\R"                     S\\R"                     S\\\R&                        S\\\R&                        S\\R&                     S\\R&                     S\\   S\\   S\\   S\\   S\\\4   4 S jj5       rSrU =r$ )MvpModelr   final_logits_biasencoder.embed_tokens.weightdecoder.embed_tokens.weightr   c                 t  > [         TU ]  U5        UR                  UR                  p2UR                  U l        [
        R                  " X1R                  U5      U l        [        XR                  UR                  5      U l
        [        XR                  UR                  5      U l        U R                  5         g r   )r3   r4   r   r  r  r   r   r   sharedr  encoderrU  decoderr"  )r5   r   r   r  r6   s       r'   r4   MvpModel.__init__  s     "("5"5v7H7HZ ++ll:~~{K!&++v7H7HI!&++v7H7HI 	r)   c                     U R                   $ r   )rx  r&  s    r'   r'  MvpModel.get_input_embeddings  s    {{r)   c                 |    Xl         U R                   U R                  l        U R                   U R                  l        g r   )rx  ry  r  rz  r+  s     r'   r-  MvpModel.set_input_embeddings  s'    $(KK!$(KK!r)   c                     U R                   $ r   )ry  r&  s    r'   get_encoderMvpModel.get_encoder      ||r)   c                     U R                   $ r   rz  r&  s    r'   get_decoderMvpModel.get_decoder  r  r)   c                 4   U R                   (       d   S5       eU R                  S5        U R                  R                  R                  S5        U R                  R                  R                  S5        U R                  R
                  R                  S5        g )NzHIf you want to use lightweight tuning, make sure that `use_prompt=True`.FT)r  requires_grad_ry  r   rz  r   r&  s    r'   set_lightweight_tuningMvpModel.set_lightweight_tuning  sj    j jjE"%%44T:%%44T:&&55d;r)   r   rn   decoder_input_idsdecoder_attention_maskr0  decoder_head_maskr_  encoder_outputsr`  r1  decoder_inputs_embedsr   rq   r2  r3  rr   c                 R   UcE  UcB  Uc  [        S5      e[        XR                  R                  U R                  R                  5      nUb  UOU R                  R
                  nUb  UOU R                  R                  nUb  UOU R                  R                  nUb  UOU R                  R                  nUc  U R                  UUUU
UUUS9nORU(       aK  [        U[        5      (       d6  [        US   [        U5      S:  a  US   OS[        U5      S:  a  US   OSS9nU R                  UUUS   UUUU	UUUUUS9nU(       d  UU-   $ [        UR                  UR                   UR"                  UR$                  UR&                  UR                  UR"                  UR$                  S	9$ )
ar  
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    Mvp uses the `eos_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values`
    is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.

    If you want to change padding behavior, you should read [`modeling_mvp._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0,
    1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
NzIf no `decoder_input_ids` or `decoder_inputs_embeds` are passed, `input_ids` cannot be `None`. Please pass either `input_ids` or `decoder_input_ids` or `decoder_inputs_embeds`.)r   rn   r0  r1  rq   r2  r3  r   r   r1   r=  r   rn   r   r   r0  r_  r`  r1  r   rq   r2  r3  )r>  r`  decoder_hidden_statesdecoder_attentionsrd  encoder_last_hidden_stater   encoder_attentions)r$   r(   r   r   r   rq   r2  r   r@  ry  r   r   rA  rz  r   r>  r`  rk   r?  rd  )r5   r   rn   r  r  r0  r  r_  r  r`  r1  r  r   rq   r2  r3  decoder_outputss                    r'   rC   MvpModel.forward  s   d $)>)F  U  !3;;33T[[5W5W! 2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	%0%<k$++B]B]""ll#-#+"3%9' + O O_!M!M-"1!"4474H14Loa0RV14_1E1I?1-tO ,,'1"1!"4#1'!5+//!5# ' 
 "_44!-??+;;"1"?"?.99,==&5&G&G"1"?"?.99	
 		
r)   )rz  ry  rx  r  NNNNNNNNNNNNNNN)rH   rI   rJ   rK   "_keys_to_ignore_on_load_unexpected_tied_weights_keysr   r4   r'  r-  r  r  r  r   r   r=   rS  rN   r   r   r   r   r   r   rC   rO   rP   rQ   s   @r'   rs  rs    s   *=)>&79VWy 0
<  15158<=A,0487;=A=A59=A$(,0/3&*!r
E,,-r
 !.r
 $E$4$45	r

 !))9)9 :r
 ELL)r
 $ELL1r
 'u||4r
 "$u'8'8"9:r
 "$u'8'8"9:r
   1 12r
  ((9(9:r
 D>r
 $D>r
 'tnr
  d^!r
" 
u((	)#r
 r
r)   rs  ze
    The MVP Model with a language modeling head. Can be used for various text generation tasks.
    )custom_introc            &         ^  \ rS rSr/ SQrS\4U 4S jjrS rS r S%S\	S	\
\	   S
\S\R                  4U 4S jjjrS\	SS4S jrS rS rS r\                S&S\
\R*                     S\
\R,                     S\
\R*                     S\
\R*                     S\
\R,                     S\
\R,                     S\
\R,                     S\
\\R0                        S\
\\R0                        S\
\R0                     S\
\R0                     S\
\R*                     S\
\   S\
\   S\
\   S \
\   S\\\4   4"S! jj5       rS\R,                  4S" jr\S# 5       rS$r U =r!$ )'MvpForConditionalGenerationi  )ru  rv  lm_head.weightr   c                 v  > [         TU ]  U5        [        U5      U l        U R	                  S[
        R                  " SU R                  R                  R                  45      5        [        R                  " UR                  U R                  R                  R                  SS9U l        U R                  5         g )Nrt  r   Fr[   )r3   r4   rs  r   register_bufferr=   r{   rx  r-   r   r^   r   lm_headr"  r   s     r'   r4   $MvpForConditionalGeneration.__init__  s     f%
0%++q$**BSBSBbBb>c2deyy1B1B1Q1QX]^ 	r)   c                 6    U R                   R                  5       $ r   )r   r  r&  s    r'   r  'MvpForConditionalGeneration.get_encoder      zz%%''r)   c                 6    U R                   R                  5       $ r   )r   r  r&  s    r'   r  'MvpForConditionalGeneration.get_decoder  r  r)   Nnew_num_tokenspad_to_multiple_ofmean_resizingrr   c                 J   > [         TU ]  XU5      nU R                  U5        U$ r   )r3   resize_token_embeddings_resize_final_logits_bias)r5   r  r  r  new_embeddingsr6   s        r'   r  3MvpForConditionalGeneration.resize_token_embeddings  s+     8]jk&&~6r)   c                 ,   U R                   R                  S   nX::  a  U R                   S S 2S U24   nON[        R                  " SX-
  4U R                   R                  S9n[        R
                  " U R                   U/SS9nU R                  SU5        g )Nr    r   r  rt   rt  )rt  r"   r=   r{   r<   rz   r  )r5   r  old_num_tokensnew_bias
extra_biass        r'   r  5MvpForConditionalGeneration._resize_final_logits_bias  s    //55b9+--a..@AHa)H%IRVRhRhRoRopJyy$"8"8*!E1MH0(;r)   c                     U R                   $ r   r  r&  s    r'   get_output_embeddings1MvpForConditionalGeneration.get_output_embeddings  r  r)   c                     Xl         g r   r  r5   r  s     r'   set_output_embeddings1MvpForConditionalGeneration.set_output_embeddings      %r)   c                 n    U R                   R                  5         U R                  R                  S5        g rR  r   r  r  r  r&  s    r'   r  2MvpForConditionalGeneration.set_lightweight_tuning  $    

))+##E*r)   r   rn   r  r  r0  r  r_  r  r`  r1  r  labelsr   rq   r2  r3  c                    Ub  UOU R                   R                  nUbX  U(       a  [        R                  S5        SnUc7  Uc4  [	        XR                   R
                  U R                   R                  5      nU R                  UUUUUUUUU	U
UUUUUS9nU R                  US   5      U R                  -   nSnUbF  [        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  UR                   UR"                  UR$                  UR&                  UR(                  S9	$ )	a=  
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    Mvp uses the `eos_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values`
    is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.

    If you want to change padding behavior, you should read [`modeling_mvp._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0,
    1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example of summarization:

Fine-tuning a model
```python
>>> import torch
>>> from transformers import AutoTokenizer, MvpForConditionalGeneration

>>> tokenizer = AutoTokenizer.from_pretrained("RUCAIBox/mvp")
>>> model = MvpForConditionalGeneration.from_pretrained("RUCAIBox/mvp")

>>> inputs = tokenizer(
...     "Summarize: You may want to stick it to your boss and leave your job, but don't do it if these are your reasons.",
...     return_tensors="pt",
... )
>>> labels = tokenizer("Bad Reasons To Quit Your Job", return_tensors="pt")["input_ids"]

>>> loss = model(**inputs, labels=labels).loss
>>> loss.backward()
```

Inference after the model fine-tuned
```python
>>> with torch.no_grad():
...     generated_ids = model.generate(**inputs)

>>> generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
```
NzJThe `use_cache` argument is changed to `False` since `labels` is provided.F)rn   r  r  r  r0  r  r_  r`  r1  r  r   rq   r2  r3  r   r    r   	losslogitsr`  r  r  rd  r  r   r  )r   r@  re  warningr(   r   r   r   r  rt  r	   rf   r  r   r`  r  r  rd  r  r   r  )r5   r   rn   r  r  r0  r  r_  r  r`  r1  r  r  r   rq   r2  r3  r   	lm_logitsmasked_lm_lossloss_fctoutputs                         r'   rC   #MvpForConditionalGeneration.forward  s   b &1%<k$++B]B]klI (-B-J$6KK44dkk6X6X%! **)/+#9/!5+'"7/!5#  
" LL,t/E/EE	')H%innR9O9O&PRXR]R]^`RabN\GABK/F3A3M^%.YSYY#33")"?"?&99$55&-&G&G")"?"?&99

 
	
r)   c                 j    [        XR                  R                  U R                  R                  5      $ r   )r(   r   r   r   )r5   r  s     r'   %prepare_decoder_input_ids_from_labelsAMvpForConditionalGeneration.prepare_decoder_input_ids_from_labels;  s#    !&++*B*BDKKDfDfggr)   c                 b   ^ SnU  H%  nU[        U4S jUS S  5       5      USS  -   4-  nM'     U$ )Nr  c              3   x   >#    U  H/  oR                  S TR                  UR                  5      5      v   M1     g7frG   index_selectr|   r<   r9  
past_statebeam_idxs     r'   r;  =MvpForConditionalGeneration._reorder_cache.<locals>.<genexpr>D  s1     rcqU_--aZ=N=N1OPPcq   7:r1   rF  r`  r  reordered_past
layer_pasts    `  r'   _reorder_cache*MvpForConditionalGeneration._reorder_cache>  sO    )JrcmnpopcqrrQR.! N * r)   r  r   )NTNNNNNNNNNNNNNNNN)"rH   rI   rJ   rK   r  r   r4   r  r  rM   r   r   r   r   r  r  r  r  r  r   r=   rS  rN   r   r   r   r   r   rC   r  staticmethodr  rO   rP   rQ   s   @r'   r  r    sE    jy (( dh!7?}\`	 < < <&+  15158<=A,0487;=A=A59=A-1$(,0/3&*#A
E,,-A
 !.A
 $E$4$45	A

 !))9)9 :A
 ELL)A
 $ELL1A
 'u||4A
 "$u'8'8"9:A
 "$u'8'8"9:A
   1 12A
  ((9(9:A
 ))*A
 D>A
 $D>A
  'tn!A
" d^#A
$ 
uo%	&%A
 A
FhELL h  r)   r  z
    Mvp model with a sequence classification/head on top (a linear layer on top of the pooled output) e.g. for GLUE
    tasks.
    c            $         ^  \ rS rSrSS/rS\4U 4S jjrS r\               SS\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S\	\\
R                        S\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\	\   S\\\4   4 S jj5       rSrU =r$ )MvpForSequenceClassificationiJ  ru  rv  r   c                    > [         TU ]  " U40 UD6  [        U5      U l        [	        UR
                  UR
                  UR                  UR                  5      U l        U R                  5         g r   )
r3   r4   rs  r   r   r   
num_labelsclassifier_dropoutclassification_headr"  )r5   r   kwargsr6   s      r'   r4   %MvpForSequenceClassification.__init__S  sZ    *6*f%
#8NNNN%%	$
  	r)   c                 n    U R                   R                  5         U R                  R                  S5        g rR  )r   r  r  r  r&  s    r'   r  3MvpForSequenceClassification.set_lightweight_tuning`  s&    

))+  //6r)   r   rn   r  r  r0  r  r_  r  r1  r  r  r   rq   r2  r3  rr   c                    Ub  UOU R                   R                  nUb  SnUc%  U	b"  [        SU R                  R                   35      eU R                  UUUUUUUUU	U
UUUUS9nUS   nUR                  U R                   R                  5      R                  UR                  5      n[        [        R                  " UR                  S5      5      5      S:  a  [        S5      eUUSS24   R                  UR!                  S5      SUR!                  S5      5      SS2SSS24   nU R#                  U5      nSnUGb  U R                   R$                  c  U R                   R&                  S:X  a  S	U R                   l        OyU R                   R&                  S:  aN  UR(                  [        R*                  :X  d  UR(                  [        R,                  :X  a  S
U R                   l        OSU R                   l        U R                   R$                  S	:X  aT  [/        5       nU R                   R&                  S:X  a&  U" UR1                  5       UR1                  5       5      nOU" UU5      nOU R                   R$                  S
:X  aG  [3        5       nU" UR                  SU R                   R&                  5      UR                  S5      5      nO-U R                   R$                  S:X  a  [5        5       nU" UU5      nU(       d  U4USS -   nUb  U4U-   $ U$ [7        UUUR8                  UR:                  UR<                  UR>                  UR@                  URB                  URD                  S9	$ )a
  
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    Mvp uses the `eos_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values`
    is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.

    If you want to change padding behavior, you should read [`modeling_mvp._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0,
    1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Example of single-label classification:

Fine-tuning a model on `num_labels` classes
```python
>>> import torch
>>> from transformers import AutoTokenizer, MvpForSequenceClassification

>>> num_labels = 2  # for example, this is a binary classification task
>>> tokenizer = AutoTokenizer.from_pretrained("RUCAIBox/mvp")
>>> model = MvpForSequenceClassification.from_pretrained("RUCAIBox/mvp", num_labels=num_labels)

>>> inputs = tokenizer("Classify: Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor(1)  # the real label for inputs

>>> loss = model(**inputs, labels=labels).loss
>>> loss.backward()
```

Inference after the model fine-tuned
```python
>>> with torch.no_grad():
...     logits = model(**inputs).logits

>>> predicted_class_id = logits.argmax()
```
NFz8Passing input embeddings is currently not supported for rn   r  r  r0  r  r_  r  r1  r  r   rq   r2  r3  r   r   z7All examples must have the same number of <eos> tokens.r    
regressionsingle_label_classificationmulti_label_classificationr  )#r   r@  NotImplementedErrorr6   rH   r   eqeos_token_idr|   r<   rA  r=   unique_consecutivesumr$   rf   ry   r  problem_typer  r;   r?   rM   r
   squeezer	   r   r   r`  r  r  rd  r  r   r  )r5   r   rn   r  r  r0  r  r_  r  r1  r  r  r   rq   r2  r3  r   rk   eos_masksentence_representationr  r  r  r  s                           r'   rC   $MvpForSequenceClassification.forwardd  s   Z &1%<k$++B]B]I!:%J4>>KbKbJcd  **)/#9/!5+'"7/!5#  
   
<< 8 89<<]=Q=QRu''Q89A=VWW"/!"<"A"A-BTBTUVBWY[]j]o]opr]s"tr1H#
 ))*AB{{''/;;))Q./;DKK,[[++a/V\\UZZ5OSYS_S_chclclSl/LDKK,/KDKK,{{''<7"9;;))Q.#FNN$4fnn6FGD#FF3D))-JJ+-B0F0F GUWY))-II,./Y,F)-)9TGf$EvE.#33")"?"?&99$55&-&G&G")"?"?&99

 
	
r)   )r  r   r  )rH   rI   rJ   rK   r  r   r4   r  r   r   r=   rS  rN   r   r   r   r   r   r   rC   rO   rP   rQ   s   @r'   r  r  J  s    89VWy 7  15158<=A,0487;=A59=A-1$(,0/3&*!T
E,,-T
 !.T
 $E$4$45	T

 !))9)9 :T
 ELL)T
 $ELL1T
 'u||4T
 "$u'8'8"9:T
   1 12T
  ((9(9:T
 ))*T
 D>T
 $D>T
 'tnT
  d^!T
" 
u55	6#T
 T
r)   r  c            &         ^  \ rS rSrSS/rU 4S jrS r\                SS\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\	R                     S\\\	R                        S\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\   S\\   S\\   S\\   S\\\4   4"S jj5       rSrU =r$ )MvpForQuestionAnsweringi  ru  rv  c                    > [         TU ]  U5        SUl        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r0   )
r3   r4   r  rs  r   r   r^   hidden_size
qa_outputsr"  r   s     r'   r4    MvpForQuestionAnswering.__init__   s[      ++f%
))F$6$68I8IJ 	r)   c                 n    U R                   R                  5         U R                  R                  S5        g rR  )r   r  r   r  r&  s    r'   r  .MvpForQuestionAnswering.set_lightweight_tuning  s$    

))+&&u-r)   r   rn   r  r  r0  r  r_  r  start_positionsend_positionsr1  r  r   rq   r2  r3  rr   c                    Ub  UOU R                   R                  nU	b  U
b  SnU R                  UUUUUUUUUUUUUUS9nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnU	b  U
b  [        U	R                  5       5      S:  a  U	R                  S5      n	[        U
R                  5       5      S:  a  U
R                  S5      n
UR                  S5      nU	R                  SU5      n	U
R                  SU5      n
[        US9nU" UU	5      nU" UU
5      nUU-   S	-  nU(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  UR                  UR                  UR                   UR"                  UR$                  S
9
$ )a
  
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    Mvp uses the `eos_token_id` as the starting token for `decoder_input_ids` generation. If `past_key_values`
    is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`).

    For translation and summarization training, `decoder_input_ids` should be provided. If no
    `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right
    for denoising pre-training following the paper.
decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.

    If you want to change padding behavior, you should read [`modeling_mvp._prepare_decoder_attention_mask`]
    and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
    information on the default strategy.
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0,
    1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.

Example:

Fine-tuning a model for extrative question answering, and our model also supports generative question answering
using `BartForConditionalGeneration`
```python
>>> import torch
>>> from transformers import AutoTokenizer, MvpForQuestionAnswering

>>> tokenizer = AutoTokenizer.from_pretrained("RUCAIBox/mvp")
>>> model = MvpForQuestionAnswering.from_pretrained("RUCAIBox/mvp")

>>> inputs = tokenizer(
...     "Answer the following question: Who was Jim Henson? [SEP] Jim Henson was a nice puppet",
...     return_tensors="pt",
... )
>>> target_start_index = torch.tensor([18])
>>> target_end_index = torch.tensor([19])

>>> loss = model(**inputs, start_positions=target_start_index, end_positions=target_end_index).loss
>>> loss.backward()
```

Inference after the model fine-tuned
```python
>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()

>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> predict_answer = tokenizer.decode(predict_answer_tokens)
```
NFr  r   r   r    rt   )ignore_indexr1   )
r  start_logits
end_logitsr`  r  r  rd  r  r   r  )r   r@  r   r   r   r  rh   rA  ry   r   r	   r   r`  r  r  rd  r  r   r  )r5   r   rn   r  r  r0  r  r_  r  r  r  r1  r  r   rq   r2  r3  r   sequence_outputr  r  r	  
total_lossignored_indexr  
start_lossend_lossr  s                               r'   rC   MvpForQuestionAnswering.forward  s   f &1%<k$++B]B]&=+DI**)/#9/!5+'"7/!5#  
" "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J F 0:/EZMF*Q6Q2%!#33")"?"?&99$55&-&G&G")"?"?&99
 	
r)   )r   r  r   r  )rH   rI   rJ   rK   r  r4   r  r   r   r=   rN   rS  r   r   r   r   r   r   rC   rO   rP   rQ   s   @r'   r  r    s   79VW
.  -1158<=A,0487;=A6:4859=A$(,0/3&*#Q
ELL)Q
 !.Q
 $E$4$45	Q

 !))9)9 :Q
 ELL)Q
 $ELL1Q
 'u||4Q
 "$u'8'8"9:Q
 "%"2"23Q
   0 01Q
   1 12Q
  ((9(9:Q
 D>Q
 $D>Q
  'tn!Q
" d^#Q
$ 
u99	:%Q
 Q
r)   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )MvpDecoderWrapperi  z
This wrapper class is a helper class to correctly load pretrained checkpoints when the causal language model is
used in combination with the [`EncoderDecoderModel`] framework.
c                 D   > [         TU ]  U5        [        U5      U l        g r   )r3   r4   rU  rz  r   s     r'   r4   MvpDecoderWrapper.__init__  s     !&)r)   c                 &    U R                   " U0 UD6$ r   r  )r5   argsr  s      r'   rC   MvpDecoderWrapper.forward  s    ||T,V,,r)   r  )	rH   rI   rJ   rK   rL   r4   rC   rO   rP   rQ   s   @r'   r  r    s    
*- -r)   r  c                      ^  \ rS rSrS/rU 4S jrS rS rS rS r	S r
S	 rS
 r\             SS\\R                      S\\R"                     S\\R$                     S\\R$                     S\\R"                     S\\R"                     S\\\R$                        S\\R$                     S\\R                      S\\   S\\   S\\   S\\   S\\\4   4S jj5       r\S 5       rSrU =r$ )MvpForCausalLMi  r  c                 
  > [         R                  " U5      nSUl        SUl        [        TU ]  U5        [        U5      U l        [        R                  " UR                  UR                  SS9U l        U R                  5         g )NTFr[   )copydeepcopyrX   is_encoder_decoderr3   r4   r  r   r   r^   r  r  r  r"  r   s     r'   r4   MvpForCausalLM.__init__  sf    v& $)! &v.
yy!3!3V5F5FUS 	r)   c                 B    U R                   R                  R                  $ r   r   rz  r  r&  s    r'   r'  #MvpForCausalLM.get_input_embeddings  s    zz!!...r)   c                 8    XR                   R                  l        g r   r  r+  s     r'   r-  #MvpForCausalLM.set_input_embeddings  s    */

'r)   c                     U R                   $ r   r  r&  s    r'   r  $MvpForCausalLM.get_output_embeddings  r  r)   c                     Xl         g r   r  r  s     r'   r  $MvpForCausalLM.set_output_embeddings  r  r)   c                 $    XR                   l        g r   r   rz  )r5   rz  s     r'   set_decoderMvpForCausalLM.set_decoder  s    $

r)   c                 .    U R                   R                  $ r   r(  r&  s    r'   r  MvpForCausalLM.get_decoder  s    zz!!!r)   c                 n    U R                   R                  5         U R                  R                  S5        g rR  r  r&  s    r'   r  %MvpForCausalLM.set_lightweight_tuning  r  r)   r   rn   r   r   r0  r_  r`  r1  r  r   rq   r2  r3  rr   c                 L   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R                  R                  UUUUUUUUU
UUUS9nU R                  US   5      nSnU	bF  [        5       nU" UR                  SU R                   R                  5      U	R                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  UR                  UR                  S9$ )a9  
cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`:

    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import AutoTokenizer, MvpForCausalLM

>>> tokenizer = AutoTokenizer.from_pretrained("RUCAIBox/mvp")
>>> model = MvpForCausalLM.from_pretrained("RUCAIBox/mvp", add_cross_attention=False)

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> logits = outputs.logits
>>> list(logits.shape)
[1, 8, 50267]
```Nr  r   r    r   )r  r  r`  rk   r?  rd  )r   rq   r2  r@  r   rz  r  r	   rf   r  r   r`  rk   r?  rd  )r5   r   rn   r   r   r0  r_  r`  r1  r  r   rq   r2  r3  r   r  r  r  r  s                      r'   rC   MvpForCausalLM.forward  sF   X 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] **$$)"7#9!5+'/!5# % 
 gaj)')HFKKDKK,B,BCV[[QS_UDY,F'+'7D7V#CVC0#33!//))$55
 	
r)   c                 P   ^ SnU  H  nU[        U4S jU 5       5      4-  nM     U$ )Nr  c              3   x   >#    U  H/  oR                  S TR                  UR                  5      5      v   M1     g7frG   r  r  s     r'   r;  0MvpForCausalLM._reorder_cache.<locals>.<genexpr>4  s1     ncmU_--aZ=N=N1OPPcmr  r  r  s    `  r'   r  MvpForCausalLM._reorder_cache/  s8    )Jncmnn N * r)   r  )NNNNNNNNNNNNN)rH   rI   rJ   rK   r  r4   r'  r-  r  r  r)  r  r  r   r   r=   rS  rN   r   r   r   r   r   r   rC   r  r  rO   rP   rQ   s   @r'   r  r    s   *+
/0&%"+  1515=A>B,07;=A59-1$(,0/3&*S
E,,-S
 !.S
  ((9(9:	S

 !)):): ;S
 ELL)S
 'u||4S
 "$u'8'8"9:S
   1 12S
 ))*S
 D>S
 $D>S
 'tnS
 d^S
 
u77	8S
 S
j  r)   r  )r  r  r  r  rs  r   )=rL   r  r  typingr   r   r   r   r=   torch.utils.checkpointr   torch.nnr   r	   r
   activationsr   
generationr   modeling_attn_mask_utilsr   r   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   utilsr   r   configuration_mvpr   
get_loggerrH   re  rN   rM   r(   r   r+   ModulerS   r   r   r   r   r   r  rU  rs  r  r  r  r  r  __all__r  r)   r'   <module>rB     s      / /    A A ! )   . , ( 
		H	%%,, c [^ ";BLL ;2XB299 XBvEbii EPzbii z|BII 0		 2   6A
# A
HW
# W
t Z
! Z
 Z
z 
|"4o |
|~ i
#5 i
i
X e
0 e
 e
R-* -B' BJr)   