
"""Pop2Piano model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class Pop2PianoConfig(PretrainedConfig):
    r"""
This is the configuration class to store the configuration of a [`Pop2PianoForConditionalGeneration`]. It is used
to instantiate a Pop2PianoForConditionalGeneration model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the
Pop2Piano [sweetcocoa/pop2piano](https://huggingface.co/sweetcocoa/pop2piano) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Arguments:
    vocab_size (`int`, *optional*, defaults to 2400):
        Vocabulary size of the `Pop2PianoForConditionalGeneration` model. Defines the number of different tokens
        that can be represented by the `input_ids` passed when calling [`Pop2PianoForConditionalGeneration`].
    composer_vocab_size (`int`, *optional*, defaults to 21):
        Denotes the number of composers.
    d_model (`int`, *optional*, defaults to 512):
        Size of the encoder layers and the pooler layer.
    d_kv (`int`, *optional*, defaults to 64):
        Size of the key, query, value projections per attention head. The `inner_dim` of the projection layer will
        be defined as `num_heads * d_kv`.
    d_ff (`int`, *optional*, defaults to 2048):
        Size of the intermediate feed forward layer in each `Pop2PianoBlock`.
    num_layers (`int`, *optional*, defaults to 6):
        Number of hidden layers in the Transformer encoder.
    num_decoder_layers (`int`, *optional*):
        Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set.
    num_heads (`int`, *optional*, defaults to 8):
        Number of attention heads for each attention layer in the Transformer encoder.
    relative_attention_num_buckets (`int`, *optional*, defaults to 32):
        The number of buckets to use for each attention layer.
    relative_attention_max_distance (`int`, *optional*, defaults to 128):
        The maximum distance of the longer sequences for the bucket separation.
    dropout_rate (`float`, *optional*, defaults to 0.1):
        The ratio for all dropout layers.
    layer_norm_epsilon (`float`, *optional*, defaults to 1e-6):
        The epsilon used by the layer normalization layers.
    initializer_factor (`float`, *optional*, defaults to 1.0):
        A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization
        testing).
    feed_forward_proj (`string`, *optional*, defaults to `"gated-gelu"`):
        Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
    use_cache (`bool`, *optional*, defaults to `True`):
        Whether or not the model should return the last key/values attentions (not used by all models).
    dense_act_fn (`string`, *optional*, defaults to `"relu"`):
        Type of activation function to be used in `Pop2PianoDenseActDense` and in `Pop2PianoDenseGatedActDense`.
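
Example (a minimal usage sketch following the standard `transformers` pattern of building a model from a
configuration; `Pop2PianoForConditionalGeneration` is the model class referenced above):

```python
>>> from transformers import Pop2PianoConfig, Pop2PianoForConditionalGeneration

>>> # Initializing a configuration with the default (sweetcocoa/pop2piano-style) values
>>> configuration = Pop2PianoConfig()

>>> # Initializing a model (with random weights) from that configuration
>>> model = Pop2PianoForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```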
    """

    model_type = "pop2piano"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=2400,
        composer_vocab_size=21,
        d_model=512,
        d_kv=64,
        d_ff=2048,
        num_layers=6,
        num_decoder_layers=None,
        num_heads=8,
        relative_attention_num_buckets=32,
        relative_attention_max_distance=128,
        dropout_rate=0.1,
        layer_norm_epsilon=1e-6,
        initializer_factor=1.0,
        feed_forward_proj="gated-gelu",
        is_encoder_decoder=True,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=1,
        dense_act_fn="relu",
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.composer_vocab_size = composer_vocab_size
        self.d_model = d_model
        self.d_kv = d_kv
        self.d_ff = d_ff
        self.num_layers = num_layers
        # Mirror the encoder depth if no decoder depth is given.
        self.num_decoder_layers = num_decoder_layers if num_decoder_layers is not None else self.num_layers
        self.num_heads = num_heads
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.dropout_rate = dropout_rate
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_factor = initializer_factor
        self.feed_forward_proj = feed_forward_proj
        self.use_cache = use_cache
        self.dense_act_fn = dense_act_fn
        # Activations such as "gated-gelu" enable the gated feed-forward path.
        self.is_gated_act = self.feed_forward_proj.split("-")[0] == "gated"

        # Aliases for the attribute names expected by `PretrainedConfig`.
        self.hidden_size = self.d_model
        self.num_attention_heads = num_heads
        self.num_hidden_layers = num_layers

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            is_encoder_decoder=is_encoder_decoder,
            **kwargs,
        )


__all__ = ["Pop2PianoConfig"]