o
    ZhY                     @   s8  d dl Z d dlZd dlmZ d dlmZ d dlmZ ddl	m
Z
 ddlmZ eeZdZG dd dZG d	d
 d
ZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZdS )#    N)sparse   )add_start_docstrings)
get_loggerad  
    Args:
        input_ids (`jnp.ndarray` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        scores (`jnp.ndarray` of shape `(batch_size, config.vocab_size)`):
            Prediction scores of a language modeling head. These can be logits for each vocabulary when not using beam
            search or log softmax for each vocabulary token when using beam search
        kwargs (`Dict[str, Any]`, *optional*):
            Additional logits processor specific kwargs.

    Return:
        `jnp.ndarray` of shape `(batch_size, config.vocab_size)`: The processed prediction scores.

c                   @   4   e Zd ZdZeedejdejdejfddZdS )FlaxLogitsProcessorzSAbstract base class for all logit processors that can be applied during generation.	input_idsscoresreturnc                 C      t | j d)z"Flax method for processing logits.H is an abstract class. Only classes inheriting this class can be called.NotImplementedError	__class__selfr   r	    r   Z/var/www/auris/lib/python3.10/site-packages/transformers/generation/flax_logits_process.py__call__6      
zFlaxLogitsProcessor.__call__N	__name__
__module____qualname____doc__r   !LOGITS_PROCESSOR_INPUTS_DOCSTRINGjnpndarrayr   r   r   r   r   r   3       "r   c                   @   r   )FlaxLogitsWarperzjAbstract base class for all logit warpers that can be applied during generation with multinomial sampling.r   r	   r
   c                 C   r   )zFlax method for warping logits.r   r   r   r   r   r   r   A   r   zFlaxLogitsWarper.__call__Nr   r   r   r   r   r   >   r   r   c                	   @   s8   e Zd ZdZeedejdejdedejfddZ	dS )	FlaxLogitsProcessorLista.  
    This class can be used to create a list of [`FlaxLogitsProcessor`] or [`FlaxLogitsWarper`] to subsequently process
    a `scores` input tensor. This class inherits from list and adds a specific *__call__* method to apply each
    [`FlaxLogitsProcessor`] or [`FlaxLogitsWarper`] to the inputs.
    r   r	   cur_lenr
   c                    s   | D ]C}t |jj}t|dkr?t fddt| dd  D s4tdt|  d|j	 d||||fi  }q||||}q|S )N   c                 3   s    | ]}| v V  qd S Nr   ).0argkwargsr   r   	<genexpr>U   s    z3FlaxLogitsProcessorList.__call__.<locals>.<genexpr>r   z,Make sure that all the required parameters: z for z$ are passed to the logits processor.)
inspect	signaturer   
parameterslenalllistkeys
ValueErrorr   )r   r   r	   r!   r'   	processorZfunction_argsr   r&   r   r   P   s   &z FlaxLogitsProcessorList.__call__N)
r   r   r   r   r   r   r   r   intr   r   r   r   r   r    I   s    &r    c                   @   >   e Zd ZdZdefddZdejdejdedejfd	d
Z	dS )FlaxTemperatureLogitsWarperz
    [`FlaxLogitsWarper`] for temperature (exponential scaling output probability distribution).

    Args:
        temperature (`float`):
            The value used to module the logits distribution.
    temperaturec                 C   s*   t |tr	|dkstd| || _d S )Nr   z:`temperature` has to be a strictly positive float, but is )
isinstancefloatr0   r5   )r   r5   r   r   r   __init__i   s   
z$FlaxTemperatureLogitsWarper.__init__r   r	   r!   r
   c                 C   s   || j  }|S r#   )r5   r   r   r	   r!   r   r   r   r   o   s   
z$FlaxTemperatureLogitsWarper.__call__N)
r   r   r   r   r7   r8   r   r   r2   r   r   r   r   r   r4   `   s    $r4   c                   @   sR   e Zd ZdZed dfdededefddZd	ejd
ejdedejfddZ	dS )FlaxTopPLogitsWarpera=  
    [`FlaxLogitsWarper`] that performs top-p, i.e. restricting to top tokens summing to prob_cut_off <= prob_cut_off.

    Args:
        top_p (`float`):
            If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
            higher are kept for generation.
        filter_value (`float`, *optional*, defaults to -inf):
            All filtered values will be set to this float value.
        min_tokens_to_keep (`int`, *optional*, defaults to 1):
            Minimum number of tokens that cannot be filtered.
    Inf   top_pfilter_valuemin_tokens_to_keepc                 C   s^   t |tr|dk s|dkrtd| t |tr|dk r$td| || _|| _|| _d S )Nr   g      ?z.`top_p` has to be a float > 0 and < 1, but is r<   z:`min_tokens_to_keep` has to be a positive integer, but is )r6   r7   r0   r2   r=   r>   r?   )r   r=   r>   r?   r   r   r   r8      s   
zFlaxTopPLogitsWarper.__init__r   r	   r!   r
   c                 C   s   t ||jd \}}t|| j}tjj|ddj	dd}|| j
k }t|d}||jd d df dO }|jd d d | jf d}t|||}	tj ||	d }
|
S )NZaxisr<   r   T)laxtop_kshaper   Z	full_liker>   jaxnnZsoftmaxZcumsumr=   Zrollatsetr?   whereZsort_key_val)r   r   r	   r!   topk_scorestopk_indicesZmask_scoresZcumulative_probsZ
score_maskZtopk_next_scoresnext_scoresr   r   r   r      s   
zFlaxTopPLogitsWarper.__call__N
r   r   r   r   r7   r2   r8   r   r   r   r   r   r   r   r:   t   s    "$
r:   c                   @   sR   e Zd ZdZed dfdededefddZd	ejd
ejdedejfddZ	dS )FlaxTopKLogitsWarpera  
    [`FlaxLogitsWarper`] that performs top-k, i.e. restricting to the k highest probability elements.

    Args:
        top_k (`int`):
            The number of highest probability vocabulary tokens to keep for top-k-filtering.
        filter_value (`float`, *optional*, defaults to -inf):
            All filtered values will be set to this float value.
        min_tokens_to_keep (`int`, *optional*, defaults to 1):
            Minimum number of tokens that cannot be filtered.
    r;   r<   rC   r>   r?   c                 C   s6   t |tr	|dkrtd| t||| _|| _d S )Nr   z6`top_k` has to be a strictly positive integer, but is )r6   r2   r0   maxrC   r>   )r   rC   r>   r?   r   r   r   r8      s   
zFlaxTopKLogitsWarper.__init__r   r	   r!   r
   c                 C   s   |j \}}t|| | j}t| j|j d }t||\}}	tt|| d d d f ||f	 }
|	 }|		 |
 }|j
| |}|||}|S )Nr@   )rD   r   fullr>   minrC   rB   Zbroadcast_toarangeflattenrG   rH   Zreshape)r   r   r	   r!   
batch_size
vocab_sizeZnext_scores_flatZtopkrJ   rK   shiftZtopk_scores_flatZtopk_indices_flatrL   r   r   r   r      s   
*zFlaxTopKLogitsWarper.__call__NrM   r   r   r   r   rN      s    "$rN   c                   @   s>   e Zd ZdZdefddZdejdejdedejfd	d
ZdS )!FlaxForcedBOSTokenLogitsProcessorz
    [`FlaxLogitsProcessor`] that enforces the specified token as the first generated token.

    Args:
        bos_token_id (`int`):
            The id of the token to force as the first generated token.
    bos_token_idc                 C   s
   || _ d S r#   )rX   )r   rX   r   r   r   r8      s   
z*FlaxForcedBOSTokenLogitsProcessor.__init__r   r	   r!   r
   c                 C   sN   t |jtd }dt |d  }t ||jd d | jf d|}|S Ninfr<   r   )	r   rP   rD   r7   bool_rI   rG   rX   rH   r   r   r	   r!   
new_scoresapply_penaltyr   r   r   r      s   $z*FlaxForcedBOSTokenLogitsProcessor.__call__N	r   r   r   r   r2   r8   r   r   r   r   r   r   r   rW      s    $rW   c                   @   B   e Zd ZdZdedefddZdejdejded	ejfd
dZdS )!FlaxForcedEOSTokenLogitsProcessorae  
    [`FlaxLogitsProcessor`] that enforces the specified token as the last generated token when `max_length` is reached.

    Args:
        max_length (`int`):
            The maximum length of the sequence to be generated.
        eos_token_id (`int`):
            The id of the token to force as the last generated token when `max_length` is reached.
    
max_lengtheos_token_idc                 C   s   || _ || _d S r#   )rb   rc   )r   rb   rc   r   r   r   r8      s   
z*FlaxForcedEOSTokenLogitsProcessor.__init__r   r	   r!   r
   c                 C   sT   t |jtd }dt || j d  }t ||jd d | jf 	d|}|S rY   )
r   rP   rD   r7   r[   rb   rI   rG   rc   rH   r\   r   r   r   r      s   $z*FlaxForcedEOSTokenLogitsProcessor.__call__Nr_   r   r   r   r   ra      s    
$ra   c                   @   r`   )FlaxMinLengthLogitsProcessora3  
    [`FlaxLogitsProcessor`] enforcing a min-length by setting EOS probability to 0.

    Args:
        min_length (`int`):
            The minimum length below which the score of `eos_token_id` is set to `-float("Inf")`.
        eos_token_id (`int`):
            The id of the *end-of-sequence* token.
    
min_lengthrc   c                 C   sP   t |tr	|dk rtd| t |tr|dk r td| || _|| _d S )Nr   z2`min_length` has to be a positive integer, but is z4`eos_token_id` has to be a positive integer, but is )r6   r2   r0   re   rc   )r   re   rc   r   r   r   r8      s   
z%FlaxMinLengthLogitsProcessor.__init__r   r	   r!   r
   c                 C   sF   dt || j dd }t ||jd d | jf td |}|S )Nr<   r   rZ   )r   Zclipre   rI   rG   rc   rH   r7   r   r   r	   r!   r^   r   r   r   r     s   *z%FlaxMinLengthLogitsProcessor.__call__Nr_   r   r   r   r   rd      s    
$
rd   c                   @   s&   e Zd ZdZdd ZdefddZdS )(FlaxSuppressTokensAtBeginLogitsProcessora  
    [`FlaxLogitsProcessor`] suppressing a list of tokens as soon as the `generate` function starts generating using
    `begin_index` tokens. This should ensure that the tokens defined by `begin_suppress_tokens` are not sampled at the
    beginning of the generation.

    Args:
        begin_suppress_tokens (`List[int]`):
            Tokens to not sample.
        begin_index (`int`):
            Index where the tokens are suppressed.
    c                 C   s   t || _|| _d S r#   )r.   begin_suppress_tokensbegin_index)r   rh   ri   r   r   r   r8     s   

z1FlaxSuppressTokensAtBeginLogitsProcessor.__init__r!   c                 C   sB   dt || j  }t ||jd d | jf td |}|S )Nr<   rZ   )r   r[   ri   rI   rG   rh   rH   r7   rf   r   r   r   r   !  s   *z1FlaxSuppressTokensAtBeginLogitsProcessor.__call__N)r   r   r   r   r8   r2   r   r   r   r   r   rg     s    rg   c                   @   r3   )!FlaxSuppressTokensLogitsProcessorz
    [`FlaxLogitsProcessor`] suppressing a list of tokens at each decoding step. The processor will set their log probs
    to be `-inf` so they are not sampled.

    Args:
        suppress_tokens (`list`):
            Tokens to not sample.
    suppress_tokensc                 C   s   t || _d S r#   )r.   rk   )r   rk   r   r   r   r8   3  s   z*FlaxSuppressTokensLogitsProcessor.__init__r   r	   r!   r
   c                 C   s    |j d| jf td }|S )N.rZ   )rG   rk   rH   r7   r9   r   r   r   r   6  s   z*FlaxSuppressTokensLogitsProcessor.__call__N)
r   r   r   r   r.   r8   r   r   r2   r   r   r   r   r   rj   )  s    	$rj   c                   @   s8   e Zd ZdZdd Zdejdejdedejfdd	Zd
S )FlaxForceTokensLogitsProcessora  
    [`FlaxLogitsProcessor`] that takes a list of pairs of integers which indicates a mapping from generation indices to
    token indices that will be forced before sampling. The processor will set their log probs to 0 and all other tokens
    to `-inf` so that they are sampled at their corresponding index.

    Args:
        force_token_map (`list`):
            Map giving token ids and indices where they will be forced to be sampled.
    c                 C   sb   t |}tjt| d tjdd }| D ]\}}|d ur(|j| |}qt|| _	d S )Nr<   dtyper@   )
dictr   ZonesrO   r/   Zint32itemsrG   rH   force_token_array)r   Zforce_token_maprq   indextokenr   r   r   r8   G  s    z'FlaxForceTokensLogitsProcessor.__init__r   r	   r!   r
   c                    sB   fdd t jjd kfdd fddS )Nc                    sX    j d }j|  }tj  jdtd  }tj|df jd}t||d|f}|S )Nr   rm   rZ   r<   )	rD   rq   r   Z	ones_likern   r7   zerosrB   dynamic_update_slice)Zgeneration_idxrT   current_tokenr]   Zupdates)r	   r   r   r   _force_tokenS  s   

z=FlaxForceTokensLogitsProcessor.__call__.<locals>._force_tokenr   c                          S r#   r   r   r	   r   r   <lambda>_      z9FlaxForceTokensLogitsProcessor.__call__.<locals>.<lambda>c                      s*   t j dk fddfddS )Nr   c                      s    S r#   r   r   )rw   r!   r   r   rz   d  s    zKFlaxForceTokensLogitsProcessor.__call__.<locals>.<lambda>.<locals>.<lambda>c                      rx   r#   r   r   ry   r   r   rz   f  r{   )rB   condrq   r   rw   r!   r	   r   r   r   rz   a  s
    
)rB   r|   rq   rD   r9   r   r}   r   r   R  s   	
z'FlaxForceTokensLogitsProcessor.__call__N)	r   r   r   r   r8   r   r   r2   r   r   r   r   r   rl   <  s    
$rl   c                   @   s    e Zd ZdZdd Zdd ZdS )#FlaxWhisperTimeStampLogitsProcessora{  
    Whisper specific Processor. This processor can be used to force a list of tokens. The processor will set their log
    probs to `inf` so that they are sampled at their corresponding index.

    Args:
        generate_config (`GenerateConfig`):
            The generate config used to generate the output. The following parameters are required:
                eos_token_id (`int`, *optional*, defaults to 50257):
                    The id of the *end-of-sequence* token.
                no_timestamps_token_id (`int`, *optional*, defaults to 50363):
                    The id of the `"<|notimestamps|>"` token.
                max_initial_timestamp_index (`int`, *optional*, defaults to 1):
                    Used to set the maximum value of the initial timestamp. This is used to prevent the model from
                    predicting timestamps that are too far in the future.
    c                 C   sp   |j | _ |j| _|jd | _|d | _|jr|  jd7  _t|dr'|j| _n|j| _| jd u r6|j| _d S d S )Nr<   r   max_initial_timestamp_index)rc   no_timestamps_token_idtimestamp_beginri   Zis_multilingualhasattrr   rU   )r   Zgenerate_configZmodel_configZdecoder_input_lengthr   r   r   r8   }  s   



z,FlaxWhisperTimeStampLogitsProcessor.__init__c           	         s   |j d d jf td } fdd}t|||}t jkdd}tj	d u|d}j
j	 }t||j d d |d d f td |}tjj|dd}fd	d
}t|||}|S )NrZ   c                    s   t  j dkdd}t |  d  jk|d}t  j dk dd}t |  d  jkd|}t |t |dk|jjd  td |jd j td |S )Nr<   TFr   r   rZ   )r   rI   ri   r   rG   rH   r7   rc   )Zinput_ids_kscores_kZlast_was_timestampZpenultimate_was_timestampr!   r   r   r   handle_pairs  s*   zBFlaxWhisperTimeStampLogitsProcessor.__call__.<locals>.handle_pairsTFr<   r@   rA   c                    sX   t jj|  jd  dd}t| d  j }t||k|jd  j t	d |S )Nr@   rA   rZ   )
rE   rF   Z	logsumexpr   r   rO   rI   rG   rH   r7   )Z
logprobs_kr   Ztimestamp_logprobZmax_text_token_logprob)r   r   r   handle_cumulative_probs  s   zMFlaxWhisperTimeStampLogitsProcessor.__call__.<locals>.handle_cumulative_probs)rG   r   rH   r7   rE   vmapr   rI   ri   r   r   rF   Zlog_softmax)	r   r   r	   r!   r   Zapply_max_initial_timestampZlast_allowedZlogprobsr   r   r   r   r     s&    $	z,FlaxWhisperTimeStampLogitsProcessor.__call__N)r   r   r   r   r8   r   r   r   r   r   r~   l  s    r~   c                   @   sl   e Zd ZdZdefddZdejdedefdd	Zd
ejdejfddZ	dejdejdedejfddZ
dS ) FlaxNoRepeatNGramLogitsProcessora9  
    [`FlaxLogitsProcessor`] that enforces no repetition of n-grams. See
    [Fairseq](https://github.com/pytorch/fairseq/blob/a07cb6f40480928c9e0548b737aadd36ee66ac76/fairseq/sequence_generator.py#L345).

    Args:
        ngram_size (`int`):
            All ngrams of size `ngram_size` can only occur once.
    
ngram_sizec                 C   s*   t |tr	|dkrtd| || _d S )Nr   z;`ngram_size` has to be a strictly positive integer, but is )r6   r2   r0   r   )r   r   r   r   r   r8     s   
z)FlaxNoRepeatNGramLogitsProcessor.__init__r   rU   r!   c              	      s   j \ }|jd  }|jd  } fdd} | jd f}tjd | |tj|jd}	t |  | k 	d}
t
j|
|	f f|fj  dS )a  
        get a matrix of size (batch_size,) + (vocab_size,)*n (for n-grams) that
        represent the n-grams that occurred previously.
        The BCOO representation allow to store only the few non-zero entries, instead of the full (huge) matrix
        r<   c                    sD   |   |  |j |  t g fddtjD  S )Nc                    s"   g | ]}t  | f qS r   )r   array)r$   j)br   posr   r   
<listcomp>  s   " zZFlaxNoRepeatNGramLogitsProcessor.get_previous_ngrams.<locals>.body_fun.<locals>.<listcomp>)rG   rH   r   r   ranger   )ivalrT   r   r   )r   r   r   body_fun  s   
zFFlaxNoRepeatNGramLogitsProcessor.get_previous_ngrams.<locals>.body_funr   rm   Zfloat32)rD   )rD   r   rE   rB   Z	fori_loopr   rt   rn   rR   astyper   ZBCOO)r   r   rU   r!   Zseq_lenZ
seq_ngramsZ
cur_ngramsr   rD   Zall_update_indicesdatar   r   r   get_previous_ngrams  s   
 z4FlaxNoRepeatNGramLogitsProcessor.get_previous_ngramslatest_tokensr
   c                 C   s$   t jtjdd }t |||S )zt
        Determines which tokens must be banned given latest tokens and the previously seen
        ngrams.
        c                 S   s   |t |  S r#   )tuple)r   previous_ngramsr   r   r   inner_fn  s   zIFlaxNoRepeatNGramLogitsProcessor.get_banned_tokens_mask.<locals>.inner_fn)r   ZsparsifyrE   r   Zbcoo_todense)r   r   r   r   r   r   r   get_banned_tokens_mask  s   z7FlaxNoRepeatNGramLogitsProcessor.get_banned_tokens_maskr	   c                    s8    fdd}t j jd k|fdd}|S )Nc               
      s   j \} }| }tjj d jd fjd}tj|tj	d jd  fj d jd fd}
||d}t|td S )Nr   r<   rm   )r   r   boolrZ   )rD   r   r   rt   r   rn   rE   rB   ru   Zdynamic_slicer   r   rI   r7   )_rU   r   r   Zbanned_tokens_indices_maskr!   r   r	   r   r   r   true_fn  s   
 $	z:FlaxNoRepeatNGramLogitsProcessor.__call__.<locals>.true_fnr<   c                      rx   r#   r   r   ry   r   r   rz     r{   z;FlaxNoRepeatNGramLogitsProcessor.__call__.<locals>.<lambda>)rE   rB   r|   r   )r   r   r	   r!   r   outputr   r   r   r     s   "z)FlaxNoRepeatNGramLogitsProcessor.__call__N)r   r   r   r   r2   r8   r   r   r   r   r   r   r   r   r   r     s    	"$r   )r)   rE   Zjax.laxrB   Z	jax.numpynumpyr   Zjax.experimentalr   utilsr   Zutils.loggingr   r   loggerr   r   r   r.   r    r4   r:   rN   rW   ra   rd   rg   rj   rl   r~   r   r   r   r   r   <module>   s.   ,#0a