
    fThI                        S SK r S SKrSSKJr  SSKJr  SSKJrJrJ	r	J
r
  SSKJrJr  \" 5       (       a
  S SKrSSKJr  \	" 5       (       a  SS	KJr  \
R(                  " \5      r " S
 S\ R.                  5      r\" \" SS95       " S S\5      5       r\" \" SS95       " S S\5      5       r\" \" SS95       " S S\5      5       rg)    N   )GenerationConfig)TruncationStrategy)add_end_docstringsis_tf_availableis_torch_availablelogging   )Pipelinebuild_pipeline_init_args)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMESc                       \ rS rSrSrSrSrg)
ReturnType   r   r
    N)__name__
__module____qualname____firstlineno__TENSORSTEXT__static_attributes__r       c/var/www/auris/envauris/lib/python3.13/site-packages/transformers/pipelines/text2text_generation.pyr   r      s    GDr   r   T)has_tokenizerc                      ^  \ rS rSrSrSr\" SSS9rSrU 4S jr	      SS	 jr
S
\S\S\4S jrS rU 4S jr\R                   4S jrS r\R(                  S4S jrSrU =r$ )Text2TextGenerationPipeline   a  
Pipeline for text to text generation using seq2seq models.

Unless the model you're using explicitly sets these generation parameters in its configuration files
(`generation_config.json`), the following default values will be used:
- max_new_tokens: 256
- num_beams: 4

Example:

```python
>>> from transformers import pipeline

>>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
>>> generator(
...     "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
... )
[{'generated_text': 'question: Who created the RuPERTa-base?'}]
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
text generation parameters in [Text generation strategies](../generation_strategies) and [Text
generation](text_generation).

This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
identifier: `"text2text-generation"`.

The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
parameters, see the [following
documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

Usage:

```python
text2text_generator = pipeline("text2text-generation")
text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
```T      )max_new_tokens	num_beams	generatedc                    > [         TU ]  " U0 UD6  U R                  U R                  S:X  a  [        5        g [
        5        g )Ntf)super__init__check_model_type	frameworkr   r   selfargskwargs	__class__s      r   r(   $Text2TextGenerationPipeline.__init__O   s>    $)&)~~% <	
 >	
r   c                    0 nUb  XXS'   Un	0 n
Ub*  Uc'  U(       a  [         R                  O[         R                  nUb  X:S'   Ub  XJS'   UbG  U R                  R	                  USS9n[        U5      S:  a  [        R                  " S5        US   US	'   U R                  b  U R                  U	S
'   U R                  b  U R                  U	S'   U R                  U	S'   XU
4$ )N
truncationreturn_typeclean_up_tokenization_spacesF)add_special_tokensr
   zStopping on a multiple token sequence is not yet supported on transformers. The first token of the stop sequence will be used as the stop sequence string in the interim.r   eos_token_idassistant_model	tokenizerassistant_tokenizer)
r   r   r   r8   encodelenwarningswarnr7   r9   )r,   return_tensorsreturn_textr3   r4   r2   stop_sequencegenerate_kwargspreprocess_paramsforward_paramspostprocess_paramsstop_sequence_idss               r   _sanitize_parameters0Text2TextGenerationPipeline._sanitize_parametersX   s    !.8l+(%+*=0>*,,JOOK"0;}-'3A]=>$ $ 5 5mX] 5 ^$%)b /@.BON++040D0DN,-##/*...N;'484L4LN01 2DDDr   input_length
min_length
max_lengthc                     g)Z
Checks whether there might be something wrong with given input with regard to the model.
Tr   r,   rH   rI   rJ   s       r   check_inputs(Text2TextGenerationPipeline.check_inputs   s     r   c                   U R                   b  U R                   OSn[        US   [        5      (       a>  U R                  R                  c  [        S5      eUS    Vs/ s H  oCU-   PM	     sn4nSnO5[        US   [        5      (       a  X2S   -   4nSnO[        SUS    S35      eU R                  " X%XR                  S.6nS	U;   a  US		 U$ s  snf )
N r   zOPlease make sure that the tokenizer has a pad_token_id when using a batch inputTFz `args[0]`: zI have the wrong format. The should be either of type `str` or type `list`)paddingr2   r>   token_type_ids)prefix
isinstancelistr8   pad_token_id
ValueErrorstrr*   )r,   r2   r-   rT   argrR   inputss          r   _parse_and_tokenize/Text2TextGenerationPipeline._parse_and_tokenize   s     $ 7Rd1gt$$~~**2 !rss-1!W5Wcc\W57DGQ%%!W$&DGtAwi'pq  :^l^lmv%'( 6s   Cc                    > [         TU ]  " U0 UD6n[        US   [        5      (       aF  [	        S US    5       5      (       a,  [	        S U 5       5      (       a  U Vs/ s H  oDS   PM	     sn$ U$ s  snf )a  
Generate the output text(s) using text(s) given as inputs.

Args:
    args (`str` or `List[str]`):
        Input text for the encoder.
    return_tensors (`bool`, *optional*, defaults to `False`):
        Whether or not to include the tensors of predictions (as token indices) in the outputs.
    return_text (`bool`, *optional*, defaults to `True`):
        Whether or not to include the decoded texts in the outputs.
    clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
        Whether or not to clean up the potential extra spaces in the text output.
    truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
        The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
        (default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
        max_length instead of throwing an error down the line.
    generate_kwargs:
        Additional keyword arguments to pass along to the generate method of the model (see the generate method
        corresponding to your framework [here](./text_generation)).

Return:
    A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

    - **generated_text** (`str`, present when `return_text=True`) -- The generated text.
    - **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
      ids of the generated text.
r   c              3   B   #    U  H  n[        U[        5      v   M     g 7f)N)rU   rY   ).0els     r   	<genexpr>7Text2TextGenerationPipeline.__call__.<locals>.<genexpr>   s     :'BJr3'''s   c              3   >   #    U  H  n[        U5      S :H  v   M     g7f)r
   N)r;   )r`   ress     r   rb   rc      s     4VcCHMVs   )r'   __call__rU   rV   all)r,   r-   r.   resultre   r/   s        r   rf   $Text2TextGenerationPipeline.__call__   sq    : !4262tAw%%:$q':::4V444&,-fsFf-- .s   A2c                 0    U R                   " U4SU0UD6nU$ )Nr2   )r\   )r,   r[   r2   r.   s       r   
preprocess&Text2TextGenerationPipeline.preprocess   s!    ))&RZR6Rr   c                    U R                   S:X  a  US   R                  u  p4O9U R                   S:X  a)  [        R                  " US   5      R                  5       u  p4U R	                  WUR                  SU R                  R                  5      UR                  SU R                  R                  5      5        SU;  a  U R                  US'   U R                  R                  " S
0 UDUD6nUR                  S   nU R                   S:X  a'  UR                  " WXc-  /UR                  SS  Q76 nS	U0$ U R                   S:X  a+  [        R                  " UWXc-  /UR                  SS  Q75      nS	U0$ )Npt	input_idsr&   rI   rJ   generation_configr   r
   
output_idsr   )r*   shaper&   numpyrN   getrp   rI   rJ   modelgeneratereshape)r,   model_inputsrA   in_brH   rq   out_bs          r   _forward$Text2TextGenerationPipeline._forward   sc   >>T!!-k!:!@!@D,^^t#!#,{*C!D!J!J!LDd.D.D.O.OPd.D.D.O.OP	
 o5373I3IO/0ZZ((K<K?K
  #>>T!#++D%-W*BRBRSTSUBVWJ j)) ^^t#Ju}0\zGWGWXYXZG[0\]Jj))r   Fc                    / nUS   S    Hx  nU[         R                  :X  a  U R                   S3U0nO>U[         R                  :X  a*  U R                   S3U R                  R                  USUS90nUR                  W5        Mz     U$ )Nrq   r   
_token_ids_textT)skip_special_tokensr4   )r   r   return_namer   r8   decodeappend)r,   model_outputsr3   r4   recordsrq   records          r   postprocess'Text2TextGenerationPipeline.postprocess   s    '5a8Jj000!--.j9:F
/''(.0E0E",05Q 1F 1 NN6" 9 r   r   )NNNNNN)r   r   r   r   __doc___pipeline_calls_generater   _default_generation_configr   r(   rF   intrN   r\   rf   r   DO_NOT_TRUNCATErk   r{   r   r   r   r   __classcell__r/   s   @r   r   r      s    'R  $!1" K
 %)(ET # 3 *$L -?,N,N *0 6@__ch  r   r   c                   J   ^  \ rS rSrSrSrU 4S jrS\S\S\S\4S	 jr	S


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class SummarizationPipeline(Text2TextGenerationPipeline):
    """
    Summarize news articles and other documents.

    This summarizing pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"summarization"`.

    The models that this pipeline can use are models that have been fine-tuned on a summarization task, which is
    currently, '*bart-large-cnn*', '*google-t5/t5-small*', '*google-t5/t5-base*', '*google-t5/t5-large*',
    '*google-t5/t5-3b*', '*google-t5/t5-11b*'. See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=summarization). For a list of available parameters,
    see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Usage:

    ```python
    # use bart in pytorch
    summarizer = pipeline("summarization")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)

    # use t5 in tf
    summarizer = pipeline("summarization", model="google-t5/t5-base", tokenizer="google-t5/t5-base", framework="tf")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
    ```"""

    # Used in the return key of the pipeline.
    return_name = "summary"

    def __call__(self, *args, **kwargs):
        r"""
        Summarize the text(s) given as inputs.

        Args:
            documents (*str* or `List[str]`):
                One or several articles (or one list of articles) to summarize.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **summary_text** (`str`, present when `return_text=True`) -- The summary of the corresponding input.
            - **summary_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the summary.
        """
        return super().__call__(*args, **kwargs)

    def check_inputs(self, input_length: int, min_length: int, max_length: int) -> bool:
        """
        Checks whether there might be something wrong with given input with regard to the model.
        """
        if max_length < min_length:
            logger.warning(f"Your min_length={min_length} must be smaller than your max_length={max_length}.")

        if input_length < max_length:
            logger.warning(
                f"Your max_length is set to {max_length}, but your input_length is only {input_length}. Since this is "
                "a summarization task, where outputs shorter than the input are typically wanted, you might consider "
                f"decreasing max_length manually, e.g. summarizer('...', max_length={input_length // 2})"
            )
 jjr
SU 4S jjrU 4S jrSrU =r$ )TranslationPipelinei;  a  
Translates from one language to another.

This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"translation_xx_to_yy"`.

The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
For a list of available parameters, see the [following
documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

Unless the model you're using explicitly sets these generation parameters in its configuration files
(`generation_config.json`), the following default values will be used:
- max_new_tokens: 256
- num_beams: 4

Usage:

```python
en_fr_translator = pipeline("translation_en_to_fr")
en_fr_translator("How old are you?")
```translationrH   rI   rJ   c                 N    USU-  :  a  [         R                  SU SU S35        g)Ng?zYour input_length: z" is bigger than 0.9 * max_length: z`. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)Tr   rM   s       r   rN    TranslationPipeline.check_inputsW  s=    #
**NN%l^3UV`Ua b^ ^ r   N)r2   src_langtgt_langc                   > [        U R                  SS 5      (       a'  U R                  R                  " X@R                  XUS.6$ [        TU ]  " USU06$ )N_build_translation_inputs)r>   r2   r   r   r2   )getattrr8   r   r*   r'   r\   )r,   r2   r   r   r-   r/   s        r   rk   TranslationPipeline.preprocess_  sQ    4>>#>EE>>;;nniq  7.LLLr   c                    > [         T	U ]  " S0 UD6u  pEnUb  XS'   Ub  X$S'   UcV  UcS  UR                  SU R                  5      nUR	                  S5      nU(       a  [        U5      S:X  a  US   US'   US   US'   XEU4$ )	Nr   r   task_r!   r
      r   )r'   rF   rt   r   splitr;   )
r,   r   r   r.   rB   rC   rD   r   itemsr/   s
            r   rF   (TranslationPipeline._sanitize_parametersg  s    @E@\@f_e@f=+=,4j),4j) 0::fdii0DJJsOEE
a05a!*-05a!*- 2DDDr   c                 $   > [         TU ]  " U0 UD6$ )a  
Translate the text(s) given as inputs.

Args:
    args (`str` or `List[str]`):
        Texts to be translated.
    return_tensors (`bool`, *optional*, defaults to `False`):
        Whether or not to include the tensors of predictions (as token indices) in the outputs.
    return_text (`bool`, *optional*, defaults to `True`):
        Whether or not to include the decoded texts in the outputs.
    clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
        Whether or not to clean up the potential extra spaces in the text output.
    src_lang (`str`, *optional*):
        The language of the input. Might be required for multilingual models. Will not have any effect for
        single pair translation models
    tgt_lang (`str`, *optional*):
        The language of the desired output. Might be required for multilingual models. Will not have any effect
        for single pair translation models
    generate_kwargs:
        Additional keyword arguments to pass along to the generate method of the model (see the generate method
        corresponding to your framework [here](./text_generation)).

Return:
    A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

    - **translation_text** (`str`, present when `return_text=True`) -- The translation.
    - **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
      token ids of the translation.
r   r+   s      r   rf   TranslationPipeline.__call__w  s    < w000r   r   )NN)r   r   r   r   r   r   r   rN   r   r   rk   rF   rf   r   r   r   s   @r   r   r   ;  sV    0  K # 3  ,>+M+MX\gk M ME 1 1r   r   )enumr<   