o
    Zh                      @   s  d Z ddlZddlZddlZddlZddlZddlmZ dd Zdd Z	dd	 Z
d
d ZdZdZdd Zdd ZdiddZdZdZdZdZdZdZdZdZdZdZdZd Zd!Zd"Zd#Zeeeeeeeeeeeeeed$Z d%Z!d&Z"eZ#eZ$d'Z%d(Z&d)Z'd)Z(d)Z)d)Z*d)Z+d)Z,eZ-d)Z.d)Z/d)Z0d)Z1eZ2d)Z3d)Z4d)Z5eZ6d)Z7eZ8d)Z9eZ:eZ;eZ<d)Z=d*Z>ed+e!fd,e"fd-e<fd.e#fd/e;fd0e$fd1e>fd2e%fd3e0fd4e&fd5e'fd6e=fd7e2fd8e(fd9e5fd:e3fd;e)fd<e*fd=e1fd>e9fd?e+fd@e6fdAe7fdBe,fdCe-fdDe8fdEe:fdFe4fdGe/fdHe.fgZ?eg dIZ@dJZAdKZBdLZCdMZDdNZEdOZFdPZGdQZHdRZIdSZJdTZKeCeBeAeFeDeGeEeHeIeJeKdUZLdVZMdWZNdVZOdXZPdYZQdZZRd[ZSeOeNeMeRePeQeSd\ZTd]d^ ZUddddd_d`dadddddddbdcddZVdjdedfZWdgdh ZXdS )kz3
Doc utilities: Utilities related to documentation
    N)OrderedDictc                 C   s@   t | rdS t | }| d }t|t|  }d| S )z^Return the indentation level of the start of the docstring of a class or function (or method).   r   )inspectisclass	getsource
splitlineslenlstrip)funcsource
first_lineZfunction_def_level r   E/var/www/auris/lib/python3.10/site-packages/transformers/utils/doc.pyget_docstring_indentation_level   s   

r   c                         fdd}|S )Nc                    s$   d  | jd ur| jnd | _| S N )join__doc__fndocstrr   r   docstring_decorator&       z1add_start_docstrings.<locals>.docstring_decoratorr   r   r   r   r   r   add_start_docstrings%      r   c                     r   )Nc                    s   d| j dd  d}d| d}t|  | jd ur| jnd}ztdd	 | D }t|t|  }W n tyB    }Y nw }|d
  kr_ fddD }t	
t	|d  }d|| }|| | _| S )Nz[`.r   z`]z    The aa   forward method, overrides the `__call__` special method.

    <Tip>

    Although the recipe for forward pass needs to be defined within this function, one should call the [`Module`]
    instance afterwards instead of this since the former takes care of running the pre and post processing steps while
    the latter silently ignores them.

    </Tip>
r   c                 s   s     | ]}|  d kr|V  qdS )r   N)strip.0liner   r   r   	<genexpr>>   s    zUadd_start_docstrings_to_model_forward.<locals>.docstring_decorator.<locals>.<genexpr>r   c                    s"   g | ]}t t |d   qS ) )textwrapindentdedent)r!   docZcorrect_indentationr   r   
<listcomp>G   s   " zVadd_start_docstrings_to_model_forward.<locals>.docstring_decorator.<locals>.<listcomp>r$   )__qualname__splitr   r   nextr   r   r	   StopIterationr%   r&   r'   r   )r   
class_nameintroZcurrent_docZfirst_non_emptyZdoc_indentationdocs	docstringr   r)   r   r   .   s"   
zBadd_start_docstrings_to_model_forward.<locals>.docstring_decoratorr   r   r   r   r   %add_start_docstrings_to_model_forward-   s    r3   c                     r   )Nc                    s$   | j d ur| j ndd  | _ | S r   )r   r   r   r   r   r   r   R   r   z/add_end_docstrings.<locals>.docstring_decoratorr   r   r   r   r   add_end_docstringsQ   r   r4   a:  
    Returns:
        [`{full_output_type}`] or `tuple(torch.FloatTensor)`: A [`{full_output_type}`] or a tuple of
        `torch.FloatTensor` (if `return_dict=False` is passed or when `config.return_dict=False`) comprising various
        elements depending on the configuration ([`{config_class}`]) and inputs.

a*  
    Returns:
        [`{full_output_type}`] or `tuple(tf.Tensor)`: A [`{full_output_type}`] or a tuple of `tf.Tensor` (if
        `return_dict=False` is passed or when `config.return_dict=False`) comprising various elements depending on the
        configuration ([`{config_class}`]) and inputs.

c                 C   s$   t d| }|du rdS | d S )z.Returns the indentation in the first line of tz^(\s*)\SNr   r   )researchgroups)tr6   r   r   r   _get_indentk   s   r9   c                 C   s   t | }g }d}| dD ](}t ||kr*t|dkr$||dd  | d}q||dd  d7 }q||dd  tt|D ]}tdd|| ||< td	d
|| ||< qEd|S )z,Convert output_args_doc to display properly.r   
r   N   z^(\s+)(\S+)(\s+)z\1- **\2**\3z:\s*\n\s*(\S)z -- \1)r9   r,   r   appendranger5   subr   )Zoutput_args_docr&   blocksZcurrent_blockr"   ir   r   r   _convert_output_args_docq   s   
rB   Tc                    s  | j }d}|durU|d}d}|t|k r4td|| du r4|d7 }|t|k r4td|| du s |t|k rJd||d d }t|}n|rUtd| j d|rr| j	 d| j }| j
d	rhtnt}	|	j||d
}	nt| }d| d}	|dur|	d7 }	|	}
|dur|
|7 }
|dur|
d}d}t|| dkr|d7 }t|| dkstt|| }||k rd||    fdd|D }d|}
|
S )zH
    Prepares the return part of the docstring using `output_type`.
    Nr:   r   z^\s*(Args|Parameters):\s*$   z@No `Args` or `Parameters` section is found in the docstring of `zH`. Make sure it has docstring and contain either `Args` or `Parameters`.r   TF)full_output_typeconfig_classz
Returns:
    ``z:
r$   c                    s(   g | ]}t |d kr  | n|qS )r   )r   r    Zto_addr   r   r*      s   ( z._prepare_output_docstrings.<locals>.<listcomp>)r   r,   r   r5   r6   r   rB   
ValueError__name__
__module__
startswithTF_RETURN_INTRODUCTIONPT_RETURN_INTRODUCTIONformatstrr9   )output_typerF   
min_indentZ	add_introZoutput_docstringZparams_docstringlinesrA   rE   r0   resultr&   r   rH   r   _prepare_output_docstrings   sL   
  


rU   aJ  
    <Tip warning={true}>

    This example uses a random model as the real ones are all very big. To get proper results, you should use
    {real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try
    adding `device_map="auto"` in the `from_pretrained` call.

    </Tip>
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer(
    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
    ... )

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_token_class_ids = logits.argmax(-1)

    >>> # Note that tokens are classified rather then input words which means that
    >>> # there might be more predicted token classes than words.
    >>> # Multiple token classes might account for the same word
    >>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
    >>> predicted_tokens_classes
    {expected_output}

    >>> labels = predicted_token_class_ids
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
a_  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

    >>> inputs = tokenizer(question, text, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> answer_start_index = outputs.start_logits.argmax()
    >>> answer_end_index = outputs.end_logits.argmax()

    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    >>> tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    {expected_output}

    >>> # target is "nice puppet"
    >>> target_start_index = torch.tensor([{qa_target_start_index}])
    >>> target_end_index = torch.tensor([{qa_target_end_index}])

    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
    >>> loss = outputs.loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
a  
    Example of single-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_id = logits.argmax().item()
    >>> model.config.id2label[predicted_class_id]
    {expected_output}

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)

    >>> labels = torch.tensor([1])
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```

    Example of multi-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained(
    ...     "{checkpoint}", num_labels=num_labels, problem_type="multi_label_classification"
    ... )

    >>> labels = torch.sum(
    ...     torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
    ... ).to(torch.float)
    >>> loss = model(**inputs, labels=labels).loss
    ```
a   
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # retrieve index of {mask}
    >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

    >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
    >>> tokenizer.decode(predicted_token_id)
    {expected_output}

    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
    >>> # mask labels of non-{mask} tokens
    >>> labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

    >>> outputs = model(**inputs, labels=labels)
    >>> round(outputs.loss.item(), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."
    >>> labels = torch.tensor(0).unsqueeze(0)  # choice0 is correct (according to Wikipedia ;)), batch size 1

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="pt", padding=True)
    >>> outputs = model(**{{k: v.unsqueeze(0) for k, v in encoding.items()}}, labels=labels)  # batch size is 1

    >>> # the linear classifier still needs to be trained
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
a  
    Example:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs, labels=inputs["input_ids"])
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
aY  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
au  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits
    >>> predicted_ids = torch.argmax(logits, dim=-1)

    >>> # transcribe speech
    >>> transcription = processor.batch_decode(predicted_ids)
    >>> transcription[0]
    {expected_output}

    >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="pt").input_ids

    >>> # compute loss
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_ids = torch.argmax(logits, dim=-1).item()
    >>> predicted_label = model.config.id2label[predicted_class_ids]
    >>> predicted_label
    {expected_output}

    >>> # compute loss - target_label is e.g. "down"
    >>> target_label = model.config.id2label[0]
    >>> inputs["labels"] = torch.tensor([model.config.label2id[target_label]])
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=sampling_rate)
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> probabilities = torch.sigmoid(logits[0])
    >>> # labels is a one-hot array of shape (num_frames, num_speakers)
    >>> labels = (probabilities > 0.5).long()
    >>> labels[0].tolist()
    {expected_output}
    ```
a.  
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(
    ...     [d["array"] for d in dataset[:2]["audio"]], sampling_rate=sampling_rate, return_tensors="pt", padding=True
    ... )
    >>> with torch.no_grad():
    ...     embeddings = model(**inputs).embeddings

    >>> embeddings = torch.nn.functional.normalize(embeddings, dim=-1).cpu()

    >>> # the resulting embeddings can be used for cosine similarity-based retrieval
    >>> cosine_sim = torch.nn.CosineSimilarity(dim=-1)
    >>> similarity = cosine_sim(embeddings[0], embeddings[1])
    >>> threshold = 0.7  # the optimal threshold is dataset-dependent
    >>> if similarity < threshold:
    ...     print("Speakers are not the same!")
    >>> round(similarity.item(), 2)
    {expected_output}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_label = logits.argmax(-1).item()
    >>> print(model.config.id2label[predicted_label])
    {expected_output}
    ```
)SequenceClassificationQuestionAnsweringTokenClassificationMultipleChoiceMaskedLMLMHead	BaseModelSpeechBaseModelCTCAudioClassificationAudioFrameClassificationAudioXVectorVisionBaseModelImageClassificationa  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}, SpeechT5HifiGan

    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech
    >>> speech = model.generate(inputs["input_ids"], speaker_embeddings=speaker_embeddings, vocoder=vocoder)
    ```
az  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech
    >>> speech = model(inputs["input_ids"])
    ```
a  
    Example:

    ```python
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import AutoProcessor, {model_class}

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> inputs = processor(images=image, return_tensors="pt")

    >>> outputs = model(**inputs)
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    >>> model.to(device)

    >>> # prepare image for the model
    >>> inputs = processor(images=image, return_tensors="pt").to(device)

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> # interpolate to original size
    >>> post_processed_output = processor.post_process_depth_estimation(
    ...     outputs, [(image.height, image.width)],
    ... )
    >>> predicted_depth = post_processed_output[0]["predicted_depth"]
    ```
z%
    Example:

    ```python
    ```
a  
    Example:

    ```python
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")
    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")

    >>> messages = [
    ...     {{
    ...         "role": "user", "content": [
    ...             {{"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"}},
    ...             {{"type": "text", "text": "Where is the cat standing?"}},
    ...         ]
    ...     }},
    ... ]

    >>> inputs = processor.apply_chat_template(
    ...     messages,
    ...     tokenizer=True,
    ...     return_dict=True,
    ...     return_tensors="pt",
    ...     add_generation_prompt=True
    ... )
    >>> # Generate
    >>> generate_ids = model.generate(**inputs)
    >>> processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
    ```
text-to-audio-spectrogramtext-to-audio-waveformautomatic-speech-recognitionaudio-frame-classificationaudio-classificationaudio-xvectorimage-text-to-textimage-to-textvisual-question-answeringdepth-estimationvideo-classificationzero-shot-image-classificationimage-classificationzero-shot-object-detectionobject-detectionimage-segmentationimage-to-imageimage-feature-extractiontext-generationtable-question-answeringdocument-question-answeringquestion-answeringtext2text-generationnext-sentence-predictionmultiple-choicetext-classificationtoken-classification	fill-maskmask-generationpretraining))Z+MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMESrd   )Z(MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMESre   )Z(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMESrf   )ZMODEL_FOR_CTC_MAPPING_NAMESrf   )Z2MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMESrg   )Z,MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESrh   )Z%MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMESri   )Z*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMESrj   )Z$MODEL_FOR_VISION_2_SEQ_MAPPING_NAMESrk   )Z1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMESrl   )Z(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMESrm   )Z,MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMESrn   )Z6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMESro   )Z,MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMESrp   )Z2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMESrq   )Z(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMESrr   )Z*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMESrs   )Z&MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMESrt   )ZMODEL_FOR_IMAGE_MAPPING_NAMESru   )Z!MODEL_FOR_CAUSAL_LM_MAPPING_NAMESrv   )Z0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESrw   )Z3MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMESrx   )Z*MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMESry   )Z,MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMESrz   )Z0MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMESr{   )Z'MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMESr|   )Z/MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMESr}   )Z,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMESr~   )Z!MODEL_FOR_MASKED_LM_MAPPING_NAMESr   )Z'MODEL_FOR_MASK_GENERATION_MAPPING_NAMESr   )Z#MODEL_FOR_PRETRAINING_MAPPING_NAMESr   aI  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer(
    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="tf"
    ... )

    >>> logits = model(**inputs).logits
    >>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)

    >>> # Note that tokens are classified rather then input words which means that
    >>> # there might be more predicted token classes than words.
    >>> # Multiple token classes might account for the same word
    >>> predicted_tokens_classes = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
    >>> predicted_tokens_classes
    {expected_output}
    ```

    ```python
    >>> labels = predicted_token_class_ids
    >>> loss = tf.math.reduce_mean(model(**inputs, labels=labels).loss)
    >>> round(float(loss), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

    >>> inputs = tokenizer(question, text, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
    >>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    >>> tokenizer.decode(predict_answer_tokens)
    {expected_output}
    ```

    ```python
    >>> # target is "nice puppet"
    >>> target_start_index = tf.constant([{qa_target_start_index}])
    >>> target_end_index = tf.constant([{qa_target_end_index}])

    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
    >>> loss = tf.math.reduce_mean(outputs.loss)
    >>> round(float(loss), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")

    >>> logits = model(**inputs).logits

    >>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
    >>> model.config.id2label[predicted_class_id]
    {expected_output}
    ```

    ```python
    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)

    >>> labels = tf.constant(1)
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(float(loss), 2)
    {expected_loss}
    ```
a4  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
    >>> logits = model(**inputs).logits

    >>> # retrieve index of {mask}
    >>> mask_token_index = tf.where((inputs.input_ids == tokenizer.mask_token_id)[0])
    >>> selected_logits = tf.gather_nd(logits[0], indices=mask_token_index)

    >>> predicted_token_id = tf.math.argmax(selected_logits, axis=-1)
    >>> tokenizer.decode(predicted_token_id)
    {expected_output}
    ```

    ```python
    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
    >>> # mask labels of non-{mask} tokens
    >>> labels = tf.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

    >>> outputs = model(**inputs, labels=labels)
    >>> round(float(outputs.loss), 2)
    {expected_loss}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
    >>> outputs = model(inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
a#  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="tf", padding=True)
    >>> inputs = {{k: tf.expand_dims(v, 0) for k, v in encoding.items()}}
    >>> outputs = model(inputs)  # batch size is 1

    >>> # the linear classifier still needs to be trained
    >>> logits = outputs.logits
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
    >>> outputs = model(inputs)
    >>> logits = outputs.logits
    ```
a"  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
aw  
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
    >>> import tensorflow as tf

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf")
    >>> logits = model(**inputs).logits
    >>> predicted_ids = tf.math.argmax(logits, axis=-1)

    >>> # transcribe speech
    >>> transcription = processor.batch_decode(predicted_ids)
    >>> transcription[0]
    {expected_output}
    ```

    ```python
    >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="tf").input_ids

    >>> # compute loss
    >>> loss = model(**inputs).loss
    >>> round(float(loss), 2)
    {expected_loss}
    ```
aq  
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import tensorflow as tf
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="tf")
    >>> logits = model(**inputs).logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_label = int(tf.math.argmax(logits, axis=-1))
    >>> print(model.config.id2label[predicted_label])
    {expected_output}
    ```
)rV   rW   rX   rY   rZ   r[   r\   r]   r^   rb   rc   ar  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
    >>> inputs = tokenizer(question, text, return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> start_scores = outputs.start_logits
    >>> end_scores = outputs.end_logits
    ```
a}  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="jax", padding=True)
    >>> outputs = model(**{{k: v[None, :] for k, v in encoding.items()}})

    >>> logits = outputs.logits
    ```
a  
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="np")
    >>> outputs = model(**inputs)

    >>> # retrieve logts for next token
    >>> next_token_logits = outputs.logits[:, -1]
    ```
)rV   rW   rX   rY   rZ   r\   r[   c                 K   sB   |  D ]\}}|durqd| d }td| dd| } q| S )zo
    Removes the lines testing an output with the doctest syntax in a code sample when it's set to `None`.
    N{}z\n([^\n]+)\n\s+z\nr:   )itemsr5   r?   )r2   kwargskeyvalueZdoc_keyr   r   r   filter_outputs_from_example  s   r   z[MASK]      )processor_class
checkpointrQ   rF   maskqa_target_start_indexqa_target_end_index	model_clsmodalityexpected_outputexpected_lossreal_checkpointrevisionc                    s*    	
fdd}|S )Nc                    sf  d u r| j dd n}|d d dkrt}n|d d dkr$t}nt}|	 
 dd}d	|v s<d
|v rEdkrE|d
 }nd	|v rN|d	 }nd|v rW|d }nd|v r`|d }nyd|v ri|d }npd|v sq|dv rv|d }ncd|v s~d|v r|d }nVd|v r|d }nMd|v r|d }nDd|v rdkr|d }n7d|v rdkr|d }n*d|v rdkr|d }nd|v sd|v r|d }nd|v r|d }ntd| t|d}d urt| }| jpd d 	 }d u rd nt
}|jd'i |}d ur*td!rtd" d#|d$  d%d$  d& d%}|| | | _| S )(Nr   r   r<   rD   r   ZFlaxz{true})model_classr   r   r   r   r   r   r   r   Zfake_checkpointtruerV   r_   ZaudiorW   rX   rY   rZ   )ZFlaubertWithLMHeadModelZXLMWithLMHeadModelr[   ZCausalLMr^   r`   ZXVectorra   ZModelr]   Zvisionrb   ZEncoderr\   rc   z#Docstring can't be built for model )r   r   r   z^refs/pr/\\d+zThe provided revision 'zW' is incorrect. It should point to a pull request reference on the hub like 'refs/pr/6'zfrom_pretrained("z")z", revision="r   )r+   r,   TF_SAMPLE_DOCSTRINGSFLAX_SAMPLE_DOCSTRINGSPT_SAMPLE_DOCSTRINGSrI   r   FAKE_MODEL_DISCLAIMERr   r   rU   rO   r5   matchreplace)r   r   Zsample_docstringsZ
doc_kwargsZcode_samplefunc_docZ
output_docZ	built_docr   rF   r   r   r   r   r   r   rQ   r   r   r   r   r   r   r   r     s   















z7add_code_sample_docstrings.<locals>.docstring_decoratorr   )r   r   rQ   rF   r   r   r   r   r   r   r   r   r   r   r   r   r   r   add_code_sample_docstrings  s   &Pr   c                    s    fdd}|S )Nc                    s   | j }|d}d}|t|k r.td|| d u r.|d7 }|t|k r.td|| d u s|t|k rKtt|| }t |d||< d|}n
td|  d| || _ | S )Nr:   r   z^\s*Returns?:\s*$rC   )rR   zThe function ze should have an empty 'Return:' or 'Returns:' in its docstring as placeholder, current docstring is:
)	r   r,   r   r5   r6   r9   rU   r   rI   )r   r   rS   rA   r&   rF   rQ   r   r   r     s"   
  z6replace_return_docstrings.<locals>.docstring_decoratorr   )rQ   rF   r   r   r   r   replace_return_docstrings  s   r   c                 C   s6   t j| j| j| j| j| jd}t|| }| j	|_	|S )zReturns a copy of a function f.)nameZargdefsclosure)
typesFunctionType__code____globals__rJ   __defaults____closure__	functoolsupdate_wrapper__kwdefaults__)fgr   r   r   	copy_func(  s   r   )NT)NN)Yr   r   r   r5   r%   r   collectionsr   r   r   r3   r4   rN   rM   r9   rB   rU   r   ZPT_TOKEN_CLASSIFICATION_SAMPLEZPT_QUESTION_ANSWERING_SAMPLEZ!PT_SEQUENCE_CLASSIFICATION_SAMPLEZPT_MASKED_LM_SAMPLEZPT_BASE_MODEL_SAMPLEZPT_MULTIPLE_CHOICE_SAMPLEZPT_CAUSAL_LM_SAMPLEZPT_SPEECH_BASE_MODEL_SAMPLEZPT_SPEECH_CTC_SAMPLEZPT_SPEECH_SEQ_CLASS_SAMPLEZPT_SPEECH_FRAME_CLASS_SAMPLEZPT_SPEECH_XVECTOR_SAMPLEZPT_VISION_BASE_MODEL_SAMPLEZPT_VISION_SEQ_CLASS_SAMPLEr   Z TEXT_TO_AUDIO_SPECTROGRAM_SAMPLEZTEXT_TO_AUDIO_WAVEFORM_SAMPLEZ!AUDIO_FRAME_CLASSIFICATION_SAMPLEZAUDIO_XVECTOR_SAMPLEZIMAGE_TO_TEXT_SAMPLEZDEPTH_ESTIMATION_SAMPLEZVIDEO_CLASSIFICATION_SAMPLEZ!ZERO_SHOT_OBJECT_DETECTION_SAMPLEZIMAGE_TO_IMAGE_SAMPLEZIMAGE_FEATURE_EXTRACTION_SAMPLEZ"DOCUMENT_QUESTION_ANSWERING_SAMPLEZNEXT_SENTENCE_PREDICTION_SAMPLEZMULTIPLE_CHOICE_SAMPLEZPRETRAINING_SAMPLEZMASK_GENERATION_SAMPLEZ VISUAL_QUESTION_ANSWERING_SAMPLEZTEXT_GENERATION_SAMPLEZIMAGE_CLASSIFICATION_SAMPLEZIMAGE_SEGMENTATION_SAMPLEZFILL_MASK_SAMPLEZOBJECT_DETECTION_SAMPLEZQUESTION_ANSWERING_SAMPLEZTEXT2TEXT_GENERATION_SAMPLEZTEXT_CLASSIFICATION_SAMPLEZTABLE_QUESTION_ANSWERING_SAMPLEZTOKEN_CLASSIFICATION_SAMPLEZAUDIO_CLASSIFICATION_SAMPLEZ#AUTOMATIC_SPEECH_RECOGNITION_SAMPLEZ%ZERO_SHOT_IMAGE_CLASSIFICATION_SAMPLEZ$IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLEZ#PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGSZMODELS_TO_PIPELINEZTF_TOKEN_CLASSIFICATION_SAMPLEZTF_QUESTION_ANSWERING_SAMPLEZ!TF_SEQUENCE_CLASSIFICATION_SAMPLEZTF_MASKED_LM_SAMPLEZTF_BASE_MODEL_SAMPLEZTF_MULTIPLE_CHOICE_SAMPLEZTF_CAUSAL_LM_SAMPLEZTF_SPEECH_BASE_MODEL_SAMPLEZTF_SPEECH_CTC_SAMPLEZTF_VISION_BASE_MODEL_SAMPLEZTF_VISION_SEQ_CLASS_SAMPLEr   Z FLAX_TOKEN_CLASSIFICATION_SAMPLEZFLAX_QUESTION_ANSWERING_SAMPLEZ#FLAX_SEQUENCE_CLASSIFICATION_SAMPLEZFLAX_MASKED_LM_SAMPLEZFLAX_BASE_MODEL_SAMPLEZFLAX_MULTIPLE_CHOICE_SAMPLEZFLAX_CAUSAL_LM_SAMPLEr   r   r   r   r   r   r   r   r   <module>   sN  $		
5!": #$#!"%(!#!$
c