o
    ZhtQ                     @   s   d dl Z d dlZd dlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZmZmZmZmZ e	 r;d dlZddlmZmZ e rJd dlZddlmZmZ G d	d
 d
eZeeddG dd deZdS )    N   )GenerationConfig)add_end_docstringsis_tf_availableis_torch_availablerequires_backends   )ArgumentHandlerDatasetPipelinePipelineExceptionbuild_pipeline_init_args),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES3TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESc                   @   s   e Zd ZdZdddZdS )%TableQuestionAnsweringArgumentHandlerzB
    Handles arguments for the TableQuestionAnsweringPipeline
    Nc                 K   s`  t | d dd l}|d u rtd|d u rt|tr,|dd ur,|dd ur,|g}nbt|trmt|dkrmtdd |D sLtdd	d |D  |d dd ura|d dd ura|}n-td
|d 	  dt
d urvt|t
s|t|tjr~|S tdt| d||dg}|D ]}t|d |js|d d u rtd||d |d< q|S )Npandasr   z(Keyword argument `table` cannot be None.querytablec                 s   s    | ]}t |tV  qd S N)
isinstancedict.0d r   ^/var/www/auris/lib/python3.10/site-packages/transformers/pipelines/table_question_answering.py	<genexpr>6   s    zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>z:Keyword argument `table` should be a list of dict, but is c                 s   s    | ]}t |V  qd S r   )typer   r   r   r   r   8   s    zIf keyword argument `table` is a list of dictionaries, each dictionary should have a `table` and `query` key, but only dictionary has keys z `table` and `query` keys.zZInvalid input. Keyword argument `table` should be either of type `dict` or `list`, but is ))r   r   zTable cannot be None.)r   r   
ValueErrorr   r   getlistlenallkeysr
   typesGeneratorTyper   Z	DataFrame)selfr   r   kwargspdZtqa_pipeline_inputsZtqa_pipeline_inputr   r   r   __call__&   sD   
&$
z.TableQuestionAnsweringArgumentHandler.__call__)NN)__name__
__module____qualname____doc__r,   r   r   r   r   r   !   s    r   T)Zhas_tokenizerc                       sz   e Zd ZdZdZdZeddZe f fdd	Z	dd	 Z
d
d Z fddZdddZdddZdddZdd Z  ZS )TableQuestionAnsweringPipelinea  
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
    PyTorch.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    ztable,queryT   )Zmax_new_tokensc                    s   t  j|i | || _| jdkrt }|t n	t }|t	 | 
| tt| jjdd o<tt| jjdd | _t| jjdrJd| _d S d | _d S )Ntfaggregation_labelsZnum_aggregation_labelstapas)super__init___args_parser	frameworkr   copyupdater   r   r   Zcheck_model_typeboolgetattrmodelconfig	aggregatehasattrr   )r)   Zargs_parserargsr*   mapping	__class__r   r   r7      s   


"z'TableQuestionAnsweringPipeline.__init__c                 K   s   | j di |S )Nr   )r>   )r)   inputsr   r   r   batch_inference   s   z.TableQuestionAnsweringPipeline.batch_inferencec                    sh  | j dkr`g }g }d}|d jd }|d | j}|d | j}|d | j}d}	t|D ]}
|dur|	dddf }t|  }||
 }	t|jd D ]@}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkrt
|||f ||< qTt|tj| j|	dddf< ||
 }||
 }||
 }	| j|d|d|	dd
}|j}| jr||j || tjj|d}|j|tj|jj }tt t| 	 D ]D\}}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr<|dkr<|dkr< ||f | q fdd D }q1tt|d}| jsV|fS |tt|dfS g }g }d}|d jd }|d }|d }|d  }d}	t|D ]}
|dur|	dddf }tj|tj d}||
 }	t|jd D ]D}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkrt
|||f ||< q||	dddf< ||
 }||
 }||
 }	| jtj!|ddtj!|ddtj!|	ddd
}|j}| jr!||j || t"j#$t"%|t"jt"%|t"j }tt |	}	tt"| 	 D ]E\}}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkr ||f | qJ fdd D }qt"&t|d}| js|fS |t"&t|dfS )z
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        ptN	input_idsr   attention_masktoken_type_ids   r   r   )rI   rJ   rK   )logitsc                    $   i | ]}|t  |  d kqS g      ?nparraymeanr   keyZcoords_to_probsr   r   
<dictcomp>      $ zGTableQuestionAnsweringPipeline.sequential_inference.<locals>.<dictcomp>)Zdtype)Zaxisc                    rN   rO   rP   rT   rV   r   r   rW     rX   )'r9   shapetoZdevicerangerQ   Z
zeros_likecpunumpytolistinttorchZ
from_numpyr   longr>   Z	unsqueezerM   r@   appendZlogits_aggregationdistributionsZ	BernoulliZprobsZfloat32collectionsdefaultdictr#   	enumerateZsqueezecattupleZint32Zexpand_dimsr3   mathZsigmoidcastconcat)r)   rF   Z
all_logitsZall_aggregationsZprev_answersZ
batch_sizerI   rJ   rK   Ztoken_type_ids_exampleindexZprev_labels_exampleZmodel_labelsiZ
segment_idZcol_idZrow_idZinput_ids_exampleZattention_mask_exampleoutputsrM   Zdist_per_tokenZprobabilitiespcolrowZlogits_batchr   rV   r   sequential_inference   s   &

"


"z3TableQuestionAnsweringPipeline.sequential_inferencec                    s<   | j |i |}t j|fi |}t|dkr|d S |S )a  
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `List[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`List[Tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`List[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        r   r   )r8   r6   r,   r$   )r)   rB   r*   Zpipeline_inputsresultsrD   r   r   r,     s
   Kz'TableQuestionAnsweringPipeline.__call__Nc                 K   s   i }|d ur
||d< |d ur||d< i }|d ur||d< t | dd d ur)| j|d< t | dd d ur;| j|d< | j|d< ||i fS )Npadding
truncation
sequentialassistant_modelassistant_tokenizer	tokenizer)r=   rw   ry   rx   )r)   rv   rt   ru   r*   Zpreprocess_paramsZforward_paramsr   r   r   _sanitize_parametersj  s   



z3TableQuestionAnsweringPipeline._sanitize_parametersc                 C   sv   |d u r| j dkrd}nd}|d |d }}|jrtd|d u s&|dkr*td| j||| j||d	}||d< |S )
Nr5   Zdrop_rows_to_fitZdo_not_truncater   r   ztable is empty zquery is empty)Zreturn_tensorsru   rt   )r   emptyr!   ry   r9   )r)   Zpipeline_inputrv   rt   ru   r   r   rF   r   r   r   
preprocess}  s   
z)TableQuestionAnsweringPipeline.preprocessFc                 K   st   | d}| jdkr|r| jdi |}n| jdi |}nd|vr'| j|d< | jjdi ||}|||d}|S )Nr   r5   generation_config)model_inputsr   rn   r   )popr   rr   rG   r~   r>   generate)r)   r   rv   Zgenerate_kwargsr   rn   model_outputsr   r   r   _forward  s   


z'TableQuestionAnsweringPipeline._forwardc                    sr  |d }|d |d }j dkrjrE|d d \}}j|||}|\}}fddt|D  jjj fddt|D }	n|d	 }j||}|d	 }i  i }	g }
t|D ]6\}}fd
d|D } |d}|	|d}|d	| |fdd|D d}|r||d< |

| q^t|d	krtdndd jj|ddD }
t|
dkr|
S |
d	 S )Nr   r   rn   r5   r   c                    s    i | ]\}}| j jj| qS r   )r>   r?   r4   r   rm   pred)r)   r   r   rW     s     z>TableQuestionAnsweringPipeline.postprocess.<locals>.<dictcomp>c                    s&   i | ]\}}|kr| | d  qS )z > r   r   )aggregatorsno_agg_label_indexr   r   rW     s     r   c                       g | ]} j | qS r   Ziatr   Z
coordinater   r   r   
<listcomp>      z>TableQuestionAnsweringPipeline.postprocess.<locals>.<listcomp>r{   z, c                    r   r   r   r   r   r   r   r     r   )answercoordinatescells
aggregatorzEmpty answerc                 S   s   g | ]}d |iqS )r   r   )r   r   r   r   r   r     s    T)Zskip_special_tokensr   )r   r@   ry   Zconvert_logits_to_predictionsrf   r>   r?   Zno_aggregation_label_indexr"   joinrb   r$   r   Zbatch_decode)r)   r   rF   rn   rM   Z
logits_aggZpredictionsZanswer_coordinates_batchZagg_predictionsZaggregators_prefixZanswersrl   r   r   r   Zaggregator_prefixr   r   )r   r   r)   r   r   postprocess  sF   

z*TableQuestionAnsweringPipeline.postprocess)NNN)NTN)F)r-   r.   r/   r0   Zdefault_input_namesZ_pipeline_calls_generater   Z_default_generation_configr   r7   rG   rr   r,   rz   r}   r   r   __classcell__r   r   rD   r   r1   V   s     " 
R

r1   )rd   r'   r]   rQ   Z
generationr   utilsr   r   r   r   baser	   r
   r   r   r   r`   Zmodels.auto.modeling_autor   r   Z
tensorflowr3   Zmodels.auto.modeling_tf_autor   r   r   r1   r   r   r   r   <module>   s    5