
import inspect
from typing import List, Union

import numpy as np

from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, logging
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args


logger = logging.get_logger(__name__)


class ZeroShotClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    """

    def _parse_labels(self, labels):
        if isinstance(labels, str):
            labels = [label.strip() for label in labels.split(",") if label.strip()]
        return labels

    def __call__(self, sequences, labels, hypothesis_template):
        if len(labels) == 0 or len(sequences) == 0:
            raise ValueError("You must include at least one label and at least one sequence.")
        if hypothesis_template.format(labels[0]) == hypothesis_template:
            raise ValueError(
                (
                    'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
                    "Make sure the passed template includes formatting syntax such as {{}} where the label should go."
                ).format(hypothesis_template)
            )

        if isinstance(sequences, str):
            sequences = [sequences]

        sequence_pairs = []
        for sequence in sequences:
            # Pair the sequence with one hypothesis per candidate label.
            sequence_pairs.extend([[sequence, hypothesis_template.format(label)] for label in labels])

        return sequence_pairs, sequences


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class ZeroShotClassificationPipeline(ChunkPipeline):
    """
NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
**much** more flexible.

Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
config's :attr:*~transformers.PretrainedConfig.label2id*.

Example:

```python
>>> from transformers import pipeline

>>> oracle = pipeline(model="facebook/bart-large-mnli")
>>> oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

>>> oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["english", "german"],
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
```
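
Labels can also be scored independently of each other by passing `multi_label=True`, optionally with a custom
`hypothesis_template` (an arbitrary template is used below purely for illustration); the resulting scores depend
on the model, so only the shape of the output is shown:

```python
>>> result = oracle(
...     "I have a problem with my iphone that needs to be resolved asap!!",
...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
...     hypothesis_template="This text is about {}.",
...     multi_label=True,
... )
>>> sorted(result.keys())
['labels', 'scores', 'sequence']
```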

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-classification"`.

The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    """

    def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
        self._args_parser = args_parser
        super().__init__(*args, **kwargs)
        if self.entailment_id == -1:
            logger.warning(
                "Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to "
                "-1. Define a descriptive label2id mapping in the model config to ensure correct outputs."
            )

    @property
    def entailment_id(self):
        for label, ind in self.model.config.label2id.items():
            if label.lower().startswith("entail"):
                return ind
        return -1

    def _parse_and_tokenize(
        self, sequence_pairs, padding=True, add_special_tokens=True, truncation=TruncationStrategy.ONLY_FIRST, **kwargs
    ):
        """
Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        """
        return_tensors = self.framework
        if self.tokenizer.pad_token is None:
            # Override for tokenizers not supporting padding
            logger.error(
                "Tokenizer was not supporting padding necessary for zero-shot, attempting to use "
                " `pad_token=eos_token`"
            )
            self.tokenizer.pad_token = self.tokenizer.eos_token
        try:
            inputs = self.tokenizer(
                sequence_pairs,
                add_special_tokens=add_special_tokens,
                return_tensors=return_tensors,
                padding=padding,
                truncation=truncation,
            )
        except Exception as e:
            if "too short" in str(e):
                # The tokenizer may complain that we want to truncate to a value that is not even reached by the
                # input. In that case we don't want to truncate at all.
                inputs = self.tokenizer(
                    sequence_pairs,
                    add_special_tokens=add_special_tokens,
                    return_tensors=return_tensors,
                    padding=padding,
                    truncation=TruncationStrategy.DO_NOT_TRUNCATE,
                )
            else:
                raise e

        return inputs

    def _sanitize_parameters(self, **kwargs):
        if kwargs.get("multi_class", None) is not None:
            kwargs["multi_label"] = kwargs["multi_class"]
            logger.warning(
                "The `multi_class` argument has been deprecated and renamed to `multi_label`. `multi_class` will be "
                "removed in a future version of Transformers."
            )
        preprocess_params = {}
        if "candidate_labels" in kwargs:
            preprocess_params["candidate_labels"] = self._args_parser._parse_labels(kwargs["candidate_labels"])
        if "hypothesis_template" in kwargs:
            preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]

        postprocess_params = {}
        if "multi_label" in kwargs:
            postprocess_params["multi_label"] = kwargs["multi_label"]
        return preprocess_params, {}, postprocess_params

    def __call__(
        self,
        sequences: Union[str, List[str]],
        *args,
        **kwargs,
    ):
        """
Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
information.

Args:
    sequences (`str` or `List[str]`):
        The sequence(s) to classify, will be truncated if the model input is too large.
    candidate_labels (`str` or `List[str]`):
        The set of possible class labels to classify each sequence into. Can be a single label, a string of
        comma-separated labels, or a list of labels.
    hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
        The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
        similar syntax for the candidate label to be inserted into the template. For example, the default
        template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
        model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
        works well in many cases, but it may be worthwhile to experiment with different templates depending on
        the task setting.
    multi_label (`bool`, *optional*, defaults to `False`):
        Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
        the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
        independent and probabilities are normalized for each candidate by doing a softmax of the entailment
        score vs. the contradiction score.

Return:
    A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

    - **sequence** (`str`) -- The sequence for which this is the output.
    - **labels** (`List[str]`) -- The labels sorted by order of likelihood.
    - **scores** (`List[float]`) -- The probabilities for each of the labels.
r   r	   r]   z%Unable to understand extra arguments )r   r   r6   r%   )r   r!   r<   r=   r>   s       r   r%   'ZeroShotClassificationPipeline.__call__   s^    H t9>Y!^ 2& @)-aF%&DTFKLLw	4V44r.   c              #      #    U R                  XU5      u  pE[        [        X$5      5       H6  u  nu  pxU R                  U/5      n	UUS   U[	        U5      S-
  :H  S.U	Ev   M8     g 7f)Nr   r	   candidate_labelr$   is_last)r5   	enumerateziprX   r   )
r   rV   r]   r"   r#   r!   irg   sequence_pairmodel_inputs
             r   
preprocess)ZeroShotClassificationPipeline.preprocess   s~     $($5$5fPc$d!3<SAQ=b3c/A/22M?CK $3%aL$4 5 99 	  4ds   A$A&c                    US   nUS   nU R                   R                   Vs0 s H  oDX   _M	     nnU R                  S:X  a  U R                  R                  OU R                  R
                  nS[        R                  " U5      R                  R                  5       ;   a  SUS'   U R                  " S0 UD6nUUUS   S.UEnU$ s  snf )	Nrg   r$   pt	use_cacheFrh   rf   r'   )
rP   model_input_namesrO   rB   forwardcallinspect	signature
parameterskeys)	r   rV   rg   r$   kmodel_inputsmodel_forwardoutputsmodel_outputss	            r   _forward'ZeroShotClassificationPipeline._forward   s     !23*%.2nn.N.NO.N69.NO.2nn.D

**$**//'++M:EEJJLL(-L%**,|,  / i(
 	
  Ps   Cc                 ,   U Vs/ s H  o3S   PM	     nnU Vs/ s H  o3S   PM	     nnU R                   S:X  aF  [        R                  " U Vs/ s H#  ofS   R                  5       R	                  5       PM%     sn5      nO7[        R                  " U Vs/ s H  ofS   R	                  5       PM     sn5      nUR
                  S   n[        U5      n	X-  n
UR                  XS45      nU(       d  [        U5      S:X  a_  U R                  nUS:X  a  SOSnUSX/4   n[        R                  " U5      [        R                  " U5      R                  SS	S
9-  nUS   nOLUSU R                  4   n[        R                  " U5      [        R                  " U5      R                  SS	S
9-  n[        [        US   R                  5       5      5      nUS   U Vs/ s H  nUU   PM
     snUSU4   R                  5       S.$ s  snf s  snf s  snf s  snf s  snf )Nrg   r$   rq   logitsr   r4   r	   .T)keepdims).r	   )r$   r   scores)rO   npconcatenatefloatnumpyshaper   reshaper8   expsumlistreversedargsorttolist)r   r~   r\   r}   r]   r!   outputr   Nnnum_sequencesreshaped_outputsr8   contradiction_identail_contr_logitsr   entail_logitstop_indsrk   s                      r   postprocess*ZeroShotClassificationPipeline.postprocess   s   FSTm7$56mT8EFWZ(	F>>T!^^Ta$bTa&H%5%;%;%=%C%C%ETa$bcF^^M$ZM&H%5%;%;%=M$Z[FLLO !!>>=R*@A#./14 ..M%2a%7rQ"239I8Y3Y"ZVV/0266:M3N3R3RSU`d3R3eeFF^F -S$2D2D-DEMVVM*RVVM-B-F-FrTX-F-YYF!2!2!456!!4<=Hq'*H=Q[)002
 	
1 UF$b$Z* >s   G=H*HHH)r5   )NzThis example is {}.)F)r(   r)   r*   r+   r,   r   r7   propertyr8   r   
ONLY_FIRSTrX   ra   r   r   r   r%   rn   r   r   r-   __classcell__)r>   s   @r   r1   r1   .   su    %N $I#J    '+tPbPmPm(T9$+5d3i(+5Z$
 
r.   r1   )rv   typingr   r   r   r   tokenization_utilsr   utilsr   r   baser
   r   r   
get_loggerr(   r9   r   r1   r'   r.   r   <module>r      sc       3 / J J 
		H	%)O )@ ,4@A]
] ]
 B]
r.   