
     `i0                         d dl Z d dlmZ d dlZddlmZ ddlmZm	Z	 ddl
mZmZmZ  e	j        e          Z G d d	e          Z e ed
                     G d de                      ZdS )    N)Union   )TruncationStrategy)add_end_docstringslogging   )ArgumentHandlerChunkPipelinebuild_pipeline_init_argsc                       e Zd ZdZd Zd ZdS )%ZeroShotClassificationArgumentHandlerz
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    c                 n    t          |t                    rd |                    d          D             }|S )Nc                 ^    g | ]*}|                                 |                                 +S  )strip).0labels     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/pipelines/zero_shot_classification.py
<listcomp>zGZeroShotClassificationArgumentHandler._parse_labels.<locals>.<listcomp>   s-    TTTekkmmTekkmmTTT    ,)
isinstancestrsplit)selflabelss     r   _parse_labelsz3ZeroShotClassificationArgumentHandler._parse_labels   s9    fc"" 	UTTc1B1BTTTFr   c                 ^   t          |          dk    st          |          dk    rt          d                              |d                   k    rt          d d          t          |t                    r|g}g }|D ]$|                    fd|D                        %||fS )Nr   z>You must include at least one label and at least one sequence.z"The provided hypothesis_template "z" was not able to be formatted with the target labels. Make sure the passed template includes formatting syntax such as {} where the label should go.c                 >    g | ]}                     |          gS r   )format)r   r   hypothesis_templatesequences     r   r   zBZeroShotClassificationArgumentHandler.__call__.<locals>.<listcomp>'   s.    "e"e"eUZH.A.H.H.O.O#P"e"e"er   )len
ValueErrorr    r   r   extend)r   	sequencesr   r!   sequence_pairsr"   s      ` @r   __call__z.ZeroShotClassificationArgumentHandler.__call__   s    v;;!s9~~22]^^^%%fQi004GGGq5H q q q  
 i%% 	$"I! 	g 	gH!!"e"e"e"e"e^d"e"e"effffy((r   N)__name__
__module____qualname____doc__r   r(   r   r   r   r   r      s<         
  
) ) ) ) )r   r   T)has_tokenizerc                        e Zd ZdZdZdZdZdZ e            f fd	Z	e
d             Zddej        fdZd Zdeeee         f         f fd	ZddZd ZddZ xZS )ZeroShotClassificationPipelinea  
    NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
    language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
    hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
    **much** more flexible.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
    label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
    config's :attr:*~transformers.PretrainedConfig.label2id*.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="facebook/bart-large-mnli")
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["english", "german"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-classification"`.

    The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
    of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    FTc                     || _          t                      j        di | | j        dk    rt                              d           d S d S )NzFailed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.r   )_args_parsersuper__init__entailment_idloggerwarning)r   args_parserkwargs	__class__s      r   r4   z'ZeroShotClassificationPipeline.__init__Z   sd    '""6"""##NNk     $#r   c                     | j         j        j                                        D ]0\  }}|                                                    d          r|c S 1dS )Nentailr1   )modelconfiglabel2iditemslower
startswith)r   r   inds      r   r5   z,ZeroShotClassificationPipeline.entailment_idc   sZ    *+4::<< 	 	JE3{{}}''11 


rr   c                 b   | j         }| j        j        0t                              d           | j        j        | j        _        	 |                     |||||          }nO# t          $ rB}dt          |          v r%|                     ||||t          j	                  }n|Y d}~nd}~ww xY w|S )ze
        Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        NzfTokenizer was not supporting padding necessary for zero-shot, attempting to use  `pad_token=eos_token`)add_special_tokensreturn_tensorspadding
truncationz	too short)
	framework	tokenizer	pad_tokenr6   error	eos_token	Exceptionr   r   DO_NOT_TRUNCATE)	r   r'   rG   rE   rH   r9   rF   inputses	            r   _parse_and_tokenizez2ZeroShotClassificationPipeline._parse_and_tokenizej   s     >#+LL)   (,~'?DN$	^^#5-% $  FF  	 	 	c!ff$$ "'9#1#1A (    	$ s   A   
B,*8B''B,c                    |                     d          %|d         |d<   t                              d           i }d|v r#| j                            |d                   |d<   d|v r|d         |d<   i }d|v r|d         |d<   |i |fS )Nmulti_classmulti_labelzThe `multi_class` argument has been deprecated and renamed to `multi_label`. `multi_class` will be removed in a future version of Transformers.candidate_labelsr!   )getr6   r7   r2   r   )r   r9   preprocess_paramspostprocess_paramss       r   _sanitize_parametersz3ZeroShotClassificationPipeline._sanitize_parameters   s    ::m$$0$*=$9F=!NNU   ''484E4S4STZ[mTn4o4o01 F**7=>S7T34F""06}0E}- "&888r   r&   c                     t          |          dk    rn5t          |          dk    rd|vr|d         |d<   nt          d|            t                      j        |fi |S )a  
        Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
        information.

        Args:
            sequences (`str` or `list[str]`):
                The sequence(s) to classify, will be truncated if the model input is too large.
            candidate_labels (`str` or `list[str]`):
                The set of possible class labels to classify each sequence into. Can be a single label, a string of
                comma-separated labels, or a list of labels.
            hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
                The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
                similar syntax for the candidate label to be inserted into the template. For example, the default
                template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
                model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
                works well in many cases, but it may be worthwhile to experiment with different templates depending on
                the task setting.
            multi_label (`bool`, *optional*, defaults to `False`):
                Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
                the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
                independent and probabilities are normalized for each candidate by doing a softmax of the entailment
                score vs. the contradiction score.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **sequence** (`str`) -- The sequence for which this is the output.
            - **labels** (`list[str]`) -- The labels sorted by order of likelihood.
            - **scores** (`list[float]`) -- The probabilities for each of the labels.
        r   r   rV   z%Unable to understand extra arguments )r#   r$   r3   r(   )r   r&   argsr9   r:   s       r   r(   z'ZeroShotClassificationPipeline.__call__   sz    H t99>>YY!^^ 2& @ @)-aF%&&KTKKLLLuww	44V444r   NThis example is {}.c              #      K   |                      |||          \  }}t          t          ||                    D ]B\  }\  }}|                     |g          }	||d         |t	          |          dz
  k    d|	V  Cd S )Nr   r   candidate_labelr"   is_last)r2   	enumerateziprR   r#   )
r   rP   rV   r!   r'   r&   ir`   sequence_pairmodel_inputs
             r   
preprocessz)ZeroShotClassificationPipeline.preprocess   s      $($5$5f>NPc$d$d!	3<SAQSa=b=b3c3c 	 	/A/22M?CCK $3%aL$4 5 5 99  	    	 	r   c                    d         }d         }fd| j         j        D             }| j        dk    r| j        j        n| j        j        }dt          j        |          j        v rd|d<    | j        d	i |}||d         d|}|S )
Nr`   r"   c                 "    i | ]}||         S r   r   )r   krP   s     r   
<dictcomp>z;ZeroShotClassificationPipeline._forward.<locals>.<dictcomp>   s    OOO6!9OOOr   pt	use_cacheFra   r_   r   )	rJ   model_input_namesrI   r=   forwardcallinspect	signature
parameters)r   rP   r`   r"   model_inputsmodel_forwardoutputsmodel_outputss    `      r   _forwardz'ZeroShotClassificationPipeline._forward   s     !23*%OOOOdn.NOOO.2n.D.D
**$*/'+M::EEE(-L%$*,,|,,  / i(
 
 	
 r   c                    d |D             d |D             }| j         dk    rt          j        d |D                       }nt          j        d |D                       }|j        d         }t	                    }||z  }|                    ||df          }|st	                    dk    rd| j        }	|	dk    rdnd}
|d	|
|	gf         }t          j        |          t          j        |                              dd
          z  }|d         }nM|d	| j        f         }t          j        |          t          j        |                              dd
          z  }t          t          |d                                                             }|d         fd|D             |d|f                                         dS )Nc                     g | ]
}|d          S )r`   r   r   rv   s     r   r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>   s    TTT7G$56TTTr   c                     g | ]
}|d          S )r"   r   r{   s     r   r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>   s    FFFWWZ(FFFr   rl   c                 f    g | ].}|d                                                                           /S logits)floatnumpyr   outputs     r   r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>   s5    $b$b$b&VH%5%;%;%=%=%C%C%E%E$b$b$br   c                 B    g | ]}|d                                           S r~   )r   r   s     r   r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>   s)    $Z$Z$Z&VH%5%;%;%=%=$Z$Z$Zr   r   r1   r   .T)keepdims).r   c                      g | ]
}|         S r   r   )r   rd   rV   s     r   r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>  s    ===q'*===r   )r"   r   scores)rI   npconcatenateshaper#   reshaper5   expsumlistreversedargsorttolist)r   rw   rU   r&   r   Nnnum_sequencesreshaped_outputsr5   contradiction_identail_contr_logitsr   entail_logitstop_indsrV   s                  @r   postprocessz*ZeroShotClassificationPipeline.postprocess   s   TTmTTTFFFFF	>T!!^$b$bTa$b$b$bccFF^$Z$ZM$Z$Z$Z[[FLO !!Q!>>=!R*@AA 
	Z#.//144 .M%2a%7%7rrQ"239I=8Y3Y"ZV/0026:M3N3N3R3RSU`d3R3e3eeFF^FF -S$2D-DEMVM**RVM-B-B-F-FrTX-F-Y-YYF!2!2!4!45566!!====H===Q[)0022
 
 	
r   )Nr]   )F)r)   r*   r+   r,   _load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r4   propertyr5   r   
ONLY_FIRSTrR   rZ   r   r   r   r(   rg   rx   r   __classcell__)r:   s   @r   r/   r/   ,   s#       % %N O!#O#H#H#J#J         X '+tPbPm( ( ( (T9 9 9$+5d3i(+5 +5 +5 +5 +5 +5Z     $
 
 
 
 
 
 
 
r   r/   )rq   typingr   r   r   tokenization_utilsr   utilsr   r   baser	   r
   r   
get_loggerr)   r6   r   r/   r   r   r   <module>r      s              3 3 3 3 3 3 / / / / / / / / J J J J J J J J J J 
	H	%	%) ) ) ) )O ) ) )< ,,4@@@AAb
 b
 b
 b
 b
] b
 b
 BAb
 b
 b
r   