
     `iI                        d dl Z d dlZd dlmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZmZ  e            r
d dlZdd	lmZ  e            rdd
lmZ  ej        e          Z G d de j                  Z e
 ed                     G d de                      Z e
 ed                     G d de                      Z e
 ed                     G d de                      ZdS )    N)AnyUnion   )GenerationConfig)TruncationStrategy)add_end_docstringsis_tf_availableis_torch_availablelogging   )Pipelinebuild_pipeline_init_args)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMESc                       e Zd ZdZdZdS )
ReturnTyper   r   N)__name__
__module____qualname__TENSORSTEXT     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/pipelines/text2text_generation.pyr   r      s        GDDDr   r   T)has_tokenizerc            	           e Zd ZdZdZdZdZdZdZ e	dd          Z
dZ fdZ	 	 	 	 	 	 dd
ZdededefdZd Zdeeee         f         dedeeeef                  f fdZej        fdZd Zej        dfdZ xZS )Text2TextGenerationPipelinea9  
    Pipeline for text to text generation using seq2seq models.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Example:

    ```python
    >>> from transformers import pipeline

    >>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
    >>> generator(
    ...     "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
    ... )
    [{'generated_text': 'question: Who created the RuPERTa-base?'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
    generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
    text generation parameters in [Text generation strategies](../generation_strategies) and [Text
    generation](text_generation).

    This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"text2text-generation"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
    parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    text2text_generator = pipeline("text2text-generation")
    text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
    ```TF      )max_new_tokens	num_beams	generatedc                      t                      j        |i | |                     | j        dk    rt          nt
                     d S )Ntf)super__init__check_model_type	frameworkr   r   selfargskwargs	__class__s      r   r&   z$Text2TextGenerationPipeline.__init__T   sY    $)&)))~%% <;=	
 	
 	
 	
 	
r   Nc                 v   i }|||d<   |}	i }
|||rt           j        nt           j        }|||
d<   |||
d<   |N| j                            |d          }t          |          dk    rt          j        d           |d         |d	<   | j        
| j        |	d
<   | j	        | j        |	d<   | j	        |	d<   ||	|
fS )N
truncationreturn_typeclean_up_tokenization_spacesF)add_special_tokensr   zStopping on a multiple token sequence is not yet supported on transformers. The first token of the stop sequence will be used as the stop sequence string in the interim.r   eos_token_idassistant_model	tokenizerassistant_tokenizer)
r   r   r   r5   encodelenwarningswarnr4   r6   )r*   return_tensorsreturn_textr0   r1   r/   stop_sequencegenerate_kwargspreprocess_paramsforward_paramspostprocess_paramsstop_sequence_idss               r   _sanitize_parametersz0Text2TextGenerationPipeline._sanitize_parameters]   s    !.8l+(%+*=0>S*,,JOK"0;}-'3A]=>$ $ 5 5mX] 5 ^ ^$%%))b   /@.BON++040DN,-#/*..N;'484LN01 .2DDDr   input_length
min_length
max_lengthc                     dS )j
        Checks whether there might be something wrong with given input with regard to the model.
        Tr   r*   rD   rE   rF   s       r   check_inputsz(Text2TextGenerationPipeline.check_inputs   s	     tr   c                   | j         | j         ndt          |d         t                    r3| j        j        t          d          fd|d         D             f}d}nCt          |d         t                    r|d         z   f}d}nt          d|d          d           | j        |||| j        d	}d
|v r|d
= |S )N r   zOPlease make sure that the tokenizer has a pad_token_id when using a batch inputc                     g | ]}|z   S r   r   ).0argprefixs     r   
<listcomp>zCText2TextGenerationPipeline._parse_and_tokenize.<locals>.<listcomp>   s    555cVc\555r   TFz `args[0]`: zI have the wrong format. The should be either of type `str` or type `list`)paddingr/   r;   token_type_ids)	rP   
isinstancelistr5   pad_token_id
ValueErrorstr	TypeErrorr(   )r*   r/   r+   rR   inputsrP   s        @r   _parse_and_tokenizez/Text2TextGenerationPipeline._parse_and_tokenize   s     $ 7Rd1gt$$ 	~*2 !rsss5555T!W5557DGGQ%% 	T!W$&DGGqtAwqqq    w:^b^lmmmv%%'(r   r+   r,   returnc                      t                      j        |i |}t          |d         t                    rDt	          d |d         D                       r%t	          d |D                       rd |D             S |S )a  
        Generate the output text(s) using text(s) given as inputs.

        Args:
            args (`str` or `list[str]`):
                Input text for the encoder.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
                The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
                (default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
                max_length instead of throwing an error down the line.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **generated_text** (`str`, present when `return_text=True`) -- The generated text.
            - **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the generated text.
        r   c              3   @   K   | ]}t          |t                    V  d S )N)rT   rX   )rN   els     r   	<genexpr>z7Text2TextGenerationPipeline.__call__.<locals>.<genexpr>   s,      ::BJr3''::::::r   c              3   <   K   | ]}t          |          d k    V  dS )r   N)r8   rN   ress     r   r`   z7Text2TextGenerationPipeline.__call__.<locals>.<genexpr>   s,      44cCHHM444444r   c                     g | ]
}|d          S )r   r   rb   s     r   rQ   z8Text2TextGenerationPipeline.__call__.<locals>.<listcomp>   s    ---sCF---r   )r%   __call__rT   rU   all)r*   r+   r,   resultr-   s       r   re   z$Text2TextGenerationPipeline.__call__   s    : "!42622tAw%%	.::$q':::::	. 44V44444	.
 .-f----r   c                 &     | j         |fd|i|}|S )Nr/   )r[   )r*   rZ   r/   r,   s       r   
preprocessz&Text2TextGenerationPipeline.preprocess   s&    ))&RRZR6RRr   c                 z   | j         dk    r|d         j        \  }}n:| j         dk    r/t          j        |d                                                   \  }}|                     ||                    d| j        j                  |                    d| j        j                             d|vr
| j        |d<    | j	        j
        d
i ||}|j        d         }| j         dk    r  |j        |||z  g|j        dd          R  }n5| j         dk    r*t          j        ||||z  g|j        dd          R           }d	|iS )Npt	input_idsr$   rE   rF   generation_configr   r   
output_idsr   )r(   shaper$   numpyrJ   getrm   rE   rF   modelgeneratereshape)r*   model_inputsr>   in_brD   rn   out_bs          r   _forwardz$Text2TextGenerationPipeline._forward   sp   >T!!!-k!:!@D,,^t##!#,{*C!D!D!J!J!L!LD,d.D.OPPd.D.OPP	
 	
 	
 o55373IO/0(TZ(KK<K?KK
 #>T!!++D%4-W*BRSTSUSUBVWWWJJ^t##Ju}0\zGWXYXZXZG[0\0\]]Jj))r   c                     g }|d         d         D ]k}|t           j        k    r| j         d|i}n7|t           j        k    r'| j         d| j                            |d|          i}|                    |           l|S )Nrn   r   
_token_ids_textT)skip_special_tokensr1   )r   r   return_namer   r5   decodeappend)r*   model_outputsr0   r1   recordsrn   records          r   postprocessz'Text2TextGenerationPipeline.postprocess   s    '5a8 	# 	#Jj000!-999:F
//'...0E0E",05Q 1F 1 1 NN6""""r   )NNNNNN)r   r   r   __doc___pipeline_calls_generate_load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   _default_generation_configr}   r&   rC   intrJ   r[   r   rX   rU   r   dictre   r   DO_NOT_TRUNCATEri   rx   r   r   r   __classcell__r-   s   @r   r   r      s       ' 'R  $O!#O!1!1" " " K
 
 
 
 
 %)(E (E (E (ET # 3      *$eCcN3 $s $tDQTVYQYNG[ $ $ $ $ $ $L -?,N    * * *0 6@_ch        r   r   c                   >     e Zd ZdZdZ fdZdedededefdZ xZ	S )	SummarizationPipelinea  
    Summarize news articles and other documents.

    This summarizing pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"summarization"`.

    The models that this pipeline can use are models that have been fine-tuned on a summarization task, which is
    currently, '*bart-large-cnn*', '*google-t5/t5-small*', '*google-t5/t5-base*', '*google-t5/t5-large*', '*google-t5/t5-3b*', '*google-t5/t5-11b*'. See the up-to-date
    list of available models on [huggingface.co/models](https://huggingface.co/models?filter=summarization). For a list
    of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Usage:

    ```python
    # use bart in pytorch
    summarizer = pipeline("summarization")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)

    # use t5 in tf
    summarizer = pipeline("summarization", model="google-t5/t5-base", tokenizer="google-t5/t5-base", framework="tf")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
    ```summaryc                 6     t                      j        |i |S )a  
        Summarize the text(s) given as inputs.

        Args:
            documents (*str* or `list[str]`):
                One or several articles (or one list of articles) to summarize.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **summary_text** (`str`, present when `return_text=True`) -- The summary of the corresponding input.
            - **summary_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the summary.
        r%   re   r)   s      r   re   zSummarizationPipeline.__call__  s!    0  uww0000r   rD   rE   rF   r\   c           	          ||k     r!t                               d| d| d           ||k     r)t                               d| d| d|dz   d           d	S d	S )
rH   zYour min_length=z' must be inferior than your max_length=.zYour max_length is set to z , but your input_length is only z. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=r   )NloggerwarningrI   s       r   rJ   z"SummarizationPipeline.check_inputs1  s     
""NNnjnnaknnnooo*$$NNsZ s sYe s s^jno^os s s     %$r   )
r   r   r   r   r}   re   r   boolrJ   r   r   s   @r   r   r      sz         < K1 1 1 1 14 # 3 SW        r   r   c                   d     e Zd ZdZdZdededefdZej        ddd fd	
Z	d fd
	Z
 fdZ xZS )TranslationPipelinea  
    Translates from one language to another.

    This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"translation_xx_to_yy"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
    For a list of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256
    - num_beams: 4

    Usage:

    ```python
    en_fr_translator = pipeline("translation_en_to_fr")
    en_fr_translator("How old are you?")
    ```translationrD   rE   rF   c                 Z    |d|z  k    r!t                               d| d| d           dS )Ng?zYour input_length: z" is bigger than 0.9 * max_length: z`. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)Tr   rI   s       r   rJ   z TranslationPipeline.check_inputs\  sX    #
***NN^l ^ ^V` ^ ^ ^   tr   N)r/   src_langtgt_langc                    t          | j        dd           r | j        j        || j        |||dS  t	                      j        |d|iS )N_build_translation_inputs)r;   r/   r   r   r/   )getattrr5   r   r(   r%   r[   )r*   r/   r   r   r+   r-   s        r   ri   zTranslationPipeline.preprocessd  sf    4>#>EE 	M;4>;dnV^iq    /577.LLLLr   c                 "    t                      j        di |\  }}}|||d<   |||d<   |]|[|                    d| j                  }|                    d          }|r)t          |          dk    r|d         |d<   |d         |d<   |||fS )	Nr   r   task_r   r      r   )r%   rC   rq   r   splitr8   )
r*   r   r   r,   r?   r@   rA   r   itemsr-   s
            r   rC   z(TranslationPipeline._sanitize_parametersl  s    @\@\@f@f_e@f@f=>+=,4j),4j) 0::fdi00DJJsOOE 9E

a05a!*-05a!*- .2DDDr   c                 6     t                      j        |i |S )a  
        Translate the text(s) given as inputs.

        Args:
            args (`str` or `list[str]`):
                Texts to be translated.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            src_lang (`str`, *optional*):
                The language of the input. Might be required for multilingual models. Will not have any effect for
                single pair translation models
            tgt_lang (`str`, *optional*):
                The language of the desired output. Might be required for multilingual models. Will not have any effect
                for single pair translation models
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **translation_text** (`str`, present when `return_text=True`) -- The translation.
            - **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
              token ids of the translation.
        r   r)   s      r   re   zTranslationPipeline.__call__|  s!    <  uww0000r   )NN)r   r   r   r   r}   r   rJ   r   r   ri   rC   re   r   r   s   @r   r   r   @  s         0  K # 3     ,>+MX\gk M M M M M M ME E E E E E 1 1 1 1 1 1 1 1 1r   r   )enumr9   typingr   r   
generationr   tokenization_utilsr   utilsr   r	   r
   r   baser   r   
tensorflowr$   models.auto.modeling_tf_autor   models.auto.modeling_autor   
get_loggerr   r   Enumr   r   r   r   r   r   r   <module>r      s@             ) ) ) ) ) ) 3 3 3 3 3 3 T T T T T T T T T T T T 4 4 4 4 4 4 4 4 ? _^^^^^^ YXXXXXX		H	%	%       
 ,,4@@@AAV V V V V( V V BAVr ,,4@@@AAG G G G G7 G G BAGT ,,4@@@AAY1 Y1 Y1 Y1 Y15 Y1 Y1 BAY1 Y1 Y1r   