
    -`iK;                     $   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZ  ed           G d d                      Z G d de          Z G d de          ZdS )    N)ABCabstractmethod)	dataclass)	Annotated)Field)ModelConfig)VLLMValidationError)EmbedsPrompt
TextPromptTokensPrompt)get_prompt_componentsparse_raw_prompts)TokenizerLike)AsyncMicrobatchTokenizerT)frozenc                       e Zd ZU dZdZedz  ed<   	 dZedz  ed<   	 dZe	ed<   	 dZ
edz  ed<   	 dZe	dz  ed	<   	 d
ededz  fdZdS )RenderConfigz2Configuration to control how prompts are prepared.N
max_lengthtruncate_prompt_tokensTadd_special_tokens
cache_saltFneeds_detokenizationmodel_configreturnc                     | j         }||dk    r|S |dk     r|j        }| j        }|||k    rt          d|d|d          |S )z:Validate and normalize `truncate_prompt_tokens` parameter.Nr   ztruncate_prompt_tokens=z# cannot be greater than max_length=z*. Please select a smaller truncation size.)r   max_model_lenr   
ValueError)selfr   r   r   s       m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/renderer.pyverify_truncate_prompt_tokensz*RenderConfig.verify_truncate_prompt_tokens,   s    !%!<!)-Cq-H-H))!A%%%1%?"_
!&<z&I&IK) K KK K K  
 &%    )__name__
__module____qualname____doc__r   int__annotations__r   r   boolr   strr   r   r     r!   r   r   r      s         <<!Jd
!!!9 *.C$J---2  $###K!Jd
!!!6(-$+---H&+ &#PT* & & & & & &r!   r   c                       e Zd ZdZ	 ddededz  f fdZedee	e         z  e	e
         z  e	e	e
                  z  dede	e         fd	            Zeddd
dee	e         z  e	e
         z  e	e	e
                  z  dz  dee	e         z  dz  dede	eez           fd            Z	 	 ddee	e         z  dee
 ed          f         dz  dedz  de	e         fdZ xZS )BaseRenderera  
    Base class for unified input processing and rendering.

    The Renderer serves as a unified input processor that consolidates
    tokenization, chat template formatting, and multimodal input handling
    into a single component.
    It converts high-level API requests (OpenAI-style JSON) into token IDs and
    multimodal features ready for engine consumption.

    Key responsibilities:
    - Convert text prompts to token sequences with proper special tokens
    - Apply chat templates and format conversations
    - Handle multimodal inputs (images, audio, etc.) when applicable
    - Manage prompt truncation and length validation
    - Provide clean separation between API layer and engine core
    Nr   	tokenizerc                 d    t                                                       || _        || _        d S N)super__init__r   r-   )r   r   r-   	__class__s      r   r1   zBaseRenderer.__init__Q   s.    
 	("r!   prompt_or_promptsconfigr   c                   K   t           )aL  
        Convert text or token inputs into engine-ready TokensPrompt objects.

        This method accepts text or token inputs and produces a
        list of [`TokensPrompt`][vllm.inputs.data.TokensPrompt] objects
        for the engine.

        Args:
            prompt_or_prompts: One of:
                - `str`: Single text prompt.
                - `list[str]`: Batch of text prompts.
                - `list[int]`: Single pre-tokenized sequence.
                - `list[list[int]]`: Batch of pre-tokenized sequences.
            config: Render configuration controlling how prompts are prepared
                (e.g., tokenization and length handling).

        Returns:
            list[TokensPrompt]: Engine-ready token prompts.

        Raises:
            ValueError: If input formats are invalid or length limits exceeded.
        NotImplementedError)r   r3   r4   s      r   render_promptzBaseRenderer.render_promptZ   s      : "!r!   r3   prompt_embedsr:   c                   K   t           )a  
        Convert text/token and/or base64-encoded embeddings inputs into
        engine-ready prompt objects using a unified RenderConfig.

        At least one of `prompt_or_prompts` or `prompt_embeds` must be
        provided and non-empty. If both are omitted or empty (e.g., empty
        string and empty list), a `ValueError` is raised.

        Args:
            prompt_or_prompts: Text or token inputs to include.
            prompt_embeds: Base64-encoded bytes (or list thereof) containing a
                torch-saved tensor to be used as prompt embeddings.
            config: Render configuration controlling how prompts are prepared
                (e.g., tokenization and length handling).

        Returns:
            list[Union[TokensPrompt, EmbedsPrompt]]:
                Engine-ready prompt objects.

        Raises:
            ValueError: If both `prompt_or_prompts` and `prompt_embeds`
                are omitted or empty (decoder prompt cannot be empty), or if
                length limits are exceeded.
        r6   )r   r3   r:   r4   s       r   render_prompt_and_embedsz%BaseRenderer.render_prompt_and_embedsy   s      @ "!r!   r   r   )ger   c                     | j         j        st          dd          dt          dt          ffdt          |t                    rfd|D             S  |          gS )z@Load and validate base64-encoded embeddings into prompt objects.z?You must set `--enable-prompt-embeds` to input `prompt_embeds`.r:   )	parameterembedr   c           	         t           j                                        5  t          j        t	          j        t          j        | d                    dt          j        d                    }t          |t           j
                  r*|j        t           j        t           j        t           j        fv sJ |                                }d d d            n# 1 swxY w Y   |                                dk    r/|                    d          }|                                dk    sJ | d          }t%          |          }|d<   |S )	NT)validatecpu)weights_onlymap_location   r   )r:   r   )torchsparsecheck_sparse_tensor_invariantsloadioBytesIOpybase64	b64decodedevice
isinstanceTensordtypefloat32bfloat16float16to_densedimsqueezer
   )r@   tensorembeds_promptr   r   s      r   _load_and_validate_embedzABaseRenderer.load_prompt_embeds.<locals>._load_and_validate_embed   s    <<>> + +Jx1%$GGGHH!%!&e!4!4  
 "&%,77 FLMNMM = = = 
  **+ + + + + + + + + + + + + + + zz||a**zz||q((((%1!7 7 8 89(v>>>M%.8l+  s   B*CCCc                 &    g | ]} |          S r*   r*   ).0r@   r[   s     r   
<listcomp>z3BaseRenderer.load_prompt_embeds.<locals>.<listcomp>   s%    OOO,,U33OOOr!   )r   enable_prompt_embedsr	   bytesr
   rP   list)r   r:   r   r   r[   s     ``@r   load_prompt_embedszBaseRenderer.load_prompt_embeds   s      5 	%Q)   
	!E 	!l 	! 	! 	! 	! 	! 	! 	!2 mT** 	POOOOOOOO((7788r!   r/   NN)r"   r#   r$   r%   r   r   r1   r   r)   ra   r&   r   r   r8   r`   r
   r<   r   r   rb   __classcell__r2   s   @r   r,   r,   ?   s        ( +/# #!# !4'# # # # # # " c?T#Y6d3iH" 	"
 
l	" " " ^"<  SW48	" " " c?T#Y6d3iH4O" tE{*T1	"
 " 
l\)	*" " " ^"H FJ!%	)9 )9tE{*)9 !*#uu{{{*: ;d B)9 $J	)9
 
l	)9 )9 )9 )9 )9 )9 )9 )9r!   r,   c                   `    e Zd Z	 	 ddededz  deeef         dz  f fdZdee	e         z  e	e
         z  e	e	e
                  z  dede	e         fd	Zddd
dee	e         z  e	e
         z  e	e	e
                  z  dz  dee	e         z  dz  dede	eez           fdZde	e
         de
dz  de	e
         fdZdeez  dede
dz  defdZdede
dz  de
dz  dededz  defdZ	 dde	e
         de
dz  de
dz  dedz  dedz  defdZdefdZ	 	 	 dde	e
         de
dz  dedz  dedz  def
dZ xZS ) CompletionRendererNr   r-   async_tokenizer_poolc                 h    t                                          ||           || _        d | _        d S r/   )r0   r1   rh   async_tokenizer)r   r   r-   rh   r2   s       r   r1   zCompletionRenderer.__init__   s5     	y111$8!@Dr!   r3   r4   r   c                    K                         j                  dk    rg S  fdt          |          D             }t          j        |  d{V S )zImplementation of prompt rendering for completion-style requests.

        Uses async tokenizer pooling for improved performance. See base class
        for detailed parameter documentation.
        r   c              3   H   K   | ]}                     |           V  dS ))r4   r   N)_create_prompt)r]   prompt_inputr4   r   r   s     r   	<genexpr>z3CompletionRenderer.render_prompt.<locals>.<genexpr>   sW       
 
  '=    
 
 
 
 
 
r!   N)r    r   r   asynciogather)r   r3   r4   tasksr   s   ` ` @r   r8   z CompletionRenderer.render_prompt   s       "(!E!EdFW!X!X!Q&&I
 
 
 
 
 
 !22C D D
 
 
 ^U++++++++r!   r9   r:   c                ,  K   |                     | j                  }|dk    rg S g }|/|                    |                     |||j                             ||dk    r|S |                     ||           d{V }|                    |           |S )z
        Render text/token prompts and/or precomputed embedding prompts. At
        least one of `prompt_or_prompts` or `prompt_embeds` must be provided.
        r   N )r3   r4   )r    r   extendrb   r   r8   )r   r3   r:   r4   r   renderedtoken_promptss          r   r<   z+CompletionRenderer.render_prompt_and_embeds   s       "(!E!EdFW!X!X!Q&&I68$OO''!#96;L   
 $(9R(?(?O"00/ 1 
 
 
 
 
 
 
 
 	&&&r!   	token_idsr   c                 J    ||S |t          |          k    r|S || d         S )z#Apply truncation to token sequence.N)len)r   rx   r   s      r   _maybe_apply_truncationz*CompletionRenderer._maybe_apply_truncation  s<     ")!S^^33001122r!   rn   c                    K   t          |          \  }}}|.|                     ||j        ||j        |j                   d {V S |.|                     ||j        ||j        |j                   d {V S t          r/   )r   _create_prompt_from_token_idsr   r   r   _create_prompt_from_textr   r7   )r   rn   r4   r   promptprompt_token_ids_s          r   rm   z!CompletionRenderer._create_prompt  s       '<L&I&I# !' ;; !&!+         66!&)!         "!r!   textr   r   r   c                 >  K   |                                  }| j        j        4| j        j                            dd          r|                                }| |||           d{V }n |||d|           d{V }|                     |j        |||          S )z#Tokenize text input asynchronously.Ndo_lower_caseF)r   T)r   
truncationr   )_get_async_tokenizerr   encoder_configgetlower_create_tokens_prompt	input_ids)r   r   r   r   r   r   rj   encodeds           r   r~   z+CompletionRenderer._create_prompt_from_text9  s       3355 ,8!044_eLL 9 ::<<D ")+ODEWXXXXXXXXXGG+O#51	        G ))z:t
 
 	
r!   Fr   c                    K   |                      ||          }d}|r/|                                 }|                    |           d{V }|                     ||||          S )z:Optionally detokenize token IDs and build a tokens prompt.N)rx   r   r   r   )r{   r   decoder   )r   rx   r   r   r   r   r   rj   s           r   r}   z0CompletionRenderer._create_prompt_from_token_idsZ  s       00<RSS	 	="7799O*11)<<<<<<<<F))!!	 * 
 
 	
r!   c                     | j         }||S | j        }|t          d          | j        t	          |          }n5| j                            |          }|t	          |          }|| j        |<   || _         |S )z0Get or create async tokenizer using shared pool.Nz0No tokenizer available for text input processing)rj   r-   r   rh   r   r   )r   rj   r-   s      r   r   z'CompletionRenderer._get_async_tokenizerq  s    .&""N	OPPP$,6yAAOO"7;;IFFO&":9"E"E7F))4.r!   r   c                     |Ft          |          |k    r3t          d| dt          |           ddt          |                    t          |          }|||d<   |||d	<   |S )
zCreate validated TokensPrompt.Nz'This model's maximum context length is z# tokens. However, your request has z> input tokens. Please reduce the length of the input messages.input_tokens)r?   value)r   r   r   )rz   r	   r   )r   rx   r   r   r   tokens_prompts         r   r   z(CompletionRenderer._create_tokens_prompt  s     !c)nnz&A&A%B* B B-0^^B B B ))nn    %i@@@!*4M,'&,M(#r!   rc   )F)NNN)r"   r#   r$   r   r   dictr   r1   r)   ra   r&   r   r   r8   r`   r
   r<   r{   r   rm   r(   r~   r}   r   r   rd   re   s   @r   rg   rg      s+        +/	E 	E!	E !4'	E #=2J#JK
		E 	E 	E 	E 	E 	E, c?T#Y6d3iH, 	,
 
l	, , , ,: SW48	      c?T#Y6d3iH4O  tE{*T1	 
   
l\)	*       D	3c	3<?$J	3	c	3 	3 	3 	3" </" " !$d
	"
 
" " " ">

 $J
 !$d
	

 !
 $J
 

 
 
 
N -2
 
9
 $J
 !$d
	

 $J
 #Tk
 

 
 
 
.&>    . "&!%! 9 $J $J	
 d
 
       r!   rg   )rp   rK   abcr   r   dataclassesr   typingr   rM   rG   pydanticr   vllm.configr   vllm.exceptionsr	   vllm.inputs.datar
   r   r   vllm.inputs.parser   r   vllm.tokenizersr   vllm.utils.async_utilsr   r   r,   rg   r*   r!   r   <module>r      s    				 # # # # # # # # ! ! ! ! ! !               # # # # # # / / / / / / C C C C C C C C C C F F F F F F F F ) ) ) ) ) ) ; ; ; ; ; ; $%& %& %& %& %& %& %& %&PE9 E9 E9 E9 E93 E9 E9 E9PT T T T T T T T T Tr!   