
    -`iS2                     t   d dl mZ d dl mZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ  ee          Ze G d d                      Ze G d d                      Z G d d          Z eddddg d          Z ede          Z G d dee                   Z e G d d                      Z! G d de e!                   Z"e G d d                      Z# G d d e e#                   Z$e G d! d"                      Z% G d# d$e e%                   Z&dS )%    )MutableSequence)Sequence)	dataclass)AnyGenericN)TypeVar)init_logger)PromptLogprobsSampleLogprobs)LoRARequest)MultiModalPlaceholderDict)RequestStateStatsc                       e Zd ZU dZeed<   eed<   ee         ed<   edz  ed<   e	dz  ed<   dZ
ej        dz  ed<   dZedz  ed	<   dZeez  dz  ed
<   dZedz  ed<   defdZdefdZdS )CompletionOutputa!  The output data of one completion output of a request.

    Args:
        index: The index of the output in the request.
        text: The generated output text.
        token_ids: The token IDs of the generated output text.
        cumulative_logprob: The cumulative log probability of the generated
            output text.
        logprobs: The log probabilities of the top probability words at each
            position if the logprobs are requested.
        finish_reason: The reason why the sequence is finished.
        stop_reason: The stop string or token id that caused the completion
            to stop, None if the completion finished for some other reason
            including encountering the EOS token.
        lora_request: The LoRA request that was used to generate the output.
    indextext	token_idsNcumulative_logproblogprobsrouted_expertsfinish_reasonstop_reasonlora_requestreturnc                     | j         d uS N)r   selfs    `/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/outputs.pyfinishedzCompletionOutput.finished3   s    !--    c                     d| j          d| j        d| j         d| j         d| j         d| j         d| j         d| j         d	S )
NzCompletionOutput(index=z, text=z, token_ids=z, routed_experts=z, cumulative_logprob=z, logprobs=z, finish_reason=z, stop_reason=))r   r   r   r   r   r   r   r   r   s    r   __repr__zCompletionOutput.__repr__6   s    /dj / /I/ // / #1/ / #'"9	/ /
 / / "// /  +/ / /		
r!   )__name__
__module____qualname____doc__int__annotations__strGenericSequencefloatr   r   npndarrayr   r   r   r   boolr    r$    r!   r   r   r      s          " JJJ
IIIs####$$$t####(,NBJ%,,, $M3:$$$$(KsT!((('+L+$+++.$ . . . .

# 

 

 

 

 

 

r!   r   c                   D    e Zd ZU dZej        ed<   defdZde	de
fdZdS )PoolingOutputznThe output data of one pooling output of a request.

    Args:
        data: The extracted hidden states.
    datar   c                     d| j          dS )NzPoolingOutput(data=r#   )r4   r   s    r   r$   zPoolingOutput.__repr__M   s    1TY1111r!   otherc                     t          || j                  o.t          | j        |j        k                                              S r   )
isinstance	__class__r0   r4   all)r   r6   s     r   __eq__zPoolingOutput.__eq__P   s?    %00 
TY%*$))++6
 6
 	
r!   N)r%   r&   r'   r(   torchTensorr*   r+   r$   objectr0   r;   r1   r!   r   r3   r3   C   sm           ,2# 2 2 2 2
F 
t 
 
 
 
 
 
r!   r3   c                        e Zd ZdZ	 	 	 	 	 dddddededz  dee         dz  dedz  dee         d	e	d
e
dz  dedz  dedz  dee         dz  dedz  dedz  deeef         dz  deddfdZdd de	ddfdZdefdZdS )RequestOutputak  The output data of a completion request to the LLM.

    Args:
        request_id: The unique ID of the request.
        prompt: The prompt string of the request.
                For encoder/decoder models, this is the
                decoder input prompt.
        prompt_token_ids: The token IDs of the prompt.
                          For encoder/decoder models, this is the
                          decoder input prompt token ids.
        prompt_logprobs: The log probabilities to return per prompt token.
        outputs: The output sequences of the request.
        finished: Whether the whole request is finished.
        metrics: Metrics associated with the request.
        lora_request: The LoRA request that was used to generate the output.
        encoder_prompt: The encoder prompt string of the request.
                        None if decoder-only.
        encoder_prompt_token_ids: The token IDs of the encoder prompt.
                                  None if decoder-only.
        num_cached_tokens: The number of tokens with prefix cache hit.
        kv_transfer_params: The params for remote K/V transfer.
    N)multi_modal_placeholderskv_transfer_params
request_idpromptprompt_token_idsprompt_logprobsoutputsr    metricsr   encoder_promptencoder_prompt_token_idsnum_cached_tokensrA   rB   kwargsr   c                   |r(t                               dt          |                     || _        || _        || _        |pi | _        || _        || _        || _	        || _
        || _        |	| _        |
| _        || _        || _        d S )Nz+RequestOutput: Ignoring extra arguments: %s)loggerwarning_oncer+   rC   rD   rE   rA   rF   rG   r    rH   r   rI   rJ   rK   rB   )r   rC   rD   rE   rF   rG   r    rH   r   rI   rJ   rK   rA   rB   rL   s                  r   __init__zRequestOutput.__init__n   s    (  	=s6{{   % 0(@(FB%. (,(@%!2"4r!   next_output	aggregatec                 z   | xj         |j         z  c_         |j        | _        |j        D ]}t          | j                  D ]\  }}|j        |j        k    r|r|xj        |j        z  c_        t          |j        t                    st          |j                  |_        |j        
                    |j                   |j        r(|j        J |j        
                    |j                   |j        |_        |j        |_        |j        |_        n
|| j        |<    n| j                            |           dS )z,Merge subsequent RequestOutput into this oneN)r    rB   rG   	enumerater   r   r8   r   r   listextendr   r   r   r   append)r   rQ   rR   next_completioni
completions         r   addzRequestOutput.add   sY    	--"-"@*2 	5 	5O!*4<!8!8 5 5:#'<<<  :"?+??)**>PP N37
8L3M3MJ0",33O4MNNN*3 Q#-#6#B#B#B&/667OPPP+> #5 4C3P
01@1L
.. +:QE% =( ##O444-	5 	5r!   c                     d| j          d| j        d| j         d| j        d| j         d| j         d| j         d| j         d	| j         d
| j	         d| j
         d| j         dS )NzRequestOutput(request_id=z	, prompt=, prompt_token_ids=z, encoder_prompt=z, encoder_prompt_token_ids=z, prompt_logprobs=
, outputs=, finished=z
, metrics=z, lora_request=, num_cached_tokens=z, multi_modal_placeholders=r#   )rC   rD   rE   rI   rJ   rF   rG   r    rH   r   rK   rA   r   s    r   r$   zRequestOutput.__repr__   s    I I IkI I $ 5I I #1I I )-(E	I I
  $3I I |I I I I |I I !-I I "&!7I I )-(EI I I	
r!   )NNNNN)r%   r&   r'   r(   r+   rU   r)   r
   r   r0   r   r   r   dictr   rP   r[   r$   r1   r!   r   r@   r@   V   sz        > -1+/%)59(,$5 FJ48$5 $5 $5$5 d
$5 s)d*	$5
 ($.$5 &'$5 $5 #T)$5 "D($5 d
$5 #'s)d"2$5 :$5 #<d"B$5 !cNT1$5$ %$5& 
'$5 $5 $5 $5L5 54 5D 5 5 5 5<
# 
 
 
 
 
 
r!   r@    T)rC   rD   rE   rF   rG   r    _O)defaultc            
       @    e Zd ZdZdededee         dedef
dZ	d Z
d	S )
PoolingRequestOutputa  
    The output data of a pooling request to the LLM.

    Args:
        request_id (str): A unique identifier for the pooling request.
        outputs (PoolingOutput): The pooling results for the given input.
        prompt_token_ids (list[int]): A list of token IDs used in the prompt.
        num_cached_tokens: The number of tokens with prefix cache hit.
        finished (bool): A flag indicating whether the pooling is completed.
    rC   rG   rE   rK   r    c                 L    || _         || _        || _        || _        || _        d S r   )rC   rE   rK   r    rG   )r   rC   rG   rE   rK   r    s         r   rP   zPoolingRequestOutput.__init__   s-     % 0!2 r!   c                     t          |           j         d| j        d| j        d| j         d| j         d| j         dS )Nz(request_id=r^   r]   r`   r_   r#   )typer%   rC   rG   rE   rK   r    r   s    r   r$   zPoolingRequestOutput.__repr__   sr    Dzz" ) ) ) )|) ) $ 5) ) "&!7) ) 	) ) )	
r!   N)r%   r&   r'   r(   r+   rc   rU   r)   r0   rP   r$   r1   r!   r   rf   rf      su        	 	  s)	
     
 
 
 
 
r!   rf   c                   n    e Zd ZU dZee         ed<   edefd            Z	e
defd            ZdefdZdS )	EmbeddingOutputzThe output data of one embedding output of a request.

    Args:
        embedding: The embedding vector, which is a list of floats.
            Its length depends on the hidden dimension of the model.
    	embeddingpooling_outputc                     | j         }|j        dk    rt          d          t          |                                          S )N   z,pooled_data should be a 1-D embedding vector)r4   ndim
ValueErrorrk   tolistrm   pooled_datas     r   	from_basezEmbeddingOutput.from_base   sA    $)q  KLLL{1133444r!   r   c                 *    t          | j                  S r   )lenrl   r   s    r   hidden_sizezEmbeddingOutput.hidden_size  s    4>"""r!   c                     d| j          dS )NzEmbeddingOutput(hidden_size=r#   )rx   r   s    r   r$   zEmbeddingOutput.__repr__  s    Ad.>AAAAr!   N)r%   r&   r'   r(   rU   r-   r*   staticmethodr3   ru   propertyr)   rx   r+   r$   r1   r!   r   rk   rk      s           E{5- 5 5 5 \5 #S # # # X#B# B B B B B Br!   rk   c                   *    e Zd Zedefd            ZdS )EmbeddingRequestOutputrequest_outputc                     t          | j        t                              | j                  | j        | j        | j                  S N)rC   rG   rE   rK   r    )r}   rC   rk   ru   rG   rE   rK   r    r~   s    r   ru   z EmbeddingRequestOutput.from_base  sE    %%0#--n.DEE+<,>#,
 
 
 	
r!   Nr%   r&   r'   rz   rf   ru   r1   r!   r   r}   r}     :        
"6 
 
 
 \
 
 
r!   r}   c                   n    e Zd ZU dZee         ed<   edefd            Z	e
defd            ZdefdZdS )	ClassificationOutputzThe output data of one classification output of a request.

    Args:
        probs: The probability vector, which is a list of floats.
            Its length depends on the number of classes.
    probsrm   c                     | j         }|j        dk    rt          d          t          |                                          S )Nro   z.pooled_data should be a 1-D probability vector)r4   rp   rq   r   rr   rs   s     r   ru   zClassificationOutput.from_base&  sC     %)q  MNNN#K$6$6$8$8999r!   r   c                 *    t          | j                  S r   )rw   r   r   s    r   num_classesz ClassificationOutput.num_classes/  s    4:r!   c                     d| j          dS )Nz!ClassificationOutput(num_classes=r#   )r   r   s    r   r$   zClassificationOutput.__repr__3  s    F43CFFFFr!   N)r%   r&   r'   r(   rU   r-   r*   rz   r3   ru   r{   r)   r   r+   r$   r1   r!   r   r   r     s           ;:- : : : \: S    XG# G G G G G Gr!   r   c                   *    e Zd Zedefd            ZdS )ClassificationRequestOutputr~   c                     t          | j        t                              | j                  | j        | j        | j                  S r   )r   rC   r   ru   rG   rE   rK   r    r   s    r   ru   z%ClassificationRequestOutput.from_base8  sE    *%0(22>3IJJ+<,>#,
 
 
 	
r!   Nr   r1   r!   r   r   r   7  r   r!   r   c                   F    e Zd ZU dZeed<   edefd            Zde	fdZ
dS )ScoringOutputzThe output data of one scoring output of a request.

    Args:
        score: The similarity score, which is a scalar value.
    scorerm   c                     | j                                         }|j        dk    rt          d          t	          |                                          S )Nr   z$pooled_data should be a scalar score)r4   squeezerp   rq   r   itemrs   s     r   ru   zScoringOutput.from_baseM  sO    
 %)1133q  CDDD[--//000r!   r   c                     d| j          dS )NzScoringOutput(score=r#   )r   r   s    r   r$   zScoringOutput.__repr__X  s    3dj3333r!   N)r%   r&   r'   r(   r-   r*   rz   r3   ru   r+   r$   r1   r!   r   r   r   C  sk           LLL1- 1 1 1 \14# 4 4 4 4 4 4r!   r   c                   *    e Zd Zedefd            ZdS )ScoringRequestOutputr~   c                     t          | j        t                              | j                  | j        | j        | j                  S r   )r   rC   r   ru   rG   rE   rK   r    r   s    r   ru   zScoringRequestOutput.from_base]  sE    #%0!++N,BCC+<,>#,
 
 
 	
r!   Nr   r1   r!   r   r   r   \  r   r!   r   )'collections.abcr   r   r,   dataclassesr   typingr   r   numpyr.   r<   typing_extensionsr   vllm.loggerr	   vllm.logprobsr
   r   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.v1.metrics.statsr   r%   rN   r   r3   r@   STREAM_FINISHEDrc   rf   rk   r}   r   r   r   r   r1   r!   r   <module>r      s;   , + + + + + 7 7 7 7 7 7 ! ! ! ! ! !              % % % % % % # # # # # # 8 8 8 8 8 8 8 8 ) ) ) ) ) ) < < < < < < 3 3 3 3 3 3	X		 )
 )
 )
 )
 )
 )
 )
 )
X 
 
 
 
 
 
 
 
$j
 j
 j
 j
 j
 j
 j
 j
\  -   WT=)))!
 !
 !
 !
 !
72; !
 !
 !
H B B B B B B B B4	
 	
 	
 	
 	
1/B 	
 	
 	
 G G G G G G G G6	
 	
 	
 	
 	
"67K"L 	
 	
 	
 4 4 4 4 4 4 4 40	
 	
 	
 	
 	
/> 	
 	
 	
 	
 	
r!   