
    .`i                      0   U d dl mZmZ d dlmZmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ erd dlmZ d dlmZ neZeZ G d	 d
e          Z G d de          Zej        eej                 z  eej        dz           z  Ze	ed<   e G d d                      Ze G d d                      Ze G d d                      Ze G d d                      Z G d de          Z e G d d                      Z!dddefdZ" eg i           Z#dS )    )ABCabstractmethod)	dataclassfield)TYPE_CHECKING
NamedTuple	TypeAliasN)CUDAGraphStat)SchedulerOutput)KVConnectorKVEvents)KVConnectorStatsc                   |    e Zd ZU ej        ed<   ej        ed<   ej        ed<   dZee         dz  ed<   dedefdZ	dS )	LogprobsListslogprob_token_idslogprobssampled_token_ranksNcu_num_generated_tokensreq_idxnum_positionsc                     | j         | j         |         }||z   }t          | j        ||         | j        ||         | j        ||         d           S N)r   r   r   r   r   )selfr   r   end_idxs       c/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/outputs.pyslice_requestzLogprobsLists.slice_request#   sc    '327;GM)"77?3M''/*$WW_5	
 
 	
    )
__name__
__module____qualname__npndarray__annotations__r   listintr    r   r   r   r      s~         z!!!j###
 15T#Y-444	
S 	
 	
 	
 	
 	
 	
 	
r   r   c                       e Zd ZU ej        ed<   ej        ed<   ej        ed<   ddee         dz  fdZddZ	d	ej        dd fd
Z
edededd fd            ZdS )LogprobsTensorsr   r   selected_token_ranksNr   c                    t          | j                                                                        | j                                                                        | j                                                                        |          S r   )r   r   cpunumpyr   r(   )r   r   s     r   tolistszLogprobsTensors.tolists7   sp    "&&((..00M%%''%))++1133#	
 
 	
r   returnc                     | j         j        j        dk    r| S t          | j                             dd          | j                            dd          | j                            dd                    S )Nr*   T)non_blocking)r   devicetyper'   tor   r(   r   s    r   to_cpu_nonblockingz"LogprobsTensors.to_cpu_nonblocking?   sw    !(-66K"%%e$%??MU66%((T(BB
 
 	
r   maskc                 f    t          | j        |         | j        |         | j        |                   S )z5Filter the logprobs tensors with the given bool mask.)r'   r   r   r(   )r   r5   s     r   filterzLogprobsTensors.filterH   s4    "4(M$%d+
 
 	
r   r   num_tokens_per_positionc                     t          j        | |ft           j        d          }t          j        |t           j                  }t          j        | t           j        d          }t          |||          S )z$Create empty LogprobsTensors on CPU.r*   )dtyper0   )r:   )r   r   r(   )torchemptyint32
empty_likefloat32r'   )r   r8   r   r   r(   s        r   	empty_cpuzLogprobsTensors.empty_cpuP   s     "K34EKPU
 
 
 #$5U]KKK${U 
  
  
 /!5
 
 
 	
r   r   )r-   r'   )r   r   r   r;   Tensorr"   r#   r$   r,   r4   r7   staticmethodr@   r%   r   r   r'   r'   /   s         |###l,&&&
 
tCy4/? 
 
 
 

 
 
 

5< 
,= 
 
 
 
 

58
	
 
 
 \
 
 
r   r'   PoolerOutputc                   4    e Zd ZU ej        ed<   edz  ed<   dS )SamplerOutputsampled_token_idsNlogprobs_tensors)r   r   r   r;   rA   r"   r'   r%   r   r   rE   rE   i   s3          |###%,,,,,,r   rE   c                       e Zd ZU dZee         dz  ed<   dZee         dz  ed<   dZe	dz  ed<   dZ
edz  ed<    ee          Zee         ed<   dZeed	<   d
 ZdS )KVConnectorOutputNfinished_sendingfinished_recvingkv_connector_statskv_cache_eventsdefault_factoryinvalid_block_idsr   expected_finished_countc                 R    | j          o| j         o| j         o| j         o| j         S r   )rJ   rK   rL   rM   rP   r3   s    r   is_emptyzKVConnectorOutput.is_empty   sL    %% +))++++ ((+ **	
r   )r   r   r   rJ   setstrr"   rK   rL   r   rM   r   r   rP   r$   rQ   rS   r%   r   r   rI   rI   s   s          )-c#ho,,,(,c#ho,,,26(4/66626O(4/666 #(%"<"<"<s3x<<< $%S$$$
 
 
 
 
r   rI   c                   P    e Zd ZU dZee         dz  ed<   dZee         dz  ed<   dS )ECConnectorOutputNrJ   rK   )r   r   r   rJ   rT   rU   r"   rK   r%   r   r   rW   rW      sF          )-c#ho,,,(,c#ho,,,,,r   rW   c                   V   e Zd ZU ee         ed<   eeef         ed<    ee          Z	eee                  ed<   dZ
edz  ed<    ee          Zeeedz  f         ed<   dZeej        dz           dz  ed<   dZedz  ed	<   dZedz  ed
<   dZeeef         dz  ed<   dZedz  ed<   dS )ModelRunnerOutputreq_idsreq_id_to_indexrN   rF   Nr   prompt_logprobs_dictpooler_outputkv_connector_outputec_connector_outputnum_nans_in_logitscudagraph_stats)r   r   r   r#   rU   r"   dictr$   r   rF   r   r   r\   r'   r]   r;   rA   r^   rI   r_   rW   r`   ra   r
   r%   r   r   rY   rY      s<         #Y#s(^### */t)D)D)DtDIDDD
 &*Hmd"))) ?De? ? ?$sOd$::;   
 7;M4t+,t3:::48*T188848*T1888 15S#X-444 -1O]T)00000r   rY   c                   *    e Zd Zedefd            ZdS )AsyncModelRunnerOutputr-   c                     dS )a  Get the ModelRunnerOutput for this async output.

        This is a blocking call that waits until the results are ready, which
        might involve copying device tensors to the host.
        This method should only be called once per AsyncModelRunnerOutput.
        Nr%   r3   s    r   
get_outputz!AsyncModelRunnerOutput.get_output   s	     	r   N)r   r   r   r   rY   rf   r%   r   r   rd   rd      s:        -    ^  r   rd   c                   H    e Zd ZU ee         ed<   eee                  ed<   dS )DraftTokenIdsrZ   draft_token_idsN)r   r   r   r#   rU   r"   r$   r%   r   r   rh   rh      s8          #Y$s)_$$$$$r   rh   scheduler_outputr   r-   c                     | j         st          S t          | j                                                   }d t	          |          D             }d |D             }d |D             }t          ||||          S )zz
    Create a ModelRunnerOutput stub that contains the correct
    per-request bookkeeping but no generated data yet.
    c                     i | ]\  }}||	S r%   r%   ).0idxrids      r   
<dictcomp>z:make_empty_encoder_model_runner_output.<locals>.<dictcomp>   s    &S&S&SHCsC&S&S&Sr   c                     g | ]}d gS )r   r%   rm   _s     r   
<listcomp>z:make_empty_encoder_model_runner_output.<locals>.<listcomp>   s    )?)?)?!1#)?)?)?r   c                     g | ]}d S r   r%   rr   s     r   rt   z:make_empty_encoder_model_runner_output.<locals>.<listcomp>   s    /F/F/F/F/F/Fr   )rZ   r[   rF   r]   )num_scheduled_tokensEMPTY_MODEL_RUNNER_OUTPUTr#   keys	enumeraterY   )rj   rZ   r[   rF   r]   s        r   &make_empty_encoder_model_runner_outputrz      s     0 )(( .CHHJJKKG 'T&S	'@R@R&S&S&SO *@)?w)?)?)? 0G/Fg/F/F/FM'+#	   r   )rZ   r[   )$abcr   r   dataclassesr   r   typingr   r   r	   r+   r    r;   vllm.compilation.cuda_graphr
   vllm.v1.core.sched.outputr   vllm.distributed.kv_eventsr   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   objectr   r'   rA   r#   rC   r"   rE   rI   rW   rY   rd   rh   rz   rw   r%   r   r   <module>r      s   $ # # # # # # # # ( ( ( ( ( ( ( ( 7 7 7 7 7 7 7 7 7 7      5 5 5 5 5 5 5 5 5 5 5 5 !>>>>>>UUUUUUU 
 
 
 
 
J 
 
 
22
 2
 2
 2
 2
j 2
 2
 2
n  ,el);;d5<RVCV>WWi W W W - - - - - - - - 
 
 
 
 
 
 
 
4 - - - - - - - - $1 $1 $1 $1 $1 $1 $1 $1P	 	 	 	 	S 	 	 	 % % % % % % % %'   < .-b"MMM   r   