
    .`i                         d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ  G d d          ZdS )    N)LogprobsMode)SamplingParams)apply_top_k_top_p)get_num_nans)apply_temperaturegumbel_sample)LogitBiasState)compute_topk_logprobs)apply_min_p)SamplerOutput)PenaltiesState)NO_LOGPROBSSamplingStatesc                   .   e Zd Z	 ddededej        defdZdeded	ed
dfdZ	dej
        dej        dej        d
dfdZdej
        dej
        dej        dej
        d
ef
dZdej
        dej
        dej        dej
        d
eej
        ej
        f         f
dZdS )Samplerraw_logprobsmax_num_reqs
vocab_sizedevicelogprobs_modec                     |dvrt          d|           || _        t          j        | _        t          ||          | _        t          |||          | _        t          ||          | _
        d S )N)processed_logprobsr   zUnsupported logprobs_mode: )NotImplementedErrorr   envsVLLM_COMPUTE_NANS_IN_LOGITScompute_nansr   sampling_statesr   penalties_stater	   logit_bias_state)selfr   r   r   r   s        u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu/sample/sampler.py__init__zSampler.__init__   sx      FFF%&SM&S&STTT* <-lJGG-lJOO .|V D D    req_idx
prompt_lensampling_paramsreturnNc                     | j                             ||           | j                            ||           | j                            |||           d S N)r   add_requestr   r   )r    r$   r%   r&   s       r!   r*   zSampler.add_request&   sW     	((/BBB((/BBB))':OOOOOr#   prefill_token_idsprefill_lensprompt_lensc                     | j                                          | j                            |||           | j                                         d S r)   )r   apply_staged_writesr   r   )r    r+   r,   r-   s       r!   r/   zSampler.apply_staged_writes0   sX     	0022200|[	
 	
 	
 	1133333r#   logitsidx_mappingidx_mapping_npposc                 H   | j         rt          |          nd }|                     ||||          \  }}| j                            |          }|t
          k    r!| j        dk    r|n|}t          |||          }	nd }	t          |	                    dd          |	|          }
|
S )Nr      )sampled_token_idslogprobs_tensorsnum_nans)
r   r   sampler   max_num_logprobsr   r   r
   r   view)r    r0   r1   r2   r3   r9   sampledprocessed_logitsr;   r8   sampler_outputs              r!   __call__zSampler.__call__<   s     ,0+<F<'''$$(KKK%
 %
!!  /@@PP{** %)=== !  
  5V=MwWW# ' &ll2q11-
 
 
 r#   c                    t          j        |t           j                                      |          }| j                            ||||           | j                            |||           t          ||| j	        j
        j                   | j	                            |          }|r t          ||| j	        j        j                   | j	                            |          }|r| j	        j        j        |         nd }| j	                            |          }|r| j	        j        j        |         nd }	|s|rt'          |||	          }t)          ||| j	        j
        j        | j	        j        j        |d          }
|
|fS )N)dtypeF)r   )torch
empty_likefloat32copy_r   apply_logit_biasr   apply_penaltiesr   r   temperaturegpudo_min_pr   min_pdo_top_ktop_kdo_top_ptop_pr   r   seeds)r    r0   r1   r2   r3   rK   rM   rN   rO   rP   r=   s              r!   r:   zSampler.sample`   s    !&>>>DDVLL 	..v{NTWXXX 	,,V[.QQQ 	&+t/C/O/STTT '00@@ 	MT-A-G-KLLL '00@@?GQ$*.{;;T'00@@?GQ$*.{;;T 	=x 	=&vue<<F   ,0 &*#
 
 
 r#   )r   )__name__
__module____qualname__intrC   r   r   r"   r   r*   Tensornpndarrayr/   r   r@   tupler:    r#   r!   r   r      s        '5E EE E 	E
 $E E E E PP P (	P
 
P P P P
4 <
4 j
4 Z	
4
 

4 
4 
4 
4"" \" 
	"
 \" 
" " " "H)) \) 
	)
 \) 
u|U\)	*) ) ) ) ) )r#   r   )numpyrW   rC   	vllm.envsr   vllm.config.modelr   vllm.sampling_paramsr   $vllm.v1.sample.ops.topk_topp_samplerr   !vllm.v1.worker.gpu.metrics.logitsr    vllm.v1.worker.gpu.sample.gumbelr   r   $vllm.v1.worker.gpu.sample.logit_biasr	   !vllm.v1.worker.gpu.sample.logprobr
   vllm.v1.worker.gpu.sample.min_pr    vllm.v1.worker.gpu.sample.outputr   #vllm.v1.worker.gpu.sample.penaltiesr    vllm.v1.worker.gpu.sample.statesr   r   r   rZ   r#   r!   <module>rh      s<              * * * * * * / / / / / / B B B B B B : : : : : : M M M M M M M M ? ? ? ? ? ? C C C C C C 7 7 7 7 7 7 : : : : : : > > > > > > H H H H H H H Ht t t t t t t t t tr#   