
    .`i                     @    d dl Z d dlmZ d dlmZ  G d d          ZdS )    N)
VllmConfig)
InputBatchc                       e Zd ZdZdefdZ	 ddedeee                  de	e
ej        f         ee	e
ej        f                  z  dz  deee                  fd	Zd
 ZdS )SuffixDecodingProposerz
    Speculative decoding proposer for Suffix Decoding (https://arxiv.org/pdf/2411.04975).
    This class imports and uses the official implementation from Arctic Inference
    (https://github.com/snowflakedb/ArcticInference).
    vllm_configc                     |j         }|j        | _        |j        | _        |j        | _        |j        | _        |j        j	        | _	        ddl
m}  ||j        |j                  | _        d S )Nr   )SuffixDecodingCache)max_tree_depthmax_cached_requests)speculative_confignum_speculative_tokenssuffix_decoding_max_tree_depthr
   suffix_decoding_max_spec_factormax_spec_factorsuffix_decoding_min_token_probmin_token_probmodel_configmax_model_len arctic_inference.suffix_decodingr	   #suffix_decoding_max_cached_requestssuffix_cache)selfr   configr	   s       w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/spec_decode/suffix_decoding.py__init__zSuffixDecodingProposer.__init__   s    /&,&C#$C%E$C(5C 	IHHHHH 0/!@ & J
 
 
    Ninput_batchsampled_token_idsslot_mappingsreturnc           
         g }t          |          D ]z\  }}|s|                    g            |j        |         }|j        |         }|| j        k    r|                    g            Y|j        |         }	|| j        j        vra|| j        j        v r| j        	                    |           |j
        |	         }
|j        |	d|
f         }| j                            ||           | j                            ||           t          d|| j        z
            }|j        |||f         }| j                            ||t#          | j        | j        |z
  dz
            | j        | j                  }|                    |j                   || j        j        |j                                        z
  D ]}| j                            |           |S )a   
        Propose speculative tokens for each request in the input batch. Suffix Decoding
        will speculate a dynamic number of tokens for each request every decoding step,
        so each entry in the returned list may have different lengths.
        Nr      )max_spec_tokensr   r   )	enumerateappendreq_idsnum_tokens_no_specr   req_id_to_indexr   active_requestscached_requestsevict_cached_responsenum_prompt_tokenstoken_ids_cpustart_requestadd_active_responsemaxr
   	speculateminr   r   r   	token_idskeysstop_request)r   r   r   r   draft_token_idsisampled_idsreq_id
num_tokensindexr,   prompt_token_idsstartpatterndrafts                  r   proposezSuffixDecodingProposer.propose"   s    ,.'(9:: (	4 (	4NA{ &&r*** (+F$7:JT///&&r***/7ET.>>>T.>>>%;;FCCC$/$A%$H!#.#<UDVEVDV=V#W !//8HIII 11&+FFF :(;;<<E!/53C0CDG%// #/1Cj1PST1T! ! !% 4#2 0  E ""5?3333 -0K0P0P0R0RR	3 	3F **62222r   c                     d S N )r   argskwargss      r   
load_modelz!SuffixDecodingProposer.load_modelb   s    r   rB   )__name__
__module____qualname____doc__r   r   r   listintdictstrtorchTensorr@   rF   rC   r   r   r   r   	   s         
J 
 
 
 
0 > >>  S	?> C-.
tC%&
'(
	> 
d3i> > > >@    r   r   )rO   vllm.configr   vllm.v1.worker.gpu_input_batchr   r   rC   r   r   <module>rS      sp     " " " " " " 5 5 5 5 5 5[ [ [ [ [ [ [ [ [ [r   