
    .`i
                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ  ee          Z G d d	          ZdS )
    N)
VllmConfig)set_forward_context)init_logger)	get_model)is_mixture_of_experts)SamplingMetadatac                       e Zd ZdZdedej        fdZ	 ddej        de	de
eej        f         ee
eej        f                  z  dz  d	ej        fd
Zdej        d	dfdZ ej                    ded	dfd            ZdS )MedusaProposerz>
    Medusa proposer class for generating token sequences
    vllm_configdevicec                     || _         || _        |j        j        | _        |j        j                                        | _        |j	        j
        | _
        d S N)r   r   scheduler_configmax_num_batched_tokensmax_num_tokensspeculative_configdraft_model_configget_hidden_sizehidden_sizemodel_configdtype)selfr   r   s      n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/spec_decode/medusa.py__init__zMedusaProposer.__init__   sQ     '):Q*=MMOO 	 !-3


    Ntarget_hidden_statessampling_metadataslot_mappingsreturnc                     |                      |          }| j                             |          }t          j        d |D             d          }|S )Nc                 :    g | ]}|                     d           S )dim)argmax).0logits     r   
<listcomp>z*MedusaProposer.propose.<locals>.<listcomp>3   s&    #M#M#MUELLRL$8$8#M#M#Mr      r#   )modelcompute_logitstorchstack)r   r   r   r   blockslogitsdraft_tokenss          r   proposezMedusaProposer.propose%   sW     011**622 {#M#Mf#M#M#MSTUUUr   target_modelc                    ddl m}  |d          5  t          | j        | j        j        j                  | _        d d d            n# 1 swxY w Y   t          | j                  r| j        j        j	        rJ d            d S d S )Nr   )set_model_tagmedusa_head)r   r   z EPLB for Medusa is not supported)
vllm.compilation.backendsr4   r   r   r   r   r*   r   parallel_configenable_eplb)r   r2   r4   s      r   
load_modelzMedusaProposer.load_model7   s    ;;;;;;]=)) 	 	" ,!-@S  DJ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 "$*--	. 0<	. 	. .	. 	. 
 	. 	. 	. 	.s   +A		AA
num_tokensc                     t          j        | j        | j        f| j        | j                  }t          d | j        |          5  |                     |           d d d            d S # 1 swxY w Y   d S )N)r   r   )r:   )	r,   zerosr   r   r   r   r   r   r*   )r   r:   hidden_statess      r   	dummy_runzMedusaProposer.dummy_runD   s     $"23*;
 
 

 !t'7JOOO 	& 	&JJ}%%%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   A((A,/A,r   )__name__
__module____qualname____doc__r   r,   r   r   Tensorr   dictstrlistr1   nnModuler9   inference_modeintr>    r   r   r
   r
      s        44 4 4 4 4(  #l , C-.
tC%&
'(
	 
   $.ry .T . . . . U&C &D & & & & & &r   r
   )r,   torch.nnrG   vllm.configr   vllm.forward_contextr   vllm.loggerr    vllm.model_executor.model_loaderr   %vllm.model_executor.models.interfacesr   vllm.v1.sample.metadatar   r?   loggerr
   rK   r   r   <module>rT      s           " " " " " " 4 4 4 4 4 4 # # # # # # 6 6 6 6 6 6 G G G G G G 4 4 4 4 4 4 
X		:& :& :& :& :& :& :& :& :& :&r   