
    .`iR                         d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ ddlmZ  G d	 d
ej                  Z G d dej                  ZdS )    )IterableN)
VllmConfig)LogitsProcessor)ParallelLMHead)default_weight_loader   )maybe_prefixc                   T     e Zd Zdedededdf fdZdej        dej        fdZ xZ	S )	ResidualBlockconfighidden_size
num_layersreturnNc                     t                                                       t          j        fdt	          |          D                       | _        t          j                    | _        d S )Nc                 \    g | ](}t          j        t          d d                    )S )medusa_fc_biasF)bias)nnLineargetattr).0_r   r   s     u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/medusa.py
<listcomp>z*ResidualBlock.__init__.<locals>.<listcomp>   sR         	 )95AA        )super__init__r   
ModuleListrangelayersSiLUact)selfr   r   r   	__class__s    `` r   r   zResidualBlock.__init__   sv    m     z**  	
 	
 799r   xc                 \    | j         D ]#}||                      ||                    z   }$|S )N)r    r"   )r#   r%   layers      r   forwardzResidualBlock.forward#   s8    [ 	' 	'EDHHUU1XX&&&AAr   )
__name__
__module____qualname__r   intr   torchTensorr(   __classcell__r$   s   @r   r   r      s        z   QU       %,        r   r   c                        e Zd ZdZdddededdf fdZd	ej        de	ej                 fd
Z
d	e	ej                 de	ej                 fdZdeeeej        f                  dee         fdZ xZS )Medusaaq  This class implements the Medusa draft model from the paper: https://arxiv.org/abs/2401.10774
    Reference implementation: https://github.com/FasterDecoding/Medusa

    Differences from reference implementation:
    1. Currently this only supports generating proposals from top-1 tokens.
    2. We have an optional token_map which reduces draft vocab to most
       frequently used tokens to give some additional speed-up by reducing
       sampling overhead. This is disabled unless the checkpoint file has
       explicit token_map tensor and config has an optional attribute
       truncated_vocab_size < vocab_size. To use this technique, one has to find
       the top-k most frequent tokens in target dataset and add that as a tensor
       in the draft checkpoint (using key token_map). Also, the draft config
       needs to have truncated_vocab_size (=k) as an attribute. prefixvllm_configr5   r   Nc                    |j         j        j        t                                                        _        t          j         fdt           j        j	                  D                        _
        j         _        j         _        t          dd          rZt           j        j        t#          d                     _         fdt           j        j	                  D              _        n=t          j        fdt           j        j	                  D                        _        t          dd	          }t)          j         j        |           _        d  _        d S )
Nc                 \    g | ](}t          j        j        j        j                   )S ))r   r   r   )r   r   r   num_hidden_layers)r   r   r   r#   s     r   r   z#Medusa.__init__.<locals>.<listcomp>=   sM         ! $ 7#{<    r   original_lm_headFlm_headr4   c                     g | ]	}j         
S  )r;   )r   r   r#   s     r   r   z#Medusa.__init__.<locals>.<listcomp>O   s    PPPaT\PPPr   c                 j    g | ]/}t          j        j        t          d |                     0S )z	lm_heads.r4   )r   
vocab_sizer   r	   )r   ir   r5   s     r   r   z#Medusa.__init__.<locals>.<listcomp>R   sY         #)*+FOOODD    r   logit_scaleg      ?)speculative_configdraft_model_config	hf_configr   r   r   r   r   r   	num_headsblocksr?   orig_vocab_sizetruncated_vocab_sizer   r   r   r	   r;   lm_headsr   logits_processor	token_map)r#   r6   r5   rA   r   r$   s   ` ` @r   r   zMedusa.__init__8   s   /BLm     t{455  	
 	
  &0$*$?!6-u55 	))"#FI66  DL
 QPPP59N3O3OPPPDMMM     #4;#899  	 	DM fmS99 /t8+!
 !
 r   hidden_statesc                 *    fd| j         D             S )Nc                 &    g | ]} |          S r=   r=   )r   blockrL   s     r   r   z"Medusa.forward.<locals>.<listcomp>j   s#    >>>m$$>>>r   )rF   )r#   rL   s    `r   r(   zMedusa.forwardi   s    >>>>$+>>>>r   c           
         g }t          || j                  D ]\  }}|                     ||          }|t          |          dk    sJ 3| j        |                    |           P|                    t          j         t          j        g |j	        d d         | j
        R |j        |j                  z             ||d         d| j        f<   |S )Nr   )sizedevicedtype.)ziprI   rJ   lenrK   appendr-   infonesshaperG   rS   rT   )r#   rL   
logits_lsthsr;   _logitss         r   compute_logitszMedusa.compute_logitsl   s     *,
}dm<< 	> 	>KB++GR88G :!++++~%!!'****!!YJjHw}SbS1H43GHH&~%m     7>
2sDN233r   weightsc                 "   t          |                                           }t                      }i }|D ]x\  }}|                    dd          }|dk    r,| j        | j        k     rt          j        |d          | _        M||v r|||<   Wt          | j
        dd          r|dk    r||d<   y|                                D ]}\  }}d	|v r5| j        .|j        d
         | j        j        d
         k    r|| j                 }||         }t          |dt                    } |||           |                    |           ~| j        0| j                            | j        d
         j        j                   | j        | j        k    s	| j        J |S )Nzmedusa_heads.r3   rK   F)requires_gradr:   zlm_heads.0.weightzlm_head.weightr;   r   weight_loader)rS   )dictnamed_parameterssetreplacerH   rG   r   	ParameterrK   r   r   itemsrZ   r   addtorI   weightrS   )	r#   r_   params_dictloaded_paramsweights_mapnameloaded_weightparamrb   s	            r   load_weightszMedusa.load_weights   s   4002233"%%%#* 	> 	>D-<<44D{"",t/CCC%'\-u%U%U%UDN$$$1D!!%7??>///0=,-#.#4#4#6#6 	$ 	$D-T!!N.!'*T^-A!-DDD -dn =%E#E?<QRRMM%///d####>%NT]1%5%<%CDDD)T-AAAN&&
 r   )r)   r*   r+   __doc__r   strr   r-   r.   listr(   r^   r   tuplere   rr   r/   r0   s   @r   r2   r2   )   s       C C BD / / /z /3 / / / / / / /b?U\ ?d5<6H ? ? ? ?EL) 
el	   >(HU33D-E$F (3s8 ( ( ( ( ( ( ( (r   r2   )collections.abcr   r-   torch.nnr   vllm.configr   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   utilsr	   Moduler   r2   r=   r   r   <module>r      s	   % $ $ $ $ $        " " " " " " G G G G G G      P O O O O O          BI   ,J J J J JRY J J J J Jr   