
    .`i(                     B   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZ ddlmZmZmZmZ  ee           Z! G d de          Ze G d dej"                              Z# G d de          Z$dS )    )IterableN)LlamaConfig)support_torch_compile)
VllmConfig)init_logger)ReplicatedLinear)LogitsProcessor)QuantizationConfig)VocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)LlamaDecoderLayerLlamaForCausalLM   )AutoWeightsLoaderget_draft_quant_configmaybe_prefixprocess_eagle_weightc                   V     e Zd Z	 	 d
dededededz  ddf
 fdZdededz  fd	Z	 xZ
S )r    Nvllm_configdisable_input_layernormprefixconfigreturnc                     t                                          |||           |r| `t          j                    | _        d S d S )Nr   r   )super__init__input_layernormnnIdentity)selfr   r   r   r   	__class__s        z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/llama_eagle.pyr   zLlamaDecoderLayer.__init__"   sR     	VFCCC # 	1$#%;==D   	1 	1    c                      t          |          S )z8Use drafter's quantization config instead of verifier's.)r   )r#   r   s     r%   get_quant_configz"LlamaDecoderLayer.get_quant_config1   s    %k222r&   )r   N)__name__
__module____qualname__r   boolstrr   r   r
   r(   __classcell__r$   s   @r%   r   r   !   s        
 %)1 11 "&1 	1
 d"1 
1 1 1 1 1 13J 3;MPT;T 3 3 3 3 3 3 3 3r&   r   c            
            e Zd Zddddedededdf fd	Zd
ej        dej        fdZ	d
ej        dej        dej        de
ej        ej        f         fdZdee
eej        f                  dee         fdZ xZS )
LlamaModelr   r   )r   start_layer_idr   r   r2   r   Nc                T    t                                                       j        j        j         _         j        j         _        t                     _        t           j        j         j        j
        t          d                     _        t          j         fdt           j        j                  D                        _        t%           j        j
        dz   j        j
        dj        j         j        t          d          d           _        d S )Nembed_tokensr   c                 p    g | ]2}t          |d k    t          d|z              j                  3S )r   zlayers.r   )r   r   r   ).0ir   r#   r2   r   s     r%   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>M   sd         "F'0N!n:L0N0NOO;	    r&      Ffc)
input_sizeoutput_sizebiasparams_dtypequant_configr   return_bias)r   r   speculative_configdraft_model_config	hf_configr   
vocab_sizer   r@   r   hidden_sizer   r4   r!   
ModuleListrangenum_hidden_layerslayersr   model_configdtyper;   )r#   r   r   r2   r$   s   ````r%   r   zLlamaModel.__init__8   s-    	!4GQ+0 3;??2K"K#77
 
 
 m       t{<==  

 

 #{.2/$17*--
 
 
r&   	input_idsc                 ,    |                      |          S N)r4   r#   rM   s     r%   embed_input_idszLlamaModel.embed_input_idsa   s      +++r&   	positionshidden_statesc                     |                      |          }|                     t          j        ||fd                    }d }| j        D ]} ||||          \  }}||z   }||fS )N)dim)r4   r;   torchcatrJ   )r#   rM   rR   rS   input_embedsresiduallayers          r%   forwardzLlamaModel.forwardd   s     ((33	<*GR P P PQQ[ 	 	E&+e' '#M88
 &0m++r&   weightsc                    g d}t          |                                           }t                      }|D ]&\  }}| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           d|v sd|v rt          ||          }||D ]>\  }
}}||vr|
                    ||
          }||         }|j        }	 |	|||            n*||         }t          |dt                    }	 |	||           |                    |           (|S )N))	.qkv_projz.q_projq)r_   z.k_projk)r_   z.v_projv).gate_up_projz
.gate_projr   )rc   z.up_projr   weight_loaderr   scale
zero_point)dictnamed_parameterssetr@   get_cache_scalegetattrr   rV   addr   replacerd   )r#   r]   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamrd   
param_nameweight_nameshard_ids                r%   load_weightszLlamaModel.load_weightsv   s   "
 "
 "
 4002233"%%%#* 	$ 	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---$,$"6"60{CC<5K 4 41
Kd**||K<<#D) % 3e]H===#D) '@U V Ve]333d####r&   )r)   r*   r+   r   r-   intr   rW   TensorrQ   tupler\   r   ri   rx   r.   r/   s   @r%   r1   r1   6   s#        '
 '
 '
  '
 	'

 '
 
'
 '
 '
 '
 '
 '
R, ,%, , , , ,,<, <, |	,
 
u|U\)	*, , , ,$+HU33D-E$F +3s8 + + + + + + + +r&   r1   c                       e Zd ZdddedefdZdej        dej        fdZ	 ddej        d
ej        dej        dej        d	z  de	ej        ej        f         f
dZ
dee	eej        f                  fdZd	S )EagleLlamaForCausalLMr   r5   r   r   c                   t           j                            |            |j        j        j        | _        t          | j        dd           "t          | j        dd           }|| j        _        |j	        
                    |j                  }t          |d|          | _        t          | j        dd          }t          | j        j        |          | _        d S )Ndraft_vocab_sizerE   model)r   r   r2   logit_scaleg      ?)re   )r!   Moduler   rB   rC   rD   r   rk   r   rK   get_num_layersparallel_configr1   r   r	   rE   logits_processor)r#   r   r   base_vocab_sizetarget_layer_numr   s         r%   r   zEagleLlamaForCausalLM.__init__   s    
	4   !4GQ 4; 2D99A%dk<FFO+:DK(&3BB'
 
  #GDT
 
 

 dk=#>> /K"+!
 !
 !
r&   rM   r   c                 6    | j                             |          S rO   )r   rQ   rP   s     r%   rQ   z%EagleLlamaForCausalLM.embed_input_ids   s    z)))444r&   NrR   rS   inputs_embedsc                 |    |$t          t          |           j         d          |                     |||          S )Nz( does not support multimodal inputs yet.)NotImplementedErrortyper)   r   )r#   rM   rR   rS   r   s        r%   r\   zEagleLlamaForCausalLM.forward   sI     $%::&PPP   zz)Y>>>r&   r]   c                 z      fd}t           d           }|                    t          ||                     d S )Nc                 H    | \  }}d|vrd|z   }t          |           ||fS )Nlm_headzmodel.)r   )inputsrq   rr   r#   s      r%   	transformz5EagleLlamaForCausalLM.load_weights.<locals>.transform   s<    "(D-$$$ t,,,&&r&   )skip_prefixes)r   rx   map)r#   r]   r   loaders   `   r%   rx   z"EagleLlamaForCausalLM.load_weights   s^    	' 	' 	' 	' 	' #
 
 
 	C	73344444r&   rO   )r)   r*   r+   r   r-   r   rW   rz   rQ   r{   r\   r   rx    r&   r%   r}   r}      s        AC 
 
 
z 
3 
 
 
 
(5 5%, 5 5 5 5 .2? ?<? <? |	?
 |d*? 
u|U\)	*? ? ? ?5HU33D-E$F 5 5 5 5 5 5r&   r}   )%collections.abcr   rW   torch.nnr!   transformersr   vllm.compilation.decoratorsr   vllm.configr   vllm.loggerr   !vllm.model_executor.layers.linearr   +vllm.model_executor.layers.logits_processorr	   3vllm.model_executor.layers.quantization.base_configr
   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   r    vllm.model_executor.models.llamar   r   utilsr   r   r   r   r)   loggerr   r1   r}   r   r&   r%   <module>r      s   % $ $ $ $ $        $ $ $ $ $ $ = = = = = = " " " " " " # # # # # # > > > > > > G G G G G G R R R R R R V V V V V V        Q P P P P P P P            
X		3 3 3 3 3) 3 3 3* j j j j j j j jZ15 15 15 15 15, 15 15 15 15 15r&   