
    .`ic8                        d Z ddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ  ddlm!Z" ddlm#Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*  G d dej+                  Z,e	 G d dej+                              Z- G d dej+        eee          Z.dS )zFInference-only EagleMiniCPM model compatible with HuggingFace weights.    N)Iterable)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfig)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsEagleSupportsLoRA
SupportsPP)MiniCPMAttention)
MiniCPMMLP)
MiniCPMMoE)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymaybe_prefixprocess_eagle_weightc                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	 Zd
 Z	de
j        de
j        de
j        dz  dee
j        e
j        f         fdZ xZS )EagleMiniCPMDecoderLayerN configcache_configquant_configprefixreturnc                    t                                                       || _        || _        || _        |j        | _        t          |dd          | _        || _        | 	                                 | 
                                 d S )Nmax_position_embeddingsi    )super__init__r   r    r!   hidden_sizegetattrr%   r"   _init_attn_block_init_ffn_block)selfr   r    r!   r"   	__class__s        |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/minicpm_eagle.pyr'   z!EagleMiniCPMDecoderLayer.__init__<   s     	((!-'.v7PRV'W'W$    c                    t          | j        j        | j        j                  | _        t          | j        | j        j        | j        j        | j        j        | j	        | j
        | j        | j         d          | _        d S )Nepsz
.self_attn)r(   	num_headsnum_kv_headsrope_parametersr%   r    r!   r"   )r	   r   r(   rms_norm_epsinput_layernormEagleMiniCPMAttentionnum_attention_headsnum_key_value_headsr5   r%   r    r!   r"   	self_attnr,   s    r.   r*   z)EagleMiniCPMDecoderLayer._init_attn_blockM   s    &K#)A 
  
  
 /(k58 K7$($@**k---	
 	
 	
r/   c           
         t          | j        j        | j        j                  | _        t          | j        dd          | _        | j        dk    rMt          | j        | j        j        | j        j	        t          | j        dd          | j
                  | _        d S t          | j        j        | j        j        | j        j        | j        j        | j         d          | _        d S )	Nr1   num_expertsr   hidden_act_paramg        )r(   intermediate_size
hidden_actr?   r!   z.mlp)r>   top_kr(   r@   r"   )r	   r   r(   r6   post_attention_layernormr)   r>   EagleMiniCPMMLPr@   rA   r!   mlpEagleMiniCPMMoEnum_experts_per_tokr"   r<   s    r.   r+   z(EagleMiniCPMDecoderLayer._init_ffn_block\   s    (/K#)A)
 )
 )
% #4;qAAq  & ,"&+"?;1!(6H#!N!N!.  DHHH ' K3k5 K3"&+"?++++  DHHHr/   	positionshidden_statesresidualc                    |}|                      |          }|                     ||          }||| j        j        t	          j        | j        j                  z  z  z   }|}|                     |          }|                     |          }||| j        j        t	          j        | j        j                  z  z  z   }|d fS )N)rH   rI   )	r7   r;   r   scale_depthmathsqrtmup_denominatorrC   rE   )r,   rH   rI   rJ   s       r.   forwardz EagleMiniCPMDecoderLayer.forwardr   s     !,,];;' ' 
 
 !=K#di0K&L&LL$
 

 !55mDD// =K#di0K&L&LL$
 
 d""r/   )NNr   )__name__
__module____qualname__r   r   r   strr'   r*   r+   torchTensortuplerP   __classcell__r-   s   @r.   r   r   ;   s         ,026   "D( )4/	
  
     "
 
 
  ,#<# |# ,%	#
 
u|U\)	*# # # # # # # #r/   r   c            
           e Zd Zddddededef fdZdeded	ed
z  de	d
z  def
dZ
dej        dej        fdZdej        dej        dej        dej        ez  fdZdeeeej        f                  dee         fdZ xZS )EagleMiniCPMModelr   r   r"   start_layervllm_configr"   r]   c                   t                                                       |j        j        j        }|j        }|j        }|| _        || _        || _        |j        | _        t          j
                            | j        j        dz  | j        j        d          | _        t          |j        |j                  | _        t          |j        |j                  | _        t%          | j        |j                  | _        t)          | j        dd          | _        |                     |||||           t          |j        |j                  | _        t1          ddg| j        j                  | _        d S )	N   F)biasr1   r>   r   rI   rJ   )r&   r'   speculative_configdraft_model_config	hf_configr    r!   r   
vocab_sizerU   r   Linearr(   fcr	   r6   input_norm1input_norm2r   embed_tokensr)   r>   _init_layersnormr   make_empty_intermediate_tensors)r,   r^   r"   r]   r   r    r!   r-   s          r.   r'   zEagleMiniCPMModel.__init__   sV    	/BL"/"/(( +(//K#a')@u " 
 
 #6#56;NOOO"6#56;NOOO2O
 
 #4;qAA&&,kRRRF.F4GHHH	/Vj)4;+B0
 0
,,,r/   r   r    Nr!   c                     t          j        fdt          | j        j                  D                       | _        d S )Nc                 D    g | ]}t           d |z              S )z.eagle_layers.)r   ).0ir    r   r"   r!   r]   s     r.   
<listcomp>z2EagleMiniCPMModel._init_layers.<locals>.<listcomp>   sS         )  >>Q_>>	   r/   )r   
ModuleListranger   num_hidden_layerseagle_layers)r,   r"   r   r    r!   r]   s    `````r.   rk   zEagleMiniCPMModel._init_layers   sl     M        t{<==  

 

r/   	input_idsr#   c                 J    |                      |          }|| j        j        z  S N)rj   r   	scale_emb)r,   rw   	embeddings      r.   embed_input_idsz!EagleMiniCPMModel.embed_input_ids   s%    %%i00	4;000r/   rH   rI   c                    |                      |          }|                     |          }|                     |          }|                     t	          j        ||fd                    }d }| j        D ]} ||||          \  }}||fS )N)dim)r|   rh   ri   rg   rU   catrv   )r,   rw   rH   rI   input_embedsrJ   layers          r.   rP   zEagleMiniCPMModel.forward   s     ++I66''55((77	<*GR P P PQQ& 	 	E&+e' '#M88 m++r/   weightsc                 ,   g d}d t          | j                  D             }t          |                                           }t	                      }|D ]>\  }}d|v rd|v sd|v r|D ]i\  }}	}
|	|vr|                    |	|          }|                    d          r||vr;t          ||           rL||         }|j        } ||||
            n|D ]Q\  }}	}|	|vr|                    |	|          }t          ||           r2||         }|j        } ||||	|            nU|                    d          r||vrt          ||           r||         }t          |dt                    } |||           |                    |           @|S )	N))qkv_projq_projq)r   k_projk)r   v_projv)gate_up_proj	gate_projr   )r   up_projr   c           	      <    g | ]}d D ]}|dv rdndd| d| d|fS ))w1w2w3)r   r   wsw2szexperts..z.weight )rp   	expert_idweight_names      r.   rr   z2EagleMiniCPMModel.load_weights.<locals>.<listcomp>   sm     	!
 	!
 	!
 1	!
 	!
  $|33;9;;{;;;	!
 	!
 	!
 	!
r/   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedz.bias)r   weight_loader)rt   r>   dictnamed_parameterssetreplaceendswithr   r   r)   r   add)r,   r   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnameloaded_weight
param_namer   shard_idparamr   r   s                 r.   load_weightszEagleMiniCPMModel.load_weights   sJ   "
 "
 "
	!
 	!
 #4#344	!
 	!
 	!
 4002233"%%%#* -	$ -	$D-$,,&$..2IT2Q2Q 5K %8 %81
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H===:O 8 86JY"$.. <<Z@@D.tT:: ! '-E$)$7M!M}kY    E }}W-- !$k2I2I .tT:: ! '-E$+0E% %M "M%777d####r/   )rQ   rR   rS   r   rT   intr'   r   r   r   rk   rU   rV   r|   r   rP   r   rW   r   r   rX   rY   s   @r.   r[   r[      sc        9;q
 
 
(
25
IL
 
 
 
 
 
>

 !
 "D(	

 )4/
 
 
 
 
(1 1%, 1 1 1 1,<, <, |	,
 
+	+, , , ,*DHU33D-E$F D3s8 D D D D D D D Dr/   r[   c            
       P    e Zd Zg dddgdZdddZdd	d
edef fdZdddd
ededefdZ	de
j        de
j        fdZde
j        de
j        de
j        dee
j        e
j        f         fdZde
j        de
j        dz  fdZdeeee
j        f                  dee         fdZ xZS )EagleMiniCPMForCausalLM)r   r   r   r   r   )r   r   input_embeddingsoutput_embeddings)rj   lm_headr   )r"   r^   r"   c          	         t                                                       |j        j        j        }|j        }|j        }|| _        || _        || _	        || _        || _        |j
                            |j                  }|                     |t          |d          |          | _        t!          |j        |j        |t          |d                    | _        |j        r)| j                            | j        j                  | _        | j	        j        | j	        j        z  | _        t3          |j                  | _        | j        j        | _        d S )Nmodelr^   r"   r]   r   )r!   r"   )r&   r'   rb   rc   rd   r    r!   r"   r^   r   model_configget_num_layersparallel_config_init_modelr   r   r   re   r(   r   tie_word_embeddingstie_weightsrj   dim_model_basescale_widthr
   logits_processorrm   )r,   r^   r"   r   r    r!   target_layer_numr-   s          r.   r'   z EagleMiniCPMForCausalLM.__init__6  sK   /BL"/"/&((&3BB'
 
 %%#00( & 
 

 &%	22	
 
 
 % 	M<33DJ4KLLDL;2T[5OO /0A B BJ6 	,,,r/   r   r\   r]   c                &    t          |||          S )Nr   )r[   )r,   r^   r"   r]   s       r.   r   z#EagleMiniCPMForCausalLM._init_model\  s"     !#F
 
 
 	
r/   rw   r#   c                 6    | j                             |          S ry   )r   r|   )r,   rw   s     r.   r|   z'EagleMiniCPMForCausalLM.embed_input_idsc  s    z)))444r/   rH   rI   c                 f    |                      |||          \  }}|| j        z  }|| j        z  }||fS ry   )r   r   )r,   rw   rH   rI   hidden_states2s        r.   rP   zEagleMiniCPMForCausalLM.forwardf  sC     )-

9i(W(W%~%(88'$*::n,,r/   Nc                 <    |                      | j        |          }|S ry   )r   r   )r,   rI   logitss      r.   compute_logitsz&EagleMiniCPMForCausalLM.compute_logitsq  s      &&t|]CCr/   r   c                       fd}t            j        j        rdgnd           }|                    t	          ||                    S )Nc                 6    | \  }}t          |           ||fS ry   )r   )inputsr   r   r,   s      r.   	transformz7EagleMiniCPMForCausalLM.load_weights.<locals>.transformy  s(    "(D- t,,,&&r/   zlm_head.)skip_prefixes)r   r   r   r   map)r,   r   r   loaders   `   r.   r   z$EagleMiniCPMForCausalLM.load_weightsx  si    	' 	' 	' 	' 	'
 #+/;+JTJ<<PT
 
 
 ""3y'#:#:;;;r/   )rQ   rR   rS   packed_modules_mappingembedding_modulesr   rT   r'   r   r   rU   rV   r|   rW   rP   r   r   r   r   rX   rY   s   @r.   r   r   #  s       
 
 
 

 
 +& 
 BD $
 $
 $
z $
3 $
 $
 $
 $
 $
 $
N 9;q
 
 
(
25
IL
 
 
 
5 5%, 5 5 5 5	-<	- <	- |		-
 
u|U\)	*	- 	- 	- 	-| 
	   
<HU33D-E$F 
<3s8 
< 
< 
< 
< 
< 
< 
< 
<r/   r   )/__doc__rM   collections.abcr   rU   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   $vllm.model_executor.layers.layernormr	   +vllm.model_executor.layers.logits_processorr
   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   r   minicpmr   r8   r   rD   r   rF   utilsr   r   r   r   r   Moduler   r[   r   r   r/   r.   <module>r      s\  2 M L  $ $ $ $ $ $        ) ) ) ) ) ) = = = = = = / / / / / / / / 8 8 8 8 8 8 G G G G G G F F F F F F        P O O O O O - - - - - - ? ? ? ? ? ? ? ? ? ? > > > > > > 2 2 2 2 2 2 2 2 2 2 2 2             P# P# P# P# P#ry P# P# P#f Q Q Q Q Q	 Q Q Qh_< _< _< _< _<biz= _< _< _< _< _<r/   