
    .`i\                     h   d Z ddlZddlmZ ddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZmZmZ ddlmZmZ ddlmZmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z>m?Z?  G d de	j@                  ZA G d de	j@                  ZB G d d e	j@                  ZC G d! d"e	j@                  ZDe G d# d$e	j@                              ZE G d% d&e	j@        e8e9e7          ZFdS )'zAInference-only MiniCPM model compatible with HuggingFace weights.    N)Iterable)islice)Any)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)FatreluAndMul
SiluAndMul)fused_experts
fused_topk)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)set_weight_attrs)current_platform)IntermediateTensors   )SupportsEagle3SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                        e Zd ZdZ	 	 	 ddededededej        dz  d	edz  d
ef fdZde	j
        dej        dedefdZdej        dej        fdZ xZS )
MiniCPMMoEzA tensor-parallel MoE implementation that shards each expert
    across all ranks.

    Each expert's weights are sharded across all ranks and a fused MoE
    kernel is used for the forward pass, and finally we reduce the outputs
    across ranks.
    N num_expertstop_khidden_sizeintermediate_sizeparams_dtypetp_sizeprefixc           	         t                                                       |pt                      | _        || _        || _        || _        || j        z  | _        |t          j	                    }|| _
        t          | j        | j        d| j
        d | d          | _        t          j        t          j        | j        d| j        z  | j        t           j        | j
                            | _        t          j        t          j        | j        | j        | j        t           j        | j
                            | _        t)          | j        d| j        i           t)          | j        d| j        i           d S )NFz.gate)biasr2   quant_configr4      )devicedtypeweight_loader)super__init__r   r3   num_total_expertsr/   r0   r1   torchget_default_dtyper2   r   gater   	Parameteremptyr    device_typewsw2sr   r;   )	selfr.   r/   r0   r1   r2   r3   r4   	__class__s	           v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/minicpm.pyr=   zMiniCPMMoE.__init__U   s    	H"F"H"H!,
&!2dl!B 244L($"*###
 
 
	 ,K&D** '3'  
 
 <K& &'3'  
 
 	G!3	
 	
 	
 	H!3	
 	
 	
 	
 	
    paramloaded_weightweight_name	expert_idc                 |   t                      }|j        }| j        }t          ||z  |dz   |z            }|                    d          r||d d f         ||d|d d f<   |                    d          r||d d f         |||d|z  d d f<   |                    d          r|d d |f         ||d d d d f<   d S d S )Nr"   z	w1.weightr   z	w3.weightr8   z	w2.weight)r   datar1   sliceendswith)	rG   rK   rL   rM   rN   tp_rank
param_data
shard_sizeshards	            rI   r;   zMiniCPMMoE.weight_loader   s    122Z
+
g
*Wq[J,FGG,, 	M5B5!!!85LJy!J,12,, 	DQqqqEJy*q:~"=qqq@A ,, 	B*75*AJy!!!QQQ'''	B 	BrJ   hidden_statesreturnc                 R   |j         \  }}|                    d| j                  }|                     |          \  }}t	          ||| j        d          \  }}}t          || j        | j        ||d          }| j	        dk    rt          |          }|                    ||          S )NT)renormalize)inplacer"   )shapeviewr0   rA   r   r/   r   rE   rF   r3   r   )	rG   rW   
num_tokensr0   router_logits_topk_weightstopk_idsfinal_hidden_statess	            rI   forwardzMiniCPMMoE.forward   s    "/"5
K%**2t/?@@99]33q$.=$*$%
 %
 %
!h ,47DHlHd
 
 
 <!"BCV"W"W"''
K@@@rJ   NNr-   )__name__
__module____qualname____doc__intr?   r:   strr=   r   rB   Tensorr;   re   __classcell__rH   s   @rI   r,   r,   L   s         ,0"<
 <
<
 <
 	<

 <
 kD(<
 t<
 <
 <
 <
 <
 <
 <
|B|B |B 	B
 B B B B(AU\ Ael A A A A A A A ArJ   r,   c                   N     e Zd Z	 	 ddedededededz  ded	df fd
Zd Z xZ	S )
MiniCPMMLPNr-   r0   r1   
hidden_acthidden_act_paramr7   r4   rX   c                 R   t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt                      | _        d S |dk    rt          |          | _        d S t          d	| d
          )Nr8   Fz.gate_up_projr6   r7   r4   z
.down_projsilufatrelu)	thresholdzUnsupported activation: z.. Only silu and fatrelu are supported for now.)
r<   r=   r   gate_up_projr   	down_projr   act_fnr   
ValueError)rG   r0   r1   rr   rs   r7   r4   rH   s          rI   r=   zMiniCPMMLP.__init__   s     	6!#%+++
 
 
 +%(((
 
 
 $,,DKKK9$$'2BCCCDKKK?: ? ? ?  rJ   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)ry   r{   rz   )rG   xgate_upra   s       rI   re   zMiniCPMMLP.forward   sD    &&q))
KK  ~~a  1rJ   )Nr-   )
rg   rh   ri   rk   rl   floatr   r=   re   rn   ro   s   @rI   rq   rq      s         37       	 
    )4/    
           D      rJ   rq   c                        e Zd Z	 	 	 	 	 ddedededeeef         dz  ded	edz  d
edz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )MiniCPMAttentionN    r-   r0   	num_headsnum_kv_headsrope_parametersmax_position_embeddingscache_configr7   r4   rX   c	           
      @   t                                                       || _        t                      }	|| _        | j        |	z  dk    sJ | j        |	z  | _        || _        | j        |	k    r| j        |	z  dk    sJ n|	| j        z  dk    sJ t          d| j        |	z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        t          || j	        | j        | j        d|| d          | _        t!          | j        | j	        z  |d|| d          | _        t%          | j	        ||          | _        t)          | j        | j	        | j        | j        ||| d	
          | _        d S )Nr   r"   g      Fz	.qkv_projru   z.o_proj)max_positionr   z.attn)r   r   r7   r4   )r<   r=   r0   r   total_num_headsr   total_num_kv_headsmaxr   head_dimq_sizekv_sizescalingr   r   qkv_projr   o_projr   
rotary_embr   attn)rG   r0   r   r   r   r   r   r7   r4   r3   rH   s             rI   r=   zMiniCPMAttention.__init__   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*'>$)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0+
 
 
 NML*%%###
 
 
			rJ   	positionsrW   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )NrZ   )dim)r   splitr   r   r   r   r   )
rG   r   rW   qkvra   qkvattn_outputoutputs
             rI   re   zMiniCPMAttention.forward(  s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	rJ   )Nr   NNr-   )rg   rh   ri   rk   dictrl   r   r
   r   r=   r?   rm   re   rn   ro   s   @rI   r   r      s	        26'++/26@
 @
@
 @
 	@

 c3h$.@
 "%@
 "D(@
 )4/@
 @
 
@
 @
 @
 @
 @
 @
D
<
 |
 
	
 
 
 
 
 
 
 
rJ   r   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	 Zd
 Z	de
j        de
j        de
j        dz  dee
j        e
j        f         fdZ xZS )MiniCPMDecoderLayerNr-   configr   r7   r4   rX   c                    t                                                       || _        || _        || _        |j        | _        t          |dd          | _        || _        | 	                                 | 
                                 d S )Nr   r   )r<   r=   r   r   r7   r0   getattrr   r4   _init_attn_block_init_ffn_block)rG   r   r   r7   r4   rH   s        rI   r=   zMiniCPMDecoderLayer.__init__6  s     	((!-'.v7PRV'W'W$rJ   c                    t          | j        j        | j        j                  | _        t          | j        | j        j        | j        j        | j        j        | j	        | j
        | j        | j         d          | _        d S )Nepsz
.self_attn)r0   r   r   r   r   r   r7   r4   )r   r   r0   rms_norm_epsinput_layernormr   num_attention_headsnum_key_value_headsr   r   r   r7   r4   	self_attnrG   s    rI   r   z$MiniCPMDecoderLayer._init_attn_blockG  s    &K#)A 
  
  
 *(k58 K7$($@**k---	
 	
 	
rJ   c           
         t          | j        j        | j        j                  | _        t          | j        dd          | _        | j        dk    rVt          | j        | j        j        | j        j	        t          | j        dd          | j
        | j         d          | _        d S t          | j        j        | j        j        | j        j        | j        j        | j         d          | _        d S )	Nr   r.   r   rs   g        z.mlp)r0   r1   rr   rs   r7   r4   )r.   r/   r0   r1   r4   )r   r   r0   r   post_attention_layernormr   r.   rq   r1   rr   r7   r4   mlpr,   num_experts_per_tokr   s    rI   r   z#MiniCPMDecoderLayer._init_ffn_blockV  s    (/K#)A)
 )
 )
% #4;qAAq  ! ,"&+"?;1!(6H#!N!N!.++++  DHHH " K3k5 K3"&+"?++++  DHHHrJ   r   rW   residualc                    |}|                      |          }|                     ||          }||| j        j        t	          j        | j        j                  z  z  z   }|}|                     |          }|                     |          }||| j        j        t	          j        | j        j                  z  z  z   }|d fS )N)r   rW   )	r   r   r   scale_depthmathsqrtnum_hidden_layersr   r   )rG   r   rW   r   s       rI   re   zMiniCPMDecoderLayer.forwardm  s     !,,];;' ' 
 
 !=K#di0M&N&NN$
 

 !55mDD// =K#di0M&N&NN$
 
 d""rJ   rf   )rg   rh   ri   r   r
   r   rl   r=   r   r   r?   rm   tuplere   rn   ro   s   @rI   r   r   5  s         ,026   "D( )4/	
  
     "
 
 
  .#<# |# ,%	#
 
u|U\)	*# # # # # # # #rJ   r   c                   J    e Zd Zdddedef fdZdedededz  d	edz  fd
Z	de
j        de
j        fdZ	 	 dde
j        de
j        dedz  de
j        dz  de
j        ez  ee
j        ee
j                 f         z  f
dZdeeee
j        f                  dee         fdZ xZS )MiniCPMModelr-   r4   vllm_configr4   c                $   t                                                       |j        j        }|j        }|j        }|| _        || _        || _        |j        | _        t          | j        |j	                  | _
        t          | j        dd          | _        |                     ||||           t          |j	        |j                  | _        t#          t$          df                     | _        t)          ddg| j        j	                  | _        d S )Nr.   r   r   .rW   r   )r<   r=   model_config	hf_configr   r7   r   
vocab_sizer   r0   embed_tokensr   r.   _init_layersr   r   normr   rk   aux_hidden_state_layersr(   make_empty_intermediate_tensors)rG   r   r4   r   r   r7   rH   s         rI   r=   zMiniCPMModel.__init__  s    )3"/"/(( +2O
 
 #4;qAA&&,EEEF.F4GHHH	',S#X'8'8$/Vj)4;+B0
 0
,,,rJ   r   r   Nr7   c                 p    t          j        fd| d          \  | _        | _        | _        d S )Nc                 *    t          |           S )Nr   )r   )r4   r   r   r7   s    rI   <lambda>z+MiniCPMModel._init_layers.<locals>.<lambda>  s     .l6   rJ   z.layersr   )r)   r   start_layer	end_layerlayers)rG   r4   r   r   r7   s     ```rI   r   zMiniCPMModel._init_layers  s`     9D$      %%%9
 9
 9
5$.$+++rJ   	input_idsrX   c                 J    |                      |          }|| j        j        z  S r~   )r   r   	scale_emb)rG   r   	embeddings      rI   embed_input_idszMiniCPMModel.embed_input_ids  s%    %%i00	4;000rJ   r   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|d         }|d         }g }t          t	          | j        | j        | j                            D ]:\  }}	|| j        v r|	                    |||z   n|            |	|||          \  }};t                      j
        st          ||d          S |                     |          }t          |          dk    r||fS |S )NrW   r   )rW   r   r   )r   is_first_rankr   	enumerater   r   r   r   r   appendis_last_rankr!   r   len)
rG   r   r   r   r   rW   r   aux_hidden_statesidxlayers
             rI   re   zMiniCPMModel.forward  sK    >>' 	8( - $ 4 4Y ? ?HH0AM+J7H#4; 0$.AA
 
 	 	JC d222!((080DMH,,-   ',e' '#M88 ~~* 	&"/XFF   		-00 !!A%% "333rJ   weightsc                 ,   g d}d t          | j                  D             }t          |                                           }t	                      }|D ]>\  }}d|v rd|v sd|v r|D ]i\  }}	}
|	|vr|                    |	|          }|                    d          r||vr;t          ||           rL||         }|j        } ||||
            n|D ]Q\  }}	}|	|vr|                    |	|          }t          ||           r2||         }|j        } ||||	|            nU|                    d          r||vrt          ||           r||         }t          |dt                    } |||           |                    |           @|S )	N))r   q_projr   )r   k_projr   )r   v_projr   )ry   	gate_projr   )ry   up_projr"   c           	      <    g | ]}d D ]}|dv rdndd| d| d|fS ))w1w2w3)r   r   rE   rF   zexperts..z.weight ).0rN   rM   s      rI   
<listcomp>z-MiniCPMModel.load_weights.<locals>.<listcomp>  sm     	!
 	!
 	!
 1	!
 	!
  $|33;9;;{;;;	!
 	!
 	!
 	!
rJ   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedz.bias)rN   r;   )ranger.   r   named_parameterssetreplacerR   r'   r;   r   r   add)rG   r   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnamerL   
param_namerM   shard_idrK   r;   rN   s                 rI   load_weightszMiniCPMModel.load_weights  sJ   "
 "
 "
	!
 	!
 #4#344	!
 	!
 	!
 4002233"%%%#* ,	$ ,	$D-$,,&$..2IT2Q2Q 5K $8 $81
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H===:O 8 86JY"$.. <<Z@@D.tT:: ! '-E$)$7M!M}kY    E }}W-- !$k2I2I .tT:: ! '-E$+0E% %M "M%777d####rJ   NN)rg   rh   ri   r   rl   r=   r   r
   r   r   r?   rm   r   r!   r   listre   r   r   r   rn   ro   s   @rI   r   r     s       AC 
 
 
z 
3 
 
 
 
 
 
6

 !
 "D(	

 )4/
 
 
 
1 1%, 1 1 1 1 <@-1( (<( <( 2D8	(
 |d*( 
+	+eEL$u|BT4T.U	U( ( ( (TCHU33D-E$F C3s8 C C C C C C C CrJ   r   c                       e Zd Zg dddgdZdddZdd	d
edef fdZdd	d
edefdZde	j
        de	j
        fdZdeedf         ddfdZdeedf         fdZ	 	 dde	j
        de	j
        dedz  de	j
        dz  de	j
        ez  ee	j
        ee	j
                 f         z  f
dZde	j
        de	j
        dz  fdZdeeee	j
        f                  dee         fdZ xZS )MiniCPMForCausalLM)r   r   r   r   r   )r   ry   input_embeddingsoutput_embeddings)r   lm_headr-   r   r   r4   c          	         t                                                       |j        j        }|j        }|j        }|j        }|| _        || _        || _	        || _        || _        | 
                    |t          |d                    | _        t          |j        |j        |t          |d                    | _        |j        r)| j                            | j        j                  | _        | j	        j        | j	        j        z  | _        t-          |j                  | _        | j        j        | _        |j        r$t5          |dd          dk    rt7          d          d S d S )Nmodelr   r4   r  )r7   r4   r.   r   z&EPLB is not supported for MiniCPM yet.)r<   r=   r   r   r   r7   parallel_configr4   r   r   _init_modelr*   r  r   r   r0   r  tie_word_embeddingstie_weightsr   dim_model_basescale_widthr   logits_processorr   enable_eplbr   NotImplementedError)rG   r   r4   r   r   r7   r  rH   s          rI   r=   zMiniCPMForCausalLM.__init__<  sj   )3"/"/%5&((%%#L,I,I & 
 

 &%	22	
 
 
 % 	M<33DJ4KLLDL;2T[5OO /0A B BJ6 	, & 	P76=!+L+Lq+P+P%&NOOO	P 	P+P+PrJ   c                $    t          ||          S )Nr  )r   )rG   r   r4   s      rI   r  zMiniCPMForCausalLM._init_model`  s    FCCCCrJ   r   rX   c                 6    | j                             |          S r~   )r  r   )rG   r   s     rI   r   z"MiniCPMForCausalLM.embed_input_idsc  s    z)))444rJ   r   .Nc                     || j         _        d S r~   )r  r   )rG   r   s     rI   set_aux_hidden_state_layersz.MiniCPMForCausalLM.set_aux_hidden_state_layersf  s    -3
***rJ   c                 J    t          | j        j                  }d|dz  |dz
  fS )Nr8      )r   r  r   )rG   
num_layerss     rI   "get_eagle3_aux_hidden_state_layersz5MiniCPMForCausalLM.get_eagle3_aux_hidden_state_layersi  s)    *++
:?JN33rJ   r   r   r   c                     |                      ||||          }t          |t                    r&t          |          dk    r|\  }}|| j        z  }||fS t          |t
                    r|S || j        z  }|S )Nr8   )r  
isinstancer   r   r  r!   )rG   r   r   r   r   model_outputrW   r   s           rI   re   zMiniCPMForCausalLM.forwardm  s     zzy"6
 
 lE** 	%s</@/@A/E/E/;,M,)D,<<M "333 ,(;<< %## ,t/? ?$$rJ   rW   c                 <    |                      | j        |          }|S r~   )r  r  )rG   rW   logitss      rI   compute_logitsz!MiniCPMForCausalLM.compute_logits  s      &&t|]CCrJ   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r&   r   r  r   )rG   r   loaders      rI   r   zMiniCPMForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++rJ   r   )rg   rh   ri   packed_modules_mappingembedding_modulesr   rl   r=   r  r?   rm   r   r   rk   r  r  r!   r   re   r  r   r   r   rn   ro   s   @rI   r   r   )  s;       
 
 
 

 
 +& 
 BD "P "P "Pz "P3 "P "P "P "P "P "PH EG D D D* Dc D D D D5 5%, 5 5 5 54%S/ 4d 4 4 4 44E#s(O 4 4 4 4 <@-1% %<% <% 2D8	%
 |d*% 
+	+eEL$u|BT4T.U	U% % % %0| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rJ   r   )Grj   r   collections.abcr   	itertoolsr   typingr   r?   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   r   r   %vllm.model_executor.layers.activationr   r   $vllm.model_executor.layers.fused_moer   r   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.model_executor.utilsr   vllm.platformsr    vllm.sequencer!   
interfacesr#   r$   r%   utilsr&   r'   r(   r)   r*   Moduler,   rq   r   r   r   r   r   rJ   rI   <module>r9     s  2 H G  $ $ $ $ $ $                    ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / /            L K K K K K K K J J J J J J J J 8 8 8 8 8 8            H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O 6 6 6 6 6 6 + + + + + + - - - - - - @ @ @ @ @ @ @ @ @ @             lA lA lA lA lA lA lA lA^' ' ' ' ' ' ' 'TM M M M Mry M M M`Q# Q# Q# Q# Q#") Q# Q# Q#h \ \ \ \ \29 \ \ \~h, h, h, h, h,L*n h, h, h, h, h,rJ   