
    .`im                        d Z ddlZddlmZmZ ddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@  eeA          ZB G d de	jC                  ZD G d de	jC                  ZE G d d e	jC                  ZF G d! d"e	jC                  ZGe G d# d$e	jC                              ZH G d% d&e	jC        e8e7e6          ZIdS )'zBInference-only ErineMoE model compatible with HuggingFace weights.    N)CallableIterable)islice)Any)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfigget_current_vllm_config)get_ep_groupget_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)set_default_rope_theta   )MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   V     e Zd Z	 	 	 	 ddedededed	edz  d
ededdf fdZd Z xZ	S )Ernie4_5_MoeMLPFNT hidden_sizeintermediate_size
hidden_actuse_biasquant_configreduce_resultsprefixreturnc           	         t                                                       t          ||gdz  ||| d          | _        t	          |||||| d          | _        |dk    rt          d| d          t                      | _        d S )	N   z.gate_up_projbiasr4   r6   z
.down_proj)r;   r4   r5   r6   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)	selfr0   r1   r2   r3   r4   r5   r6   	__class__s	           z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/ernie45_moe.pyr>   zErnie4_5_MoeMLP.__init__R   s     	6!#%+++
 
 
 +%)(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r?   rB   r@   )rC   xgate_up_s       rE   forwardzErnie4_5_MoeMLP.forwardr   sD    &&q))
KK  ~~a  1rF   )FNTr/   )
__name__
__module____qualname__intstrboolr   r>   rL   __classcell__rD   s   @rE   r.   r.   Q   s         26## ## # 	#
 # )4/# # # 
# # # # # #@      rF   r.   c            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Ernie4_5_MoeMoENr/   Fconfigr4   r6   enable_eplbc                 \   t                                                       t          |          }|| _        t	                      | _        t          |dd           | _        t                      j	        | _
        t                      j        | _        | j
                                        | _        |j        | _        | j        | _        t%                      }|j        j        }|| _        |j        | _        | j        | _        | j        | j        z   | _        | j        | j        z  | _        | j        | j        z  | _        | j        | j        z   | _        t          |dd          dk    | _        | j        |j        k    r t=          d| j         d|j         d          t?          |j         |j        dtB          j"        d | d          | _#        tI          j%        tC          j&        |j        tB          j"        	                    | j#        _'        | j        r7|j(        |j        z  }tS          |j         ||j*        || d
d          | _+        nd | _+        tY          | j+        |j        |j-        |j         |j(        dd|| d| j#        j'        | j        | j        tB          j"                  | _.        d S )Nmoe_num_shared_expertsr   zTensor parallel size z' is greater than the number of experts .Fz.gate)r;   params_dtyper4   r6   dtypez.shared_experts)r0   r1   r2   r4   r6   r5   Tz.experts)shared_expertsnum_expertstop_kr0   r1   r5   renormalizer4   r6   e_score_correction_biasrX   num_redundant_expertsrouter_logits_dtype)/r=   r>   r(   	layer_idxr   tp_sizegetattrrZ   r   device_groupep_grouprank_in_groupep_ranksizeep_sizemoe_num_expertsn_routed_expertsn_shared_expertsr   parallel_configeplb_configrX   rd   n_redundant_expertsn_logical_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endhas_shared_expertsrA   r   r0   torchfloat32gater   	Parameteremptyrc   moe_intermediate_sizer.   r2   r_   r   moe_kexperts)
rC   rW   r4   r6   rX   rf   vllm_configrs   r1   rD   s
            rE   r>   zErnie4_5_MoeMoE.__init__z   s    	'//	";==&-f6NPT&U&U#$3#~~3}))++%+%;%)%@ .//!1=&#.#D !%!6"&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  #*&2JA"N"NQR"R<&000C C C)/)?C C C  
 %"###
 
 
	 -/LK.emDDD-
 -
	) " 	',v/LL  #2"."3!,) 111$# # #D #'D%..,*$: %&&&$(I$E("&": %
 
 
rF   hidden_statesr7   c                    |j         }|j         d         }|                    d|          }|                     |                    t          j                            \  }}|                     ||          }| j        r|d         |d         z   }n|d         }| j        dk    r| j        	                    |          }|                    |          S )Nr]   )r   router_logitsr   r"   )
shapeviewr}   tor{   r|   r   rz   rg   &maybe_all_reduce_tensor_model_parallel)rC   r   
orig_shape
hidden_dimr   rK   final_hidden_statess          rE   rL   zErnie4_5_MoeMoE.forward   s    "(
"(,
%**2z::99]%5%5EM%5%J%JKKq"ll'} + 
 
 " 	9"5a"8;Nq;Q"Q"5a"8<!"&,"U"U## # #''
333rF   )Nr/   F)rM   rN   rO   r   r   rQ   rR   r>   r{   TensorrL   rS   rT   s   @rE   rV   rV   y   s         37!S
 S
 S
 )4/S
 	S

 S
 S
 S
 S
 S
 S
j4U\ 4el 4 4 4 4 4 4 4 4rF   rV   c                        e Zd Z	 	 	 	 	 	 	 ddededed	eeef         d
edz  dededede	dz  de
dz  deddf fdZdej        dej        dej        fdZ xZS )Ernie4_5_MoeAttentionN   h㈵>Fr/   r0   	num_headsnum_kv_headsrope_parametershead_dimmax_position_embeddingsrms_norm_epsqkv_biascache_configr4   r6   r7   c           
         t                                                       t          |          dk    rt          |          nd}|| _        || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _	        | j	        |k    r| j	        |z  dk    sJ n|| j	        z  dk    sJ t          d| j	        |z            | _        |p	|| j        z  | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        || _        t#          || j        | j        | j	        ||
| d          | _        t'          | j        | j        z  |d|
| d          | _        t+          | j        ||d          | _        t/          | j        | j        | j        | j        |	|
| d	
          | _        d S )Nr   r"   g      z	.qkv_projr:   Fz.o_proj)max_positionr   is_neox_stylez.attn)r   r   r4   r6   )r=   r>   lenr(   rf   r0   r   total_num_headsr   total_num_kv_headsmaxr   r   q_sizekv_sizescalingr   r   qkv_projr   o_projr   
rotary_embr	   attn)rC   r0   r   r   r   r   r   r   r   r   r4   r6   rf   rg   rD   s                 rE   r>   zErnie4_5_MoeAttention.__init__   s    	36v;;??'///	"&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF I[D4H%Hnt}4(4=8}d*'>$)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0+	
 
 
 NML*%%###
 
 
			rF   	positionsr   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr   )dim)r   splitr   r   r   r   r   )
rC   r   r   qkvrK   qkvattn_outputoutputs
             rE   rL   zErnie4_5_MoeAttention.forward2  s    
 }--Q))T[$,E2)NN1ay!Q//1 ii1a((KK,,	rF   )Nr   r   FNNr/   )rM   rN   rO   rP   dictrQ   r   floatrR   r   r   r>   r{   r   rL   rS   rT   s   @rE   r   r      s/         $'-#+/26H
 H
H
 H
 	H

 c3hH
 *H
 "%H
 H
 H
 "D(H
 )4/H
 H
 
H
 H
 H
 H
 H
 H
T< | 
	       rF   r   c                        e Zd Z	 	 	 	 ddededz  dedz  deded	df fd
Zde	j
        de	j
        de	j
        dz  d	e	j
        fdZ xZS )Ernie4_5_MoeDecoderLayerNr/   FrW   r   r4   r6   rX   r7   c                    t                                                       |j        | _        t          |d           t	          |dd          }t          | j        |j        |j        t	          |dd           |j        ||j	        t	          |dd          ||| d	          | _
        t          |          }|| _        t	          |d
d          }t	          |dd          }	t	          |d|j        dz
            }
t	          |dd          }t	          |d|dk              }|r4|dz   |z  dk    r(||	k    r"||
k    rt          ||| d|          | _        n;t!          |j        |j        |j        t	          |dd          || d          | _        t'          |j        |j	                  | _        t'          |j        |j	                  | _        d S )Ni  )default_thetar   r   r   r3   Fz
.self_attn)r0   r   r   r   r   r   r   r   r   r4   r6   ro   r   moe_layer_start_indexmoe_layer_end_indexr"   moe_layer_intervaluse_moez.mlp)rW   r4   r6   rX   )r0   r1   r2   r3   r4   r6   eps)r=   r>   r0   r!   rh   r   num_attention_headsnum_key_value_headsr   r   	self_attnr(   rf   num_hidden_layersrV   mlpr.   r1   r2   r   input_layernormpost_attention_layernorm)rC   rW   r   r4   r6   rX   r   rf   ro   r   r   r   r   rD   s                rE   r>   z!Ernie4_5_MoeDecoderLayer.__init__D  s&    	!-vV<<<<")&2KV"T"T.(03VZ66"2$;,VZ77%%(((
 
 
 (//	" "&*;Q?? '0G K K%)6+Ca+G
 
 %V-A1EE&)_q-@AA 	a-#55::222000&) '	  DHH '"."(":!, U;;)   DH  'v'9v?RSSS(/F$7)
 )
 )
%%%rF   r   r   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   r   )r   r   r   r   )rC   r   r   r   s       rE   rL   z Ernie4_5_MoeDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&rF   )NNr/   F)rM   rN   rO   r   r   r   rQ   rR   r>   r{   r   rL   rS   rT   s   @rE   r   r   C  s         ,026!?
 ?
 ?
 "D(?
 )4/	?

 ?
 ?
 
?
 ?
 ?
 ?
 ?
 ?
B'<' |' ,%	'
 
' ' ' ' ' ' ' 'rF   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )Ernie4_5_MoeModelr/   r6   r   r6   c                   t                                                       |j        j        |j        |j        j        | _        j        | _        | _	        |j
        }|j        }|j        |j        | _        t                      j        r&t!          j        j        | d          | _        nt'                      | _        t)          j        fd| d          \  | _        | _        | _        t                      j        r!t5          j        j                  | _        nt'                      | _        t;          ddgj                  | _        d S )	Nz.embed_tokensr4   r6   c                 ,    t          |           S )N)rW   r   r4   r6   rX   )r   )r6   r   rW   rX   r4   s    rE   <lambda>z,Ernie4_5_MoeModel.__init__.<locals>.<lambda>  s&    3))'   rF   z.layersr   r   r   r   )r=   r>   model_config	hf_configr   r4   pad_token_idpadding_idx
vocab_sizerW   rr   rs   rX   rd   r   is_first_rankr   r0   embed_tokensr'   r+   r   start_layer	end_layerlayersis_last_rankr   r   normr*   make_empty_intermediate_tensors)
rC   r   r6   rr   rs   r   rW   rX   r4   rD   s
        @@@@rE   r>   zErnie4_5_MoeModel.__init__  s   )3"/"/!. +%5%1%1%0%F">>' 	1 6!") ///	! ! !D !/ 0 0D8C$       %%%
9
 
9
 
9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,rF   	input_idsr7   c                 ,    |                      |          S rH   )r   rC   r   s     rE   embed_input_idsz!Ernie4_5_MoeModel.embed_input_ids  s      +++rF   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr   r   )r   r   )
r   r   r   r   r   r   r   r   r    r   )	rC   r   r   r   r   r   r   layerrK   s	            rE   rL   zErnie4_5_MoeModel.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qrF   c                 T    t          j        | ddd| j        j        | j                  S )N	gate_projr@   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer`   rd   )r   make_expert_params_mappingrW   ro   rd   rC   s    rE   get_expert_mappingz$Ernie4_5_MoeModel.get_expert_mapping  s8     8 + +'3"&"<
 
 
 	
rF   weightsc           
         g d}t          |                                           }t                      }|                                 }|D ]6\  }}| j        j        r|                    d          r(d|v r-d|v r+|                    dd          }|                    d          }|D ]\  }}	}
|	|vrd|v r||vr|                    |	|          }|                    d	          s|                    d
          r||vrYt          ||           rj||         }|j
        } ||||
            n:d}|D ]}|\  }}	}}
|	|vrd}|                    |	|          }t          ||           r7|                    d	          s|                    d
          r||vrf||         }t          j        t          dt          f         |j
                  } |||||
|d          }|r|} n|r|                    d	          s|                    d
          r||vrt          ||           rt          ||          }|||         }t!          |dt"                    } |||           |                    |           8|S )N))r   q_projr   )r   k_projr   )r   v_projr   )r?   r   r   )r?   r   r"   zlm_head.weightmtprc   moe_staticsr}   r   zmlp.experts.z.bias_biasFT.)shard_id	expert_idreturn_successweight_loader)r   named_parameterssetr   rW   tie_word_embeddingsendswithreplacesqueezer)   r   typingcastr   rR   r   rh   r   add)rC   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   is_expert_weightmappingr   name_mappedsuccesss                     rE   load_weightszErnie4_5_MoeModel.load_weights  sa   "
 "
 "
 4002233"%%% $ 7 7 9 9#* c	$ c	$D-{. 4==AQ3R3R }}(D00||M6:: - 5 5a 8 85K W8 W81
Kd**"d**K0G0G||K<< MM'**.2mmG.D.D+--*466 #D) % 3e]H===#( 4 @8 @8GCJ@JY"$..  (,$ #',,{J"G"GK.{DAA !  $,,W55!9D9M9Mg9V9V!%[88 '4E %+K d+U-@% %M ,m%#!)"+'+  G  * ( ! ! g..!26--2H2H!k11 .tT:: ! 4T;GGD| '-E$+0E% %M "M%777d####rF   NN)rM   rN   rO   r   rQ   r>   r{   r   r   r    rL   listtuplerP   r   r   r   r  rS   rT   s   @rE   r   r     sX       AC -
 -
 -
z -
3 -
 -
 -
 -
 -
 -
^, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   <

DsCc/A)B$C 

 

 

 

qHU33D-E$F q3s8 q q q q q q q qrF   r   c                   p    e Zd Zg dddgdZdZddded	ef fd
ZdededdfdZ	de
j        de
j        fdZ	 	 dde
j        de
j        dedz  de
j        dz  de
j        ez  f
dZde
j        de
j        dz  fdZdeeee
j        f                  dee         fdZdeeeeeef                  fdZ xZS )Ernie4_5_MoeForCausalLM)r   r   r   r   r   )r   r?   Fr/   r   r   r6   c          	         t                                                       |j        j        |j        }| _        || _        t          |t          |d                    | _        t                      j
        r1t          j        j        |t          |d                    | _        nt                      | _        | j        j        r| j        j        j        | j        _        t'          j                  | _        | j        j        | _        g | _        fdt/          j                  D             }t3          |          | _        d| _        g | _        d }| j        j        D ]t}t=          |t                    rt=          |t>                    sJ t=          |j         tB                    r+|j         }| j        "                    |j         j#                   u|FtH          %                    d           d| _&        d| _'        d| _(        d| _)        d| _*        d| _+        d S |j,        | _&        |j-        | _'        |j.        | _(        |j/        | _)        |j0        | _*        |j1        | _+        d S )	Nmodel)r   r6   lm_headr   c                 b    g | ]+}|j         k    r|j        k    r|d z   j        z  dk    )|,S )r"   r   )r   r   r   ).0irW   s     rE   
<listcomp>z4Ernie4_5_MoeForCausalLM.__init__.<locals>.<listcomp>  sX     
 
 
V111333Uf771<< 
 =<<rF   r"   z/No Ernie4_5_MoeMoE layer found in model.layers.r   )2r=   r>   r   r   r4   rW   r   r,   r  r   r   r   r   r0   r  r'   r   r   weightr   logits_processorr   expert_weightsranger   r   num_moe_layersnum_expert_groups
moe_layersr   
isinstancer   r   rV   appendr   loggerwarningnum_logical_expertsnum_physical_expertsnum_local_physical_expertsnum_routed_expertsnum_shared_expertsrd   ru   rv   rw   rp   rq   rt   )	rC   r   r6   r4   moe_layers_indicesexample_moer   rW   rD   s	          @rE   r>   z Ernie4_5_MoeForCausalLM.__init__  sk   )3"/(&#L,I,I
 
 

 >>& 	,)!")#FI66	  DLL *++DL;* 	A"&*"9"@DL /0A B BJ6 	, !
 
 
 
6344
 
 
 ""455!"02Z& 	: 	:E%00 e%=>>>>>%)_55 :#i&&uy'8999NNLMMM'(D$()D%./D+&'D#&'D#)*D&&&'2'DD$(3(FD%.9.RD+&1&BD#&1&BD#)4)HD&&&rF   r)  r*  r7   Nc                     | j         |k    sJ || _        || _         || j        z
  | _        | j        j        D ]V}t          |j        t                    r:|j        }||_	        ||_
        | j        |_        |j                                         Wd S rH   )r*  r)  r(  rd   r  r   r$  r   rV   rw   rv   rt   r   update_expert_map)rC   r)  r*  r   moes        rE    update_physical_experts_metadataz8Ernie4_5_MoeForCausalLM.update_physical_experts_metadata  s    
 .2LLLLL$8!*D'%9D<T%T"Z& 	0 	0E%)_55 0i/I,)=&*.*D'--///	0 	0rF   r   c                 6    | j                             |          S rH   )r  r   r   s     rE   r   z'Ernie4_5_MoeForCausalLM.embed_input_ids  s    z)))444rF   r   r   r   c                 6    |                      ||||          }|S rH   )r  )rC   r   r   r   r   r   s         rE   rL   zErnie4_5_MoeForCausalLM.forward  s)     

y"6
 
 rF   r   c                 <    |                      | j        |          }|S rH   )r  r  )rC   r   logitss      rE   compute_logitsz&Ernie4_5_MoeForCausalLM.compute_logits  s      &&t|]CCrF   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r&   rW   r   r  )rC   r   loaders      rE   r  z$Ernie4_5_MoeForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++rF   c                 4    | j                                         S rH   )r  r   r   s    rE   r   z*Ernie4_5_MoeForCausalLM.get_expert_mapping  s    z,,...rF   r  )rM   rN   rO   packed_modules_mappingfall_back_to_pt_during_loadr   rQ   r>   rP   r2  r{   r   r   r    rL   r7  r   r  r   r  r  r   rS   rT   s   @rE   r  r  q  s       
 
 
 

 
 #(AC CI CI CIz CI3 CI CI CI CI CI CIJ0!0 %(0 
	0 0 0 0"5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rF   r  )J__doc__r   collections.abcr   r   	itertoolsr   r   r{   r   transformersr   vllm.attention.layerr	   vllm.compilation.decoratorsr
   vllm.configr   r   r   vllm.distributedr   r   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer    vllm.transformers_utils.configr!   
interfacesr#   r$   r%   utilsr&   r'   r(   r)   r*   r+   r,   rM   r&  Moduler.   rV   r   r   r   r   rF   rE   <module>rV     s  0 I H  . . . . . . . .                    ) ) ) ) ) ) * * * * * * = = = = = = H H H H H H H H H H         
 $ # # # # # < < < < < < ? ? ? ? ? ? 8 8 8 8 8 8            H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - A A A A A A B B B B B B B B B B                  
X		% % % % %bi % % %Pk4 k4 k4 k4 k4bi k4 k4 k4\Y Y Y Y YBI Y Y YxY' Y' Y' Y' Y'ry Y' Y' Y'x N N N N N	 N N NbC/ C/ C/ C/ C/bi\CS C/ C/ C/ C/ C/rF   