
    .`ik                        d Z ddlZddlmZmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3m4Z4 ddl5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;  ee<          Z= G d dej>                  Z? G d dej>                  Z@ G d dej>                  ZA G d d ej>                  ZB edd!ddd"#           G d$ d%ej>                              ZC G d& d'e2          ZD G d( d)ej>        e4e3eD          ZEd*e
d+eFd,eGdz  fd-ZHdS ).zSInference-only GLM-4.5, GLM-4.6, GLM-4.7 model
compatible with HuggingFace weights.    N)CallableIterable)islice)nn)Glm4MoeConfig)	Attention)support_torch_compile)CacheConfig
VllmConfigget_current_vllm_config)get_ep_groupget_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )
Glm4MoeMLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsprefixreturnc           	         t                                                       t          ||gdz  d|| d          | _        t	          ||d||| d          | _        |dk    rt          d| d	          t                      | _        d S )
N   Fz.gate_up_projbiasr/   r1   z
.down_proj)r6   r/   r0   r1   siluUnsupported activation: !. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr,   r-   r.   r/   r0   r1   	__class__s          w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/glm4_moe.pyr;   zGlm4MoeMLP.__init__O   s     	6!#%+++
 
 
 +%)(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r<   r?   r=   )r@   xgate_up_s       rB   forwardzGlm4MoeMLP.forwardn   sD    &&q))
KK  ~~a  1rC   )NTr+   )
__name__
__module____qualname__intstrr   boolr;   rI   __classcell__rA   s   @rB   r*   r*   N   s         37## ## # 	#
 )4/# # # 
# # # # # #>      rC   r*   c            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Glm4MoENr+   Fconfigr/   r1   enable_eplbc           	      4   t                                                       t                      | _        |j        | _        t                      j        | _        t                      j        | _	        | j        
                                | _        |j        | _        |j        | _        |j        dk    rt          d|j         d          t!          j        |j        |j        dt&          j                  | _        t!          j        t'          j        |j        t&          j                            | j        _        t3                      }|j        j        }|| _        |j        | _        | j        | _        | j        | j        z   | _         | j         | j        z  | _!        | j	        | j!        z  | _"        | j"        | j!        z   | _#        |j        7|j$        |j        z  }tK          |j        ||j        |d| d          | _&        nd | _&        tO          di d	| j&        d
|j        d|j(        d|j        d|j$        ddd|j)        d|ddd|j*        d|j+        d| dddddd| j        j        d| j        d| j        dt&          j        | _,        d S ) Nr7   r8   r9   F)r6   dtyperW   z.shared_experts)r,   r-   r.   r/   r0   r1   shared_expertsnum_expertstop_kr,   r-   r0   renormalizer/   use_grouped_topkTnum_expert_group
topk_groupr1   z.expertsscoring_funcsigmoidrouted_scaling_factorg      ?e_score_correction_biasrU   num_redundant_expertsrouter_logits_dtype )-r:   r;   r   tp_sizerb   r   device_groupep_grouprank_in_groupep_ranksizeep_sizen_routed_expertsn_shared_expertsr.   r>   r   Linearr,   torchfloat32gate	Parameteremptyrc   r   parallel_configeplb_configrU   rd   n_redundant_expertsn_logical_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endmoe_intermediate_sizer*   rY   r   num_experts_per_toknorm_topk_probn_groupr_   experts)	r@   rT   r/   r1   rU   vllm_configrw   r-   rA   s	           rB   r;   zGlm4MoE.__init__v   s    	;==%+%A"$3#~~3}))++%+%<%+%<&&26+< 2 2 2   I#-	
 
 
	 -/LK/u}EEE-
 -
	)
 .//!1=&#.#D !%!6"&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  ". & <v?V V","."3!,)$ 111# # #D #'D% 
 
 
..
//
 ,,
 **	

 %::
 !5
 --
 &
 "T
 $^^
 ((
 &&&&
 #
 #&#
  %)I$E$E!
" ((#
$ #'":":%
& !&'
rC   hidden_statesr2   c                    |j         \  }}|                    d|          }|                     |                    t          j                            }|                     ||          }| j        |\  }}|J || j        z  |z   }n
|| j        z  }| j	        dk    r| j        
                    |          }|                    ||          S )NrX   )r   router_logitsr   )shapeviewrs   torq   rr   r   rY   rb   rg   &maybe_all_reduce_tensor_model_parallel)r@   r   
num_tokens
hidden_dimr   fused_moe_outshared_outputfinal_hidden_statess           rB   rI   zGlm4MoE.forward   s    !.!4
J%**2z:: 		-"2"2"2"G"GHH'} % 
 
 *1>.M. ,,,#d&@@=P   #0$2L"L<!"&,"U"U## # #''
J???rC   )Nr+   F)rJ   rK   rL   r   r   rN   rO   r;   rq   TensorrI   rP   rQ   s   @rB   rS   rS   u   s         37!S
 S
S
 )4/S
 	S

 S
 S
 S
 S
 S
 S
j@U\ @el @ @ @ @ @ @ @ @rC   rS   c                        e Zd Z	 	 	 	 	 	 	 	 ddededed	ed
ededz  dededededz  dedz  de	ddf fdZ
dej        dej        dej        fdZ xZS )Glm4MoeAttention   Nh㈵>Fr+   rT   r,   	num_headsnum_kv_headsmax_position_embeddingshead_dimrms_norm_epsqkv_biasuse_qk_normcache_configr/   r1   r2   c           
         t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        |p	|| j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        |	| _        t          || j	        | j        | j        ||| d          | _        t#          | j        | j	        z  |d|| d          | _        |j                            dd	           t+          | j	        ||j        
          | _        t/          | j        | j	        | j        | j        |
|| d          | _        | j        r8t3          | j	        |          | _        t3          | j	        |          | _        d S d S )Nr   r   g      z	.qkv_projr5   Fz.o_projpartial_rotary_factorg      ?)max_positionrope_parametersz.attn)r   r   r/   r1   eps)r:   r;   r,   r   total_num_headsr   total_num_kv_headsmaxr   r   q_sizekv_sizescalingr   r   r   qkv_projr   o_projr   
setdefaultr   
rotary_embr   attnr   q_normk_norm)r@   rT   r,   r   r   r   r   r   r   r   r   r/   r1   rg   rA   s                 rB   r;   zGlm4MoeAttention.__init__   sY    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF I[D4H%Hnt}4(4=8}d*'>$&)M #%'''
 
 
 ( 4=0%%%%
 
 
 	))*A3GGG"M0"2
 
 

 NML*%%###
 
 
	  	C!$-\BBBDK!$-\BBBDKKK	C 	CrC   	positionsr   c                 `   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}| j        r|                     |                    d| j        | j                                                |j	                  }| 
                    |                    d| j        | j                                                |j	                  }|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr   )dim)r   splitr   r   r   r   reshaper   r   r   r   r   r   r   r   )
r@   r   r   qkvrH   qkvattn_outputoutputs
             rB   rI   zGlm4MoeAttention.forward3  s   
 }--Q))T[$,E2)NN1a 	AIIb$.$-HHIIQQ A AIIb$*;T]KKLLTT A y!Q//1ii1a((KK,,	rC   )r   Nr   FFNNr+   )rJ   rK   rL   r   rM   floatrO   r
   r   rN   r;   rq   r   rI   rP   rQ   s   @rB   r   r      sI        (.##!+/26JC JCJC JC 	JC
 JC "%JC *JC JC JC JC "D(JC )4/JC JC 
JC JC JC JC JC JCX< | 
	       rC   r   c                        e Zd Z	 	 	 	 ddededz  dedz  deded	df fd
Zde	j
        de	j
        de	j
        dz  d	ee	j
        e	j
        f         fdZ xZS )Glm4MoeDecoderLayerNr+   FrT   r   r/   r1   rU   r2   c                    t                                                       |j        | _        t          |dd          }t	          |                    d          d                   }|| _        t          || j        |j        |j	        ||j
        |j        |j        ||| d|j                  | _        |j        '||j        k    rt#          ||| d|	          | _        n+t'          |j        |j        |j        || d
          | _        t-          |j        |j                  | _        t-          |j        |j                  | _        |j        | _        d S )Nr   r   .)sepr   z
.self_attn)rT   r,   r   r   r   r   r   r   r   r/   r1   r   z.mlp)rT   r/   r1   rU   )r,   r-   r.   r/   r1   r   )r:   r;   r,   getattrrM   r   	layer_idxr   num_attention_headsnum_key_value_headsr   r   attention_biasr   	self_attnrn   first_k_dense_replacerS   mlpr*   r-   r.   r   input_layernormpost_attention_layernormrb   )	r@   rT   r   r/   r1   rU   r   r   rA   s	           rB   r;   zGlm4MoeDecoderLayer.__init__I  s    	!-")&2KV"T"T --b122	")(03$;_,*%%(((*
 
 
  #/V999) '	  DHH ""."(":!,)   DH  'v'9v?RSSS(/F$7)
 )
 )
% &,%A"""rC   r   r   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   r   )r   r   r   r   )r@   r   r   r   s       rB   rI   zGlm4MoeDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8-XX"&"?"?x"X"Xx//h&&rC   )NNr+   F)rJ   rK   rL   r   r
   r   rN   rO   r;   rq   r   tuplerI   rP   rQ   s   @rB   r   r   H  s         ,026!6B 6B6B "D(6B )4/	6B
 6B 6B 
6B 6B 6B 6B 6B 6Bp'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rC   r   r   )	input_idsr   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )Glm4MoeModelr+   r1   r   r1   c                   t                                                       |j        j        |j        |j        |j        j        | _        j	        | _	        t                      j        r%t          j	        j        | d          | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t-          j        j                  | _        nt                      | _        t3          ddgj                  | _        d S )Nz.embed_tokensr   c                 ,    t          |           S )N)rT   r   r/   r1   rU   )r   )r1   r   rT   rU   r/   s    rB   <lambda>z'Glm4MoeModel.__init__.<locals>.<lambda>  s&    .))'   rC   z.layersr   r   r   )r:   r;   model_config	hf_configr   r/   rv   rU   rT   
vocab_sizer   is_first_rankr   r,   embed_tokensr$   r'   num_hidden_layersstart_layer	end_layerlayersis_last_rankr   r   normr&   make_empty_intermediate_tensors)r@   r   r1   r   rT   rU   r/   rA   s      @@@@rB   r;   zGlm4MoeModel.__init__  s[   )3"/"/!1= +>>' 	1 6!6#5>V>V>V! ! !D !/ 0 0D8C$       %%%
9
 
9
 
9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,rC   r   r2   c                 ,    |                      |          S rE   )r   r@   r   s     rB   embed_input_idszGlm4MoeModel.embed_input_ids  s      +++rC   Nr   r   r   c                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr   r   )r   r   )
r   r   r   r   r   r   r   r   r   r   )	r@   r   r   r   r   r   r   layerrH   s	            rB   rI   zGlm4MoeModel.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qrC   c                 H    t          j        | ddd| j        j                  S )N	gate_projr=   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerZ   )r   make_expert_params_mappingrT   rn   r@   s    rB   get_expert_mappingzGlm4MoeModel.get_expert_mapping  s2     8 + +'4
 
 
 	
rC   weightsc           
         g d}t          |                                           }t                      }|                                 }|D ]\  }}t	          | j        |          }||D ]r\  }	}
}|
|vrd|v r||vr|                    |
|	          }|                    d          r||vrDt          ||           rU||         }|j	        } ||||            nd}|D ]}|\  }	}
}}|
|vrd}|                    |
|	          }t          ||           r7||         }t          j        t          dt          f         |j	                  } ||||||d          }|r|} np|r|                    d          r||vr9t          ||          }|Mt          ||           r_||         }t          |dt                     } |||           |                    |           |S )	N))r   q_projr   )r   k_projr   )r   v_projr   )r<   r   r   )r<   r   r   zmlp.experts.z.biasFT.)shard_id	expert_idreturn_successweight_loader)dictnamed_parameterssetr   #get_spec_layer_idx_from_weight_namerT   replaceendswithr%   r   typingcastr   rO   r   r   r   add)r@   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
spec_layer
param_nameweight_namer   paramr   is_expert_weightmappingr   name_mappedsuccesss                      rB   load_weightszGlm4MoeModel.load_weights  s   "
 "
 "
 4002233"%%% $ 7 7 9 9#* W	$ W	$D-<T[$OOJ%5K R8 R81
Kd** #d**K0G0G||K<<==)) d+.E.E*466 #D) % 3e]H===#( 4 98 98GCJ@JY"$..  (,$ #',,{J"G"GK.{DAA ! '4E %+K d+U-@% %M ,m%#!)"+'+  G  * ( ! ! }}W-- !$k2I2I  5T;GGD| .tT:: ! '-E$+0E% %M "M%777d####rC   NN)rJ   rK   rL   r   rN   r;   rq   r   r   r   rI   listr   rM   r   r   r   r  rP   rQ   s   @rB   r   r     s\        BD $
 $
 $
z $
3 $
 $
 $
 $
 $
 $
L, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   :	
DsCc/A)B$C 	
 	
 	
 	
fHU33D-E$F f3s8 f f f f f f f frC   r   c                   8    e Zd Zdedz  ddfdZdededdfdZdS )Glm4MixtureOfExpertsexample_moeNr2   c                     |t          d          |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _
        |j        | _        d S )Nz'No Glm4MoE layer found in model.layers.)RuntimeErrorry   num_logical_expertsrz   num_physical_expertsr{   num_local_physical_expertsrn   num_routed_expertsro   num_shared_expertsrx   rd   )r@   r  s     rB   extract_moe_parametersz+Glm4MixtureOfExperts.extract_moe_parametersV  s_    HIII'2'DD$(3(FD%.9.RD+&1&BD#&1&BD#)4)HD&&&rC   r  r  c                     | j         |k    sJ || _        || _         || j        z
  | _        | j        D ]5}||_        ||_        | j        |_        |j        	                                 6d S rE   )
r  r  r  rd   moe_mlp_layersr{   rz   rx   r   update_expert_map)r@   r  r  moes       rB    update_physical_experts_metadataz5Glm4MixtureOfExperts.update_physical_experts_metadataa  s    
 .2LLLLL$8!*D'%9D<T%T"& 	, 	,C+EC(%9C"&*&@C#K))++++		, 	,rC   )rJ   rK   rL   rS   r   rM   r%  rf   rC   rB   r  r  U  sp        	I'D. 	IT 	I 	I 	I 	I,!, %(, 
	, , , , , ,rC   r  c                   \    e Zd Zg dddgdZdZddded	ef fd
Zdej	        dej	        fdZ
	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
dZdej	        dej	        dz  fdZdeeeej	        f                  dee         fdZdeeeeeef                  fdZ xZS )Glm4MoeForCausalLM)r   r   r   r   r   )r   r<   Fr+   r   r   r1   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        r1t          |j        |j        |t          |d                    | _        nt                      | _        t!          |j                  | _        | j        j        | _        g | _        |j        |j        z
  | _        |j        | _        g | _        g | _        d }| j        j        D ]}t9          |t                    rt9          |t:                    sJ t9          |j        t>                    rJ|j        }| j                             |j                   | j                             |j        j!                   | "                    |           d S )Nmodel)r   r1   lm_head)r/   r1   )#r:   r;   r   r   r/   rT   r   r(   r)  r   r   r   r   r,   r*  r$   r   logits_processorr   expert_weightsr   r   num_moe_layersr   num_expert_groups
moe_layersr"  r   
isinstancer   r   rS   appendr   r   )r@   r   r1   rT   r/   r  r   rA   s          rB   r;   zGlm4MoeForCausalLM.__init__  s   )3"/(!#L,I,I
 
 

 >>& 	,)!")#FI66	  DLL *++DL /0A B BJ6 	, ! %69UU!'-/Z& 		: 		:E%00 e%899999%)W-- :#i#**59555&&uy'8999##K00000rC   r   r2   c                 6    | j                             |          S rE   )r)  r   r   s     rB   r   z"Glm4MoeForCausalLM.embed_input_ids  s    z)))444rC   Nr   r   r   c                 6    |                      ||||          }|S rE   )r)  )r@   r   r   r   r   r   s         rB   rI   zGlm4MoeForCausalLM.forward  s)     

y"6
 
 rC   r   c                 <    |                      | j        |          }|S rE   )r+  r*  )r@   r   logitss      rB   compute_logitsz!Glm4MoeForCausalLM.compute_logits  s      &&t|]CCrC   r   c                 J    t          |           }|                    |          S rE   )r#   r  )r@   r   loaders      rB   r  zGlm4MoeForCausalLM.load_weights  s#    "4((""7+++rC   c                 4    | j                                         S rE   )r)  r   r   s    rB   r   z%Glm4MoeForCausalLM.get_expert_mapping  s    z,,...rC   r  )rJ   rK   rL   packed_modules_mappingfall_back_to_pt_during_loadr   rN   r;   rq   r   r   r   rI   r6  r   r   r   r  r  rM   r   rP   rQ   s   @rB   r'  r'  q  s       
 
 
 

 
 #(AC +1 +1 +1z +13 +1 +1 +1 +1 +1 +1Z5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rC   r'  rT   r  r2   c                     t          | d          r;| j        dk    r0| j        }t          | j                  D ]}d||z    d|v r||z   c S d S )Nnum_nextn_predict_layersr   zlayers.r   )hasattrr=  r   range)rT   r  r   is       rB   r   r     s{     v122 %'!++,	v677 	% 	%A)Q)))[88 1}$$$ 94rC   )I__doc__r  collections.abcr   r   	itertoolsr   rq   r   transformers.models.glm4_moer   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   r   vllm.distributedr   r   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr    r!   r"   utilsr#   r$   r%   r&   r'   r(   rJ   loggerModuler*   rS   r   r   r   r  r'  rN   rM   r   rf   rC   rB   <module>rX     s^  0( (  . . . . . . . .              6 6 6 6 6 6 * * * * * * = = = = = = H H H H H H H H H H         
 $ # # # # # < < < < < < ? ? ? ? ? ? 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - B B B B B B B B B B                
X		$ $ $ $ $ $ $ $Nn@ n@ n@ n@ n@bi n@ n@ n@b_ _ _ _ _ry _ _ _DG' G' G' G' G'") G' G' G'T  !	   x x x x x29 x x xv, , , , ,+ , , ,8W/ W/ W/ W/ W/J>R W/ W/ W/t

(+
4Z
 
 
 
 
 
rC   