
    .`iZ                     j   d Z ddlmZ ddlmZ ddlmZ ddlZddlm	c m
Z ddlm	Z	 ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;  ee<          Z= G d de	j>                  Z? G d de	j>                  Z@ G d de	j>                  ZA G d  d!e	j>                  ZBe G d" d#e	j>                              ZC G d$ d%e	j>        e4e3          ZDdS )&zBInference-only Qwen2MoE model compatible with HuggingFace weights.    )Iterable)islice)AnyN)nn)Qwen2MoeConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   p     e Zd Z	 	 	 	 ddededededz  ded	ej        j	        dz  d
eddf fdZ
d Z xZS )Qwen2MoeMLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsexpert_gateprefixreturnc           	      (   t                                                       t          ||gdz  d|| d          | _        t	          ||d||| d          | _        |dk    rt          d| d	          t                      | _        || _	        d S )
N   Fz.gate_up_projbiasr,   r/   z
.down_proj)r4   r,   r-   r/   siluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fnr.   )	selfr)   r*   r+   r,   r-   r.   r/   	__class__s	           x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/qwen2_moe.pyr7   zQwen2MoeMLP.__init__K   s     	6!#%+++
 
 
 +%)(((
 
 
 X:XXX   !ll&    c                     |                      |          \  }}|                     |          }|                     |          \  }}| j        0t	          j        |                     |          d                   |z  }|S )Nr   )r8   r;   r9   r.   Fsigmoid)r<   xgate_up_outs        r>   forwardzQwen2MoeMLP.forwardl   st    &&q))
kk'""$$Q')D,,Q//233c9C
r?   )NTNr(   )__name__
__module____qualname__intstrr   booltorchr   Linearr7   rG   __classcell__r=   s   @r>   r'   r'   J   s         37#.2' '' ' 	'
 )4/' ' X_t+' ' 
' ' ' ' ' 'B      r?   r'   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )Qwen2MoeSparseMoeBlockNr(   configr,   r/   c                 b   t                                                       t                      | _        | j        |j        k    r t          d| j         d|j         d          t          |j        |j        dd | d          | _        t          |j        ddd | d          | _	        |j
        d	k    r3t          |j        |j
        |j        |d| j	        | d
          | _        nd | _        t          | j        |j        |j        |j        |j        d|j        || d	  	        | _        d S )NzTensor parallel size z' is greater than the number of experts .Fz.gater3   r   z.shared_expert_gater   z.shared_expert)r)   r*   r+   r,   r-   r.   r/   z.experts)	shared_expertsnum_expertstop_kr)   r*   r-   renormalizer,   r/   )r6   r7   r   tp_sizerX   r:   r   r)   gateshared_expert_gateshared_expert_intermediate_sizer'   r+   shared_expertr   num_experts_per_tokmoe_intermediate_sizenorm_topk_probexperts)r<   rT   r,   r/   r=   s       r>   r7   zQwen2MoeSparseMoeBlock.__init__x   s    	;==<&,,,? ? ?)/);? ? ?  
 %###
 
 
	 #3111#
 #
 #
 1A55!,"."("H!,)$ 3 000" " "D "&D%-*,*$: -%&&&

 

 

r?   hidden_statesr0   c                 X   |j         }|j         d         }|                    d|          }|                     |          \  }}|                     ||          }| j        |d         |d         z   }| j        dk    r| j                            |          }|                    |          S )N)rd   router_logitsr   r   )shapeviewr\   rc   r_   r[   &maybe_all_reduce_tensor_model_parallel)r<   rd   
orig_shape
hidden_dimrg   rE   final_hidden_statess          r>   rG   zQwen2MoeSparseMoeBlock.forward   s    "(
"(,
%**2z::  99]33q"ll'} + 
 
 )"5a"8;Nq;Q"Q<!"&,"U"U## # #''
333r?   )Nr(   )rH   rI   rJ   r   r   rL   r7   rN   TensorrG   rP   rQ   s   @r>   rS   rS   w   s         37	6
 6
6
 )4/6
 	6
 6
 6
 6
 6
 6
p4U\ 4el 4 4 4 4 4 4 4 4r?   rS   c                        e Zd Z	 	 	 	 	 	 ddedededeeef         dz  ded	edz  d
edz  dedeeef         dz  ddf fdZ	de
j        de
j        de
j        fdZ xZS )Qwen2MoeAttentionN    r(   r)   	num_headsnum_kv_headsrope_parametersmax_position_embeddingscache_configr,   r/   dual_chunk_attention_configr0   c
           
      t   t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        |	| _        t          || j	        | j        | j        d|| d          | _        t#          | j        | j	        z  |d|| d          | _        t'          | j	        |||		          | _        t+          | j        | j	        | j        f| j        ||| d
d|	rt-          |          |	dni | _        d S )Nr   r   g      Tz	.qkv_projr3   Fz.o_proj)max_positionrt   rw   z.attn)rs   rv   r,   r/   )	layer_idxrw   )r6   r7   r)   r   total_num_headsrr   total_num_kv_headsmaxrs   head_dimq_sizekv_sizescalingru   rw   r   qkv_projr   o_projr   
rotary_embr   r!   attn)r<   r)   rr   rs   rt   ru   rv   r,   r/   rw   r[   r=   s              r>   r7   zQwen2MoeAttention.__init__   s,    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*'>$+F()M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0+(C	
 
 
 NML
 *%%###
 
 +	088/J  
 
 
			r?   	positionsrd   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nrf   )dim)r   splitr   r   r   r   r   )
r<   r   rd   qkvrE   qkvattn_outputoutputs
             r>   rG   zQwen2MoeAttention.forward  s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r?   )Nrq   NNr(   N)rH   rI   rJ   rK   dictrL   r   r
   r   r7   rN   rn   rG   rP   rQ   s   @r>   rp   rp      s$        26'++/26=AI
 I
I
 I
 	I

 c3h$.I
 "%I
 "D(I
 )4/I
 I
 &*#s(^d%:I
 
I
 I
 I
 I
 I
 I
V
<
 |
 
	
 
 
 
 
 
 
 
r?   rp   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  dej	        fdZ
 xZS )Qwen2MoeDecoderLayerNr(   rT   rv   r,   r/   r0   c                    t                                                       |j        | _        t          |dd           }t          |dd          }t	          | j        |j        |j        |j        |||| d|	  	        | _        t          |          }t          |d          sg n|j        }||vr7|j        dk    r,|dz   |j        z  dk    rt          ||| d	
          | _        n+t!          |j        |j        |j        || d	          | _        t'          |j        |j                  | _        t'          |j        |j                  | _        d S )Nrw   ru   rq   z
.self_attn)	r)   rr   rs   rt   ru   rv   r,   r/   rw   mlp_only_layersr   r   z.mlp)rT   r,   r/   )r)   r*   r+   r,   r/   eps)r6   r7   r)   getattrrp   num_attention_headsnum_key_value_headsrt   	self_attnr!   hasattrr   rX   decoder_sparse_steprS   mlpr'   r*   r+   r   rms_norm_epsinput_layernormpost_attention_layernorm)
r<   rT   rv   r,   r/   rw   ru   rz   r   r=   s
            r>   r7   zQwen2MoeDecoderLayer.__init__  s    	!-&-14'
 '
# #*&2KT"R"R*(03"2$;%%((((C

 

 

 (//	f&788TBBf>T 	 _,,""	A9S'SWX'X'X-LF  DHH #"."(":!,)   DH  'v'9v?RSSS(/F$7)
 )
 )
%%%r?   r   rd   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rd   )r   r   r   r   )r<   r   rd   r   s       r>   rG   zQwen2MoeDecoderLayer.forwardQ  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&r?   )NNr(   )rH   rI   rJ   r   r
   r   rL   r7   rN   rn   rG   rP   rQ   s   @r>   r   r     s         ,0260
 0
0
 "D(0
 )4/	0

 0
 
0
 0
 0
 0
 0
 0
d'<' |' ,%	'
 
' ' ' ' ' ' ' 'r?   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )Qwen2MoeModelr(   r/   vllm_configr/   c                   t                                                       |j        j        |j        |j        j        | _        | _        t          j        j	        | d          | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        t'          ddgj	                  | _        d S )	Nz.embed_tokensr,   r/   c                 *    t          |           S )N)rT   rv   r,   r/   )r   )r/   rv   rT   r,   s    r>   <lambda>z(Qwen2MoeModel.__init__.<locals>.<lambda>|  s#    /))	   r?   z.layersr   r   rd   r   )r6   r7   model_config	hf_configrv   r,   
vocab_sizerT   r   r)   embed_tokensr$   num_hidden_layersstart_layer	end_layerlayersr   r   normr#   make_empty_intermediate_tensors)r<   r   r/   rv   rT   r,   r=   s      @@@r>   r7   zQwen2MoeModel.__init__j  s   )3"/"/ +2%+++	
 
 
 9D$      %%%	9
 	9
 	9
5$.$+ F.F4GHHH	/Vj)6+=0
 0
,,,r?   	input_idsr0   c                 ,    |                      |          S N)r   r<   r   s     r>   embed_input_idszQwen2MoeModel.embed_input_ids  s      +++r?   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nrd   r   )rd   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	r<   r   r   r   r   rd   r   layerrE   s	            r>   rG   zQwen2MoeModel.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qr?   c                 H    t          j        | ddd| j        j                  S )N	gate_projr9   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerX   )r   make_expert_params_mappingrT   rX   r<   s    r>   get_expert_mappingz Qwen2MoeModel.get_expert_mapping  s2     8 + +'/
 
 
 	
r?   weightsc           	         g d}t          |                                           }t                      }|                                 }|D ]\  }}|D ]\  }}	}
|	|vrd|v r|                    |	|          }|                    d          s|                    d          r||vrUt          ||           rf||vrk||         }|j        } ||||
            nj|D ]}|\  }}	}}
|	|vr|                    |	|          }t          ||           r5|                    d          s|                    d          r||vrd||         }|j        } |||||
|            n|                    d          s|                    d          r||vrIt          ||           r[|                    d          r:|                    dd          }||vrt          	                    d	||           |}d
|v r$t          |j                  dk    r|d d d f         }||         }t          |dt                    } |||           |                    |           |S )N))r   q_projr   )r   k_projr   )r   v_projr   )r8   r   r   )r8   r   r   zmlp.expertsz.bias_bias)shard_id	expert_idkv_scalez	.kv_scalez.attn.kv_scalez{Found kv_scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv_scale is not loaded.zmlp.shared_expert_gater   weight_loader)r   named_parameterssetr   replaceendswithr"   r   loggerwarning_oncelenrh   r   r   add)r<   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   mappingr   remapped_kv_scale_names                   r>   load_weightszQwen2MoeModel.load_weights  s:   "
 "
 "
 4002233"%%% $ 7 7 9 9#* X	$ X	$D-5K V8 V81
Kd** !D((||K<< MM'**.2mmG.D.D+--*466 {**#D) % 3e]H===4 98 98GCJ@JY"$.. <<Z@@D /tT:: !  g..!26--2H2H!k11 '-E$)$7M!M%!)"+    E g..!26--2H2H!k11 .tT:: ! }}Z00 :15')92 2. 2DD"// !^ $ 6  
 %#9D 1D88 34499(5dAAAg(>'-E$+0E% %M "M%777d####r?   NN)rH   rI   rJ   r   rL   r7   rN   rn   r   r   rG   listtuplerK   r   r   r   r   rP   rQ   s   @r>   r   r   h  sW       AC 
 
 
z 
3 
 
 
 
 
 
>, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   4	
DsCc/A)B$C 	
 	
 	
 	
fHU33D-E$F f3s8 f f f f f f f fr?   r   c                   V    e Zd ZdZdg diZdddedef fdZd	ej	        d
ej	        fdZ
	 	 dd	ej	        dej	        dedz  dej	        dz  d
ej	        ez  f
dZdej	        d
ej	        dz  fdZdeeeej	        f                  d
ee         fdZd
eeeeeef                  fdZ xZS )Qwen2MoeForCausalLMFr   )r   r   r   r(   r   r   r/   c          	      $   t                                                       |j        j        }|j        }|| _        || _        t          |dg           s|j        dk    rddg| j        d<   t          |t          |d                    | _        t          |j        |j        |t          |d          	          | _        | j        j        r| j        j        j        | j        _        t'          |j                  | _        | j        j        | _        d S )
Nr   r   r   r   r8   model)r   r/   lm_headr   )r6   r7   r   r   r,   rT   r   r^   packed_modules_mappingr   r%   r   r   r   r)   r   tie_word_embeddingsr   weightr   logits_processorr   )r<   r   r/   rT   r,   r=   s        r>   r7   zQwen2MoeForCausalLM.__init__$  s   )3"/( F-r22	S599;F	:RD'7"#L,I,I
 
 

 &%	22	
 
 
 ;* 	A"&*"9"@DL /0A B BJ6 	,,,r?   r   r0   c                 6    | j                             |          S r   )r   r   r   s     r>   r   z#Qwen2MoeForCausalLM.embed_input_idsA  s    z)))444r?   Nr   r   r   c                 6    |                      ||||          }|S r   )r   )r<   r   r   r   r   rd   s         r>   rG   zQwen2MoeForCausalLM.forwardD  s)     

y"6
 
 r?   rd   c                 <    |                      | j        |          }|S r   )r   r   )r<   rd   logitss      r>   compute_logitsz"Qwen2MoeForCausalLM.compute_logitsP  s      &&t|]CCr?   r   c                 J    t          |           }|                    |          S r   )r    r   )r<   r   loaders      r>   r   z Qwen2MoeForCausalLM.load_weightsW  s#    "4((""7+++r?   c                 4    | j                                         S r   )r   r   r   s    r>   r   z&Qwen2MoeForCausalLM.get_expert_mapping[  s    z,,...r?   r   )rH   rI   rJ   fall_back_to_pt_during_loadr   r   rL   r7   rN   rn   r   r   rG   r   r   r   r   r   r   rK   r   rP   rQ   s   @r>   r   r     s       "' 
 
 
 BD 
 
 
z 
3 
 
 
 
 
 
:5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /r?   r   )E__doc__collections.abcr   	itertoolsr   typingr   rN   torch.nn.functionalr   
functionalrA   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr    r!   r"   r#   r$   r%   rH   r   Moduler'   rS   rp   r   r   r    r?   r>   <module>r     s  4 I H $ $ $ $ $ $                             ' ' ' ' ' ' * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # < < < < < < ? ? ? ? ? ? 8 8 8 8 8 8            H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 0 0 0 0 0 0 0 0                
X		* * * * *") * * *ZK4 K4 K4 K4 K4RY K4 K4 K4\V V V V V	 V V VrG' G' G' G' G'29 G' G' G'T n n n n nBI n n nbB/ B/ B/ B/ B/")Z B/ B/ B/ B/ B/r?   