
    .`iD^              	          d dl mZ d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZmZmZ d dlmZ d dl m!Z!m"Z"m#Z#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=  ee>          Z? G d dej@                  ZA G d dej@                  ZB G d dej@                  ZC G d dej@                  ZDe G d  d!ej@                              ZE G d" d#ej@        e5e8e7e6          ZFd$e3d%eGd&eHdz  fd'ZIdS )(    )IterableN)nn)support_torch_compile)CacheConfigModelConfigParallelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)init_logger)
SiluAndMul)FusedMoE)KimiDeltaAttention)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)
MLAModulesMultiHeadLatentAttentionWrapper)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)KimiLinearConfig   )HasInnerStateIsHybridMixtureOfExperts
SupportsPP)PPMissingLayeris_pp_missing_parametermake_layersmaybe_prefixc                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )KimiMLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsprefixreturnc           	         t                                                       t          ||gdz  d|| d          | _        t	          ||d||| d          | _        |dk    rt          d| d	          t                      | _        d S )
N   F.gate_up_projbiasr3   r5   z
.down_proj)r;   r3   r4   r5   siluUnsupported activation: !. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr0   r1   r2   r3   r4   r5   	__class__s          z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/kimi_linear.pyr@   zKimiMLP.__init__;   s     	6!#%+++
 
 
 +%)(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rA   rD   rB   )rE   xgate_up_s       rG   forwardzKimiMLP.forward[   sD    &&q))
KK  ~~a  1rH   )NTr/   )
__name__
__module____qualname__intstrr   boolr@   rN   __classcell__rF   s   @rG   r.   r.   :   s         37## ## # 	#
 )4/# # # 
# # # # # #@      rH   r.   c            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )KimiMoENr/   r   configr3   r5   	layer_idxc                    t                                                       |j        }|j        }|j        }|j        }|j        }	t                      | _        |j	        | _	        |j
        | _
        || _        |j        dk    rt          d|j         d          t          ||dd | d          | _        t!          j        t%          j        |                    | j        _        t+          ||j        ||d|	||j        |j        |j        | d|j        | j        j                  | _        | j
        3|| j
        z  }t9          |j        ||j        |d| d	
          | _        d S d S )Nr<   r=   r>   Fz.gater:   z.experts)num_expertstop_kr0   r1   r4   renormalizer3   use_grouped_topknum_expert_group
topk_groupr5   scoring_funce_score_correction_biasz.shared_experts)r0   r1   r2   r3   r4   r5   )r?   r@   r0   r1   moe_intermediate_sizer\   moe_renormalizer   tp_sizerouted_scaling_factornum_shared_expertsrZ   r2   rC   r   gater   	Parametertorchemptyrc   r   num_experts_per_tokenr_   r`   ra   moe_router_activation_funcexpertsr.   shared_experts)rE   rY   r3   r5   rZ   r0   r1   rd   r\   re   rF   s             rG   r@   zKimiMoE.__init__c   s    	("4 & <( 0;==%+%A""(";"&&26+< 2 2 2   %###
 
 
	 -/L[9Q9Q,R,R	)#.#3 '%#4#4(&&&:$(I$E
 
 
  ". 58O O")"."3!,)$ 111# # #D /.rH   hidden_statesr6   c                 V   |j         \  }}|                    d|          }| j        |                     |          }|                     |          \  }}|                     ||          | j        z  }|||z   }| j        dk    rt          |          }|                    ||          S )N)rq   router_logitsr$   )	shapeviewrh   rp   ri   ro   rg   rf   r   )rE   rq   
num_tokensr0   shared_outputrt   rM   final_hidden_statess           rG   rN   zKimiMoE.forward   s    "/"5
K%**2{;;". //>>M99]33qLL}MLRR() 	 $"5"E<!"BCV"W"W"''
K@@@rH   )Nr/   r   )rO   rP   rQ   r#   r   rS   rR   r@   rk   TensorrN   rU   rV   s   @rG   rX   rX   b   s         37< < < )4/< 	<
 < < < < < <|AU\ Ael A A A A A A A ArH   rX   c                        e Zd ZdZ	 	 	 	 ddedededed	ed
ededz  dedededz  dedz  de	ddf fdZ
dej        dej        dej        ddfdZ xZS )KimiMLAAttentionz8
    Main reference: DeepseekV2 vllm Implementation
    FNr/   rY   r0   	num_headsqk_nope_head_dimqk_rope_head_dim
v_head_dimq_lora_rankkv_lora_rankuse_nopecache_configr3   r5   r6   c                    t                                                       || _        || _        || _        ||z   | _        || _        || _        || _        || _	        t                      }||z  | _        | j        dz  | _        |	| _        | j        du sJ | j        J ||z  dk    sJ t          | j        | j        | j        z   d|| d          | _        t!          | j        | j	        | j        z  d|| d          | _        t%          | j        |j                  | _        t!          | j        | j	        | j        | j        z   z  d|| d	          | _        t-          | j	        | j        z  | j        d|| d
          | _        t1          | j        | j        d | j        d | j        d d | j        d dd           }t3          | j        | j        | j        | j        | j        | j        | j        | j        ||
||          | _        d S )Ng      Tr   Fz.kv_a_proj_with_mqar:   z.q_projepsz
.kv_b_projz.o_proj)kv_a_layernorm	kv_b_proj
rotary_embo_projfused_qkv_a_projkv_a_proj_with_mqaq_a_layernormq_b_projq_projindexer	is_sparsetopk_indices_buffer)r?   r@   r0   r~   r   qk_head_dimr   r   r   r}   r   num_local_headsscalingr   r   r   r   r   r   rms_norm_epsr   r   r   r   r   r   mla_attn)rE   rY   r0   r}   r~   r   r   r   r   r   r   r3   r5   kwargsrf   mla_modulesrF   s                   rG   r@   zKimiMLAAttention.__init__   s{     	& 0 0+.>>$&("688(G3'- }$$$$'''7"a''''"2 55%111#
 #
 #
 +NT--%%%%
 
 
 &#
 
 
 .Nd3doEF%(((
 
 
 (NT_,%%%%
 
 
 !.n;!#6; $
 
 
 8 L!!O
 
rH   	positionsrq   outputc                 <    |                      ||          |d d <   d S rJ   )r   )rE   r   rq   r   s       rG   rN   zKimiMLAAttention.forward  s#     MM)];;qqq			rH   )FNNr/   )rO   rP   rQ   __doc__r#   rR   rT   r   r   rS   r@   rk   rz   rN   rU   rV   s   @rG   r|   r|      s=         +/26\
 \
 \
 \
 	\

 \
 \
 \
 4Z\
 \
 \
 "D(\
 )4/\
 \
 
\
 \
 \
 \
 \
 \
|<<< |< 	<
 
< < < < < < < <rH   r|   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  dedz  d	e	d
df fdZ
dej        dej        dej        dz  d
eej        ej        f         fdZ xZS )KimiDecoderLayerNr/   rY   rZ   r   r3   parallel_configmodel_configr5   r6   c                    t                                                       |j        | _        |j        | _        |                    |          r#t          ||j        |||| d          | _        nMt          || j        |j        |||| d||j	        |j
        |j        |j        |j        |j                  | _        | j        rG|j        @||j        k    r5||j        z  dk    r't%          ||| d          | _        | j        | _        n+t+          | j        |j        |j        || d          | _        t1          |j        |j        	          | _        t1          |j        |j        	          | _        d S )
Nz
.self_attn)rZ   r0   r3   r   r   r5   )rZ   r0   r}   r3   r   r   r5   rY   r~   r   r   r   r   r   r   z.block_sparse_moe)rY   r3   r5   z.mlp)r0   r1   r2   r3   r5   r   )r?   r@   r0   is_moeis_kda_layerr   	self_attnr|   num_attention_headsr~   r   r   r   r   mla_use_noper\   first_k_dense_replacemoe_layer_freqrX   block_sparse_moemlpr.   r1   r2   r   r   input_layernormpost_attention_layernorm)
rE   rY   rZ   r   r3   r   r   r5   r   rF   s
            rG   r@   zKimiDecoderLayer.__init__   s    	!-my)) 	/#".))# ,,,  DNN .# , 4))) ,,,!'!8!'!8!,".#0,  DN$ K	".V999F11Q66$+) 333% % %D!
 ,DHH ,"(":!,)   DH  'v'9v?RSSS(/F$7)
 )
 )
%%%rH   r   rq   residualc                 (   ||}|                      |          }n|                      ||          \  }}t          j        |          }|                     |||           |}|                     ||          \  }}|                     |          }||fS )N)rq   r   r   )r   rk   
empty_liker   r   r   )rE   r   rq   r   r   attn_outputs         rG   rN   zKimiDecoderLayer.forwardd  s     $H 00??MM&*&:&:=(&S&S#M8&}55' 	 	
 	
 	

 $ #'"?"?x"X"Xx//h&&rH   )NNNNr/   )rO   rP   rQ   r#   rR   r   r   r   r   rS   r@   rk   rz   tuplerN   rU   rV   s   @rG   r   r     s       
 ,02615+/B
 B
 B
 B
 "D(	B

 )4/B
 ($.B
 "D(B
 B
 
B
 B
 B
 B
 B
 B
H'<' |' ,%	' 
u|U\)	*' ' ' ' ' ' ' 'rH   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        f
dZ
 xZS )KimiLinearModelr/   r5   vllm_configr5   c                  	
 t                                                       |j        j        |j        |j        |j        
|j        	| _        j        | _	        j
        | _
        t                      j        r%t          j
        j        | d          | _        nt!                      | _        i dt"          f	
fd}t%          j        || d          \  | _        | _        | _        t                      j        r!t1          j        j                  | _        nt!                      | _        t7                      }j        |z  dk    s
J d            d S )	Nz.embed_tokensr   r5   c           	      |    t          |                     dd          d                   }t          || fi S )N.r$   )rR   rsplitr   )r5   rZ   r   rY   extra_kwargsr   r   r3   s     rG   	get_layerz+KimiLinearModel.__init__.<locals>.get_layer  sY    FMM#q11!455I#	 	 	 	 	rH   z.layersr   r   z3num_attention_heads must be divisible by world_size)r?   r@   r   hf_text_configr   r3   r   rY   pad_token_idpadding_idx
vocab_sizer
   is_first_rankr   r0   embed_tokensr)   rS   r+   num_hidden_layersstart_layer	end_layerlayersis_last_rankr   r   normr   r   )rE   r   r5   r   
world_sizer   rY   r   r   r   r3   rF   s        @@@@@@rG   r@   zKimiLinearModel.__init__  s   )8"/"/"/%5!. +>>' 	1 6!" ///! ! !D !/ 0 0D	c 	 	 	 	 	 	 	 	 	 	 	 9D$%%%9
 9
 9
5$.$+ >>& 	) 28KLLLDII&((DI9;;
)J6!;;;A <;;;;rH   	input_idsr6   c                 ,    |                      |          S rJ   )r   rE   r   s     rG   embed_input_idszKimiLinearModel.embed_input_ids  s      +++rH   Nr   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                           D ]\  }}	 |	|||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nrq   r   )r   rq   r   )rq   r   )
r
   r   r   	enumerater   r   r   r   r"   r   )
rE   r   r   r   r   r   rq   r   rM   layers
             rG   rN   zKimiLinearModel.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H!$+d.>.O"PQQ 	 	HAu&+e#+!' ' '#M88 ~~* 	&"/XFF    99]H==qrH   rJ   )rO   rP   rQ   r	   rS   r@   rk   rz   r   r"   rN   rU   rV   s   @rG   r   r     s        AC 3
 3
 3
z 3
3 3
 3
 3
 3
 3
 3
j, ,%, , , , , .2   <$&  <  2D8	 
 |d*  
               rH   r   c                   
    e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
edddeej        ej        ej        ej        f         fd            Zedddeeedf         eedf         eedf         eedf         f         fd            Zedeeeeef         fd            Zdej        dej        d	z  fdZdeeeej        f                  fdZ xZS )KimiLinearForCausalLMr/   r   r   r5   c          	      "   t                                                       |j        | _        || _        | j        j        | _        |j        }|| _        t          |t          |d                    | _	        t                      j        r;t          | j        j        | j        j        |t          |d                    | _        nt!                      | _        t#          | j        dd          }t%          | j        j        |          | _        d S )Nmodel)r   r5   lm_head)r3   r5   logit_scaleg      ?)scale)r?   r@   r   r   	hf_configrY   r3   r   r,   r   r
   r   r   r   r0   r   r)   getattrr   logits_processor)rE   r   r5   r3   r   rF   s        rG   r@   zKimiLinearForCausalLM.__init__  s    '4&'1"/($#L,I,I
 
 

 >>& 	,)&')#FI66	  DLL *++DLdk=#>> /K"+!
 !
 !
rH   r   r6   c                 6    | j                             |          S rJ   )r   r   r   s     rG   r   z%KimiLinearForCausalLM.embed_input_ids  s    z)))444rH   Nr   r   r   c                 (     | j         ||||fi |}|S rJ   )r   )rE   r   r   r   r   r   rq   s          rG   rN   zKimiLinearForCausalLM.forward  s6     #
y"6
 
IO
 
 rH   r	   c                 T    t          j        |j        j        |j        j                  S rJ   )r   kda_state_dtyper   dtyper   mamba_cache_dtype)clsr   s     rG   !get_mamba_state_dtype_from_configz7KimiLinearForCausalLM.get_mamba_state_dtype_from_config  s*    
 )8$*K,D,V
 
 	
rH   .c                     |j         }|j        j        }|j        }|j        r|j        j        nd}t          j        ||j        d         |j        d         |j        d         |          S )Nr   r}   head_dimshort_conv_kernel_size)conv_kernel_sizenum_spec)	r   r   r   tensor_parallel_sizespeculative_confignum_speculative_tokensr   kda_state_shapelinear_attn_config)r   r   r   r   rf   r   s         rG   !get_mamba_state_shape_from_configz7KimiLinearForCausalLM.get_mamba_state_shape_from_config  s     &5,6	!6 -K*AA 	
 )8(5(4&9:RS
 
 
 	
rH   c                 (    t          j                    S rJ   )r   kda_state_copy_func)r   s    rG   get_mamba_state_copy_funcz/KimiLinearForCausalLM.get_mamba_state_copy_func%  s     ,?AAArH   rq   c                 8    |                      | j        |          S rJ   )r   r   )rE   rq   s     rG   compute_logitsz$KimiLinearForCausalLM.compute_logits-  s     $$T\=AAArH   weightsc           	      H   ddg}| j         j        r$t          j        | ddd| j         j                  }ng }t          |                                           }t                      }|D ]}|d d         \  }}t          |          dk    r|d         ni }	d|v r2t          | j         |          }
|
Jd	|v sd
|v rS|D ]r\  }}}||vrd|v r||vr|
                    ||          }|                    d          r||vrDt          ||           rU||         }|j        } ||||            nt          |          D ]V\  }\  }}}}||vr|
                    ||          }t          ||           r6||         }|j        } ||||||            nu|                    d          r||vr| j         j        sUt!          ||          }|it          ||           r{||         }t#          |dt$                    } |||fi |	 |                    |           d S )N)r9   z
.gate_projr   )r9   z.up_projr$   w1w2w3)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer\   r8   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedzmlp.experts.z.bias)	expert_idshard_idweight_loader)rY   r   r   make_expert_params_mappingr\   dictnamed_parameterssetlen#get_spec_layer_idx_from_weight_namereplaceendswithr*   r   r   is_linear_attnr!   r   r    add)rE   r   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsargsnameloaded_weightr   
spec_layer
param_nameweight_namer   paramr   idxr   s                     rG   load_weightsz"KimiLinearForCausalLM.load_weights3  s    /,"

 ; 	' %-$G$($("& K3% % %!! %'!4002233"%%% I	$ I	$D"&rr(D- #D		AT!WW2F$,,<T[$OOJ%&$..2IT2Q2Q 5K ;B ;B1
Kd** #d**K0G0G||K<<==)) d+.E.E*466 #D) % 3e]H===KT)L L %B %BGCG*k9h #$.. <<Z@@D.tT:: ! '-E$)$7M!M%"+!)    E g..! 33 $ : 4 !4T;GGD| .tT:: ! '-E$+0E% %M "M%AA&AAAd####SI	$ I	$rH   )NN)rO   rP   rQ   r	   rS   r@   rk   rz   r   r"   rN   classmethodr   r   r   rR   r   r   r   r   r   r  rU   rV   s   @rG   r   r     s=        BD 
 
 
z 
3 
 
 
 
 
 
05 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
+	+    
!
 
u{EKekA	B
 
 
 [
 
&
	uS#Xc3hsCx%S/Q	R
 
 
 [
& B	.0BDVV
B B B [BB|B 
	B B B B]$HU33D-E$F ]$ ]$ ]$ ]$ ]$ ]$ ]$ ]$rH   r   rY   r  r6   c                     t          | d          rL| j        dk    rA| j        }t          | j                  D ]%}|                    d||z    d          r||z   c S &d S )Nnum_nextn_predict_layersr   zmodel.layers.r   )hasattrr  r   range
startswith)rY   r  rZ   is       rG   r  r    s     v122 %'!++,	v677 	% 	%A%%&Fi!m&F&F&FGG % 1}$$$%4rH   )Jcollections.abcr   rk   r   vllm.compilation.decoratorsr   vllm.configr   r   r   r	   vllm.distributedr
   r   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   vllm.model_executor.layers.kdar   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   vllm.model_executor.layers.mlar   r   3vllm.model_executor.layers.quantization.base_configr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr    r!   vllm.sequencer"   +vllm.transformers_utils.configs.kimi_linearr#   
interfacesr%   r&   r'   r(   utilsr)   r*   r+   r,   rO   loggerModuler.   rX   r|   r   r   r   rS   rR   r   rH   rG   <module>r6     s(   % $ $ $ $ $        = = = = = = L L L L L L L L L L L L         
 $ # # # # # < < < < < < 9 9 9 9 9 9 = = = = = = 8 8 8 8 8 8            H G G G G G            W V V V V V V V R R R R R R               . - - - - - H H H H H H M M M M M M M M M M M M            
X		% % % % %bi % % %PNA NA NA NA NAbi NA NA NAbi< i< i< i< i<ry i< i< i<X^' ^' ^' ^' ^'ry ^' ^' ^'B Y Y Y Y Ybi Y Y Yxs$ s$ s$ s$ s$I}j*:Hs$ s$ s$l

+.
4Z
 
 
 
 
 
rH   