
    .`i|F                        d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlm Z  d dl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  ee*          Z+ G d dej,                  Z- G d dej,                  Z. G d dej,                  Z/e G d dej,        e%                      Z0dS )    N)CallableIterable)PretrainedConfig)rocm_aiter_ops)support_torch_compile)
VllmConfig)init_logger)SharedFusedMoE)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)current_platform)IntermediateTensors   )DeepseekV2DecoderLayerDeepseekV2MixtureOfExpertsDeepseekV2MoE#get_spec_layer_idx_from_weight_name)maybe_prefixc            	       ^     e Zd Z	 d	dedededz  ddf fdZdej        dej        fdZ	 xZ
S )

SharedHeadNconfigprefixquant_configreturnc           	          t                                                       t          |j        |j                  | _        t          |j        |j        |t          |d                    | _	        d S )Nepshead)r   r   )
super__init__r   hidden_sizerms_norm_epsnormr   
vocab_sizer   r#   )selfr   r   r   	__class__s       {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/deepseek_mtp.pyr%   zSharedHead.__init__)   sk     	F.F4GHHH	"%//	
 
 
			    hidden_statesc                 ,    |                      |          S N)r(   )r*   r.   s     r,   forwardzSharedHead.forward8   s    yy'''r-   r0   )__name__
__module____qualname__r   strr   r%   torchTensorr1   __classcell__r+   s   @r,   r   r   (   s        
 37	
 
 
 
 )4/	

 

 
 
 
 
 
(U\ (el ( ( ( ( ( ( ( (r-   r   c                        e Zd Zdededdf fdZ	 	 ddej        dej        d	ej        d
ej        dz  dedej        fdZ	 xZ
S ) DeepSeekMultiTokenPredictorLayervllm_configr   r   Nc                    t                                                       |j        j        j        }|| _        |j        }t          |j        |j	                  | _
        t          |j        |j	                  | _        t          j        |j        dz  |j        d          | _        t          j        | _        t%          |d          | _        | j        r9|j        }t+          j        |j        j        |t*          j        | j                  }nd }t5          |||          | _        t9          ||| j        |          | _        d S )	Nr!      F)bias
index_topk)dtypedevice)r   r   r   )r   topk_indices_buffer)r$   r%   speculative_configdraft_model_config	hf_configr   r   r   r&   r'   enormhnormnnLineareh_projr   device_typerB   hasattris_v32r@   r6   emptyscheduler_configmax_num_batched_tokensint32r   shared_headr   	mtp_block)r*   r<   r   r   r   topk_tokensrC   r+   s          r,   r%   z)DeepSeekMultiTokenPredictorLayer.__init__=   s;   /BL"/V/V5HIII
V/V5HIII
y!3a!79KRWXXX&2fl33; 		' +K"'+,Ck{	# # # #'%&|
 
 
 0; 3	
 
 
r-   r   	input_ids	positionsprevious_hidden_statesinputs_embedsspec_step_indexc                 R   |J t          j        |                    d          dk    d|          }|                     |          }|                     |          }|                     t          j        ||gd                    }|                     ||d           \  }}||z   }|S )Nr   )dim)rW   r.   residual)r6   where	unsqueezerG   rH   rK   catrT   )r*   rV   rW   rX   rY   rZ   r.   r^   s           r,   r1   z(DeepSeekMultiTokenPredictorLayer.forward`   s     (((I$7$7$;$;q$@!]SS

=11!%,B!C!CI}&<=2FFF
 
 #'..}t #1 #
 #
x !=0r-   Nr   )r2   r3   r4   r   r5   r%   r6   r7   intr1   r8   r9   s   @r,   r;   r;   <   s        !
J !
 !
 !
 !
 !
 !
 !
 !
P .2  < < !&	
 |d*  
       r-   r;   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        dej        dej        dej        d	z  de	dej        fdZ
	 ddej        de	dej        fdZ xZS )DeepSeekMultiTokenPredictor r   r<   r   c                   t                                                       j        j        }|j        | _        |j        | _        t          j	        
                    fdt          | j        | j        | j        z             D                       | _        t          |j        |j        t!          d                    | _        t%          |j                  | _        d S )Nc           	      V    i | ]%}t          |          t           d |           &S )z.layers.)r5   r;   ).0idxr   r<   s     r,   
<dictcomp>z8DeepSeekMultiTokenPredictor.__init__.<locals>.<dictcomp>   sQ         C:F!9!9C!9!9   r-   embed_tokensrg   )r$   r%   model_configrF   num_hidden_layersmtp_start_layer_idxnum_nextn_predict_layersnum_mtp_layersr6   rI   
ModuleDictrangelayersr   r)   r&   r   rm   r   logits_processor)r*   r<   r   r   r+   s    `` r,   r%   z$DeepSeekMultiTokenPredictor.__init__z   s    )3#)#; $= h))     !,,t/BB 	  

 

 377
 
 

 !00A B Br-   rV   r   c                 ,    |                      |          S r0   )rm   r*   rV   s     r,   embed_input_idsz+DeepSeekMultiTokenPredictor.embed_input_ids   s      +++r-   Nr   rW   rX   rY   spec_step_idxc                     ||                      |          }|| j        z  } | j        t          | j        |z                      |||||          S r0   )rm   rr   ru   r5   rp   )r*   rV   rW   rX   rY   rz   current_step_idxs          r,   r1   z#DeepSeekMultiTokenPredictor.forward   sf       --i88M(4+>>Lt{3t7:JJKKL"
 
 	
r-   r.   c                     || j         z  }| j        t          | j        |z                      }|                     |j        j        |                    |                    }|S r0   )rr   ru   r5   rp   rv   rS   r#   )r*   r.   rz   r|   	mtp_layerlogitss         r,   compute_logitsz*DeepSeekMultiTokenPredictor.compute_logits   sd    
 )4+>>KD$<?O$O P PQ	&&!&	(=(=m(L(L
 
 r-   rb   r   )r2   r3   r4   r   r5   r%   r6   r7   ry   rc   r1   r   r8   r9   s   @r,   re   re   y   s0       AC C C Cz C3 C C C C C C2, ,%, , , , , .2
 
<
 <
 !&	

 |d*
 
 

 
 
 
, 
 
|
 
 
	
 
 
 
 
 
 
 
r-   re   c                   J    e Zd Zdddedef fdZd Zdej        dej        fd	Z		 	 	 ddej        dej        dej        de
d
z  dej        d
z  dedej        fdZ	 ddej        dedej        d
z  fdZdeeeej        f                  dee         fdZdededefdZ xZS )DeepSeekMTPrf   rg   r<   r   c                    t                                                       |j        j        | _        t          |t          |d                    | _        |                                  d S )Nmodel)r<   r   )	r$   r%   rn   rF   r   re   r   r   set_moe_parameters)r*   r<   r   r+   s      r,   r%   zDeepSeekMTP.__init__   se    !.80#L,I,I
 
 

 	!!!!!r-   c                    g | _         | j        j        | _        | j        j        | _        g | _        g | _        d }| j        j	        
                                D ]}t          |t                    sJ |j        }t          |t                    sJ t          |j        t                     rJ|j        }| j                            |j                   | j                            |j        j                   |                     |           d S r0   )expert_weightsr   rq   num_moe_layersn_groupnum_expert_groups
moe_layersmoe_mlp_layersr   ru   values
isinstancer;   rT   r   mlpr   appendexpertsextract_moe_parameters)r*   example_moelayers      r,   r   zDeepSeekMTP.set_moe_parameters   s     "kB!%!4 Z&--// 	: 	:Ee%EFFFFFOEe%;<<<<<%)]33 :#i#**59555&&uy'8999##K00000r-   rV   r   c                 6    | j                             |          S r0   )r   ry   rx   s     r,   ry   zDeepSeekMTP.embed_input_ids   s    z)))444r-   Nr   rW   r.   intermediate_tensorsrY   rz   c                 8    |                      |||||          }|S r0   )r   )r*   rV   rW   r.   r   rY   rz   s          r,   r1   zDeepSeekMTP.forward   s*     

y-
 
 r-   c                 8    | j                             ||          S r0   )r   r   )r*   r.   rz   s      r,   r   zDeepSeekMTP.compute_logits   s    
 z((FFFr-   weightsc                     t          j                    }g d}t          j        | ddd| j        j        |r| j        j        ndz             }t          |                                           }t                      }|D ]\  }}d|v rt          | j        |          }	|	#|od|v }
|                     |	|          }|D ]r\  }}}||vrd	|v r||vr|
r|                    ||          }|d
k    r||vr8|}|                    d          r||vrT||         }|j        } ||||            nd}|
rKt          | j        dd          pd}d|v rdnd}|j        |         }||z  dk    sJ d| d|             ||z  }t#          |          D ]d}|}|}|
r[|dk    r|||z  |dz   |z  d d f         }n|d d ||z  |dz   |z  f         }|                    dd	| j        j        |z              }d}|D ]}|\  }}}}||vrd}|                    ||          }||         }t%          j        t(          dt*          f         |j                  } ||||||d          }|r|
s|}n|                    |            ns|r|                    d          r||vrt/          ||          }|$|	| j        j        k    rd|vr:||         }t          |dt4                    } |||           f|
s|                    |           |S )N))gate_up_proj	gate_projr   )r   up_projr   )fused_qkv_a_projq_a_projr   )r   kv_a_proj_with_mqar   r   	down_projr   r   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertszrotary_emb.inv_freqzmlp.shared_expertszmlp.experts.r   z.biasr   n_shared_expertszdown_proj.weightzShared expert weight dim z not divisible by num_chunks FT.)shard_id	expert_idreturn_successz.layersweight_loader)r   $is_fusion_moe_shared_experts_enabledr
   make_expert_params_mappingr   n_routed_expertsr   dictnamed_parameterssetr   _rewrite_spec_layer_namereplaceendswithr   getattrshapert   typingcastr   booladdr   r   rp   r   )r*   r   $rocm_aiter_moe_shared_expert_enabledstacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnameloaded_weight
spec_layer"is_fusion_moe_shared_experts_layer
param_nameweight_namer   name_mappedparamr   
num_chunks	split_dimtotal
chunk_sizej
chunk_nameweight_to_loadis_expert_weightmappingr   successs                               r,   load_weightszDeepSeekMTP.load_weights   s   ?AA 	-"
 "
 "
 !/ I + +'4 8,,	!
 !
 !
 4002233"%%%#* Y	( Y	(D-$,,<T[$OOJ!4W:NRV:V / 00TBBD5K M< M<1
Kd** #d**K0G0G5 "ll;
CC
 "444!44&D ==)) d+.E.E#D) % 3e]H=== 
5 5!(6H!!L!L!QPQJ &84%?%?QI)/	:E :-222DE D D7AD D 322 "'*!4Jz** U< U<A!%J%2N9 $>>-: !J!a%:1E Eqqq H.NN .; !1z>QUj4H#H H.N
 &*\\0M4;+G!+KMM& &
 (-$#8 =< =<GND
KH&j88$ ,0( '1&8&8j&Q&Q +K 8 )/$S$Y/1D) ) #0-!*'%-&/+/# # # # "#E ?'2 - 1 1+ > > >!E" , % %  ==11 %d+6M6M$8{KK<$
 '$**HHH ) 5 5$ +D 1(/!?4I) ) &e];;;5 (!!$'''r-   r   r   c                     g d}dg}d}d}|D ]}||v r
d}||v rd} n|s|                     d| dd| d          }n|r|                     d| dd          }|S )	z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        and rename shared layer weights to be top level.
        )rm   rG   rH   rK   rS   rm   FTzmodel.layers..z.mtp_block.zmodel.)r   )r*   r   r   spec_layer_weight_namesshared_weight_namesspec_layer_weightshared_weightr   s           r,   r   z$DeepSeekMTP._rewrite_spec_layer_name  s    #
 #
 #
  ..!2 	 	Kd""$(!"555$(M	 #
 ! 	I<<-
---/Vz/V/V/V DD  	I<< =
 = = =xHHDr-   )NNr   r   )r2   r3   r4   r   r5   r%   r   r6   r7   ry   r   rc   r1   r   r   tupler   r   r   r8   r9   s   @r,   r   r      s       AC " " "z "3 " " " " " "1 1 1$5 5%, 5 5 5 5 <@-1 < < |	
 2D8 |d*  
   " G G|G G 
		G G G GtHU33D-E$F t3s8 t t t tl3 c c        r-   r   )1r   collections.abcr   r   r6   torch.nnrI   transformersr   vllm._aiter_opsr   vllm.compilation.decoratorsr   vllm.configr   vllm.loggerr	   $vllm.model_executor.layers.fused_moer
   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.platformsr   vllm.sequencer   deepseek_v2r   r   r   r   utilsr   r2   loggerModuler   r;   re   r    r-   r,   <module>r      s    . . . . . . . .        ) ) ) ) ) ) * * * * * * = = = = = = " " " " " " # # # # # # ? ? ? ? ? ? 8 8 8 8 8 8 G G G G G G F F F F F F               , + + + + + - - - - - -                  	X		( ( ( ( ( ( ( ((: : : : :ry : : :z: : : : :") : : :z H H H H H")7 H H H H Hr-   