
    .`i5+                         d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ  G d dej                  Z G d dej                  Z G d dej                  Z dS )zInference-only MiMo-MTP model.    )IterableN)PretrainedConfig)CacheConfigModelConfig
VllmConfig)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)Qwen2DecoderLayer)IntermediateTensors   )maybe_prefixc                        e Zd Z	 	 ddededededz  dedz  ddf fdZ	 dd
e	j
        de	j
        de	j
        dede	j
        f
dZ xZS )MiMoMultiTokenPredictorLayerNconfigprefixmodel_configcache_configquant_configreturnc                    t                                                       t          |j        |j                  | _        t          |j        |j                  | _        t          j        |j        dz  |j        d          | _	        t          ||||          | _        t          |j        |j                  | _        d S )N)eps   F)bias)r   r   r   r   )super__init__r   hidden_sizerms_norm_epstoken_layernormhidden_layernormnnLinear
input_projr   	mtp_blockfinal_layernorm)selfr   r   r   r   r   	__class__s         w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mimo_mtp.pyr   z%MiMoMultiTokenPredictorLayer.__init__-   s     	&v'9v?RSSS '(:@S T T T)"F$6U
 
 
 +%%	
 
 
  'v'9v?RSSS    r   inputs_embeds	positionsprevious_hidden_statesspec_step_indexc                 0   |J d||dk    <   |                      |          }|                     |          }|                     t          j        ||gd                    }|                     ||d           \  }}||z   }|                     |          S )Nr   )dim)r.   hidden_statesresidual)r"   r#   r&   torchcatr'   r(   )r)   r-   r.   r/   r0   r4   r5   s          r+   forwardz$MiMoMultiTokenPredictorLayer.forwardD   s     (((()i1n%,,];;!%!6!67M!N!NI-}=2FFF
 
 #'..}t #1 #
 #
x !=0##M222r,   )NNr   )__name__
__module____qualname__r   strr   r   r
   r   r6   Tensorintr8   __classcell__r*   s   @r+   r   r   ,   s         ,026T T T T "	T
 "D(T )4/T 
T T T T T T8  !3 3|3 <3 !&	3
 3 
3 3 3 3 3 3 3 3r,   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        dej        dej        dej        d	z  de	dej        fdZ
	 ddej        dede	dej        fdZ xZS )MiMoMultiTokenPredictor r   vllm_configr   c                   t                                                       j        j        j        | _        j        | _        t          j	        j
                  | _        t          j                            fdt          | j        | j        | j        z             D                       | _        t#          j	                  | _        d S )Nc           
      |    i | ]8}t          |          t           d | j        j        j                  9S )z.layers.)r   r   r   )r=   r   r   r   r   ).0idxr   r   rF   s     r+   
<dictcomp>z4MiMoMultiTokenPredictor.__init__.<locals>.<dictcomp>j   si         C6,,s,,!,!9!,!9!,!9    r,   )r   r   r   	hf_confignum_hidden_layersmtp_start_layer_idxnum_nextn_predict_layersnum_mtp_layersr   
vocab_sizer    embed_tokensr6   r$   
ModuleDictrange
mtp_layersr	   logits_processor)r)   rF   r   r   r*   s    ``@r+   r   z MiMoMultiTokenPredictor.__init__]   s    )3#)#; $=2
 

  (--      !,,t/BB   
 
  !00A B Br,   	input_idsr   c                 ,    |                      |          S N)rR   r)   rW   s     r+   embed_input_idsz'MiMoMultiTokenPredictor.embed_input_ids{   s      +++r,   Nr   r.   r/   r-   spec_step_idxc                     ||                      |          } | j        t          | j        |z                      ||||          S rY   )rR   rU   r=   rN   )r)   rW   r.   r/   r-   r\   s         r+   r8   zMiMoMultiTokenPredictor.forward~   sT       --i88MMts4#;m#KLLM"	
 
 	
r,   r4   lm_headc                 v    | j         t          | j        |z                       |                     ||          }|S rY   )rU   r=   rN   rV   )r)   r4   r^   r\   logitss        r+   compute_logitsz&MiMoMultiTokenPredictor.compute_logits   s;     	D4}DEEFF&&w>>r,   )Nr   r9   )r:   r;   r<   r   r=   r   r6   r>   r[   r?   r8   r   ra   r@   rA   s   @r+   rC   rC   \   s:       AC C C Cz C3 C C C C C C<, ,%, , , , , .2
 
<
 <
 !&	

 |d*
 
 

 
 
 
* 	 |   	
 
       r,   rC   c                   T    e Zd Zdddedef fdZdej        dej        fdZ	 	 	 ddej        dej        dej        de	d	z  dej        d	z  de
dej        fdZ	 ddej        de
dej        d	z  fdZdeeeej        f                  dee         fdZdedefdZde
dedefdZ xZS )MiMoMTPrD   rE   rF   r   c                $   t                                                       |j        j        | _        t          |t          |d                    | _        t          | j        j	        | j        j
        t          |d                    | _        d S )Nmodel)rF   r   r^   rE   )r   r   r   rL   r   rC   r   re   r   rQ   r    r^   )r)   rF   r   r*   s      r+   r   zMiMoMTP.__init__   s    !.8,#L,I,I
 
 

 &K"K#	22
 
 
r,   rW   r   c                 6    | j                             |          S rY   )re   r[   rZ   s     r+   r[   zMiMoMTP.embed_input_ids   s    z)))444r,   Nr   r.   r4   intermediate_tensorsr-   r\   c                 X    |dk    s
J d            |                      |||||          }|S )Nr   z+mimo_mtp only support predict one token now)re   )r)   rW   r.   r4   rg   r-   r\   s          r+   r8   zMiMoMTP.forward   sD     !!!#P!!!

y-
 
 r,   c                 D    | j                             || j        |          S rY   )re   ra   r^   )r)   r4   r\   s      r+   ra   zMiMoMTP.compute_logits   s     
 z((mTTTr,   weightsc                 P   g d}t          |                                           }t                      }|D ]\  }}d|v r
|                     |          }|D ]g\  }}}	||vrd|vr nd|v r||vr|                    ||          }|                    d          r||vrJ||         }
|
j        } ||
||	            nQ|                    d          r||vrd|vr	d|vrd|vr||         }
t          |
dt                    } ||
|           |	                    |           |S )	N))qkv_projq_projq)rl   k_projk)rl   v_projv)gate_up_proj	gate_projr   )rs   up_projr   zrotary_emb.inv_freqrU   zmlp.experts.z.biasrR   r^   weight_loader)
dictnamed_parametersset map_model_name_to_mtp_param_namereplaceendswithrv   getattrr   add)r)   rj   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamrv   s               r+   load_weightszMiMoMTP.load_weights   s   "
 "
 "
 4002233"%%%#* '	$ '	$D-$,,88>>D5K !4 !41
Kd**t++E #d**K0G0G||K<<==)) d+.E.E#D) % 3e]H=== ==)) d+.E.Et++"$..9D3H3H#D) '@U V Ve]333d####r,   r   c                    dd l }d}|                    ||          }|rqt          |                    d                    }|| j        j        z   }|                    |                                |                    d           | d          }g d}|D ]
}||v r|c S d}|                    ||          }|r=|                    |                                |                                dz             }|S )	Nr   z(model\.mtp_layers\.)(\d+)(\.)r   r   .)r"   r#   r&   r(   z(model\.mtp_layers\.\d+\.)z
mtp_block.)regexmatchr?   groupr   rM   r{   )	r)   r   repatternr   original_numnew_numname_without_prefixsub_names	            r+   rz   z(MiMoMTP.map_model_name_to_mtp_param_name   s    4$'' 	Nu{{1~~..L"T[%BBG<<%++a../L'/L/L/LMMD
 
 
 , 	 	H4   0$'' 	M<<u{{}}|/KLLDr,   
spec_layerc                 l    g d}d}|D ]
}||v rd} n|s|                     d| dd| d          }|S )z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        )rR   enormhnormeh_projshared_headFTzmodel.layers.r   z.mtp_block.)r{   )r)   r   r   spec_layer_weight_namesspec_layer_weightr   s         r+   _rewrite_spec_layer_namez MiMoMTP._rewrite_spec_layer_name  s    
#
 #
 #
 "2 	 	Kd""$(! # ! 	<<-
---/Vz/V/V/V D r,   )NNr   r9   )r:   r;   r<   r   r=   r   r6   r>   r[   r   r?   r8   ra   r   tuplery   r   rz   r   r@   rA   s   @r+   rc   rc      s       AC 

 

 

z 

3 

 

 

 

 

 

5 5%, 5 5 5 5 <@-1 < < |	
 2D8 |d*  
   $ U U|U U 
		U U U U3HU33D-E$F 33s8 3 3 3 3jS S    63 c c        r,   rc   )!__doc__collections.abcr   r6   torch.nnr$   transformersr   vllm.configr   r   r   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr	   'vllm.model_executor.layers.quantizationr
   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr    vllm.model_executor.models.qwen2r   vllm.sequencer   utilsr   Moduler   rC   rc    r,   r+   <module>r      s  * % $ $ $ $ $ $ $        ) ) ) ) ) ) < < < < < < < < < < 8 8 8 8 8 8 G G G G G G F F F F F F        P O O O O O > > > > > > - - - - - -      -3 -3 -3 -3 -329 -3 -3 -3`; ; ; ; ;bi ; ; ;|L L L L Lbi L L L L Lr,   