
    .`iXQ                     &   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6  G d dej7                  Z8 G d dej7                  Z9 G d dej7                  Z:e G d dej7                              Z; G d  d!ej7        e.e/          Z<d"ed#e=d$e>dz  fd%Z?dS )&zInference-only MiniMaxM2 model.    )Iterable)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)FusedMoE)RMSNorm)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)MiniMaxText01RMSNormTP)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                        e Zd Z	 	 ddededz  def fdZedej	        de
j        d	dfd
            Zde
j        d	e
j        fdZ xZS )MiniMaxM2MoEN configquant_configprefixc                    t                                                       t                      | _        | j        |j        k    r t          d| j         d|j         d          t          |dd          | _        | j        rSt          j	        t          j        |j        t          j                            | _        t          j        | j        _        nd | _        t#          |j        |j        |j        | j        |j        |j        dd|| d	
  
        | _        t/          |j        |j        dt          j        d | d
          | _        d S )NzTensor parallel size z' is greater than the number of experts .use_routing_biasF)dtypeTz.experts)
num_expertstop_kscoring_funce_score_correction_biashidden_sizeintermediate_sizereduce_resultsrenormalizer*   r+   z.gate)biasparams_dtyper*   r+   )super__init__r   tp_sizenum_local_experts
ValueErrorgetattrr.   r   	Parametertorchemptyfloat32r3   r'   ebias_weight_loaderweight_loaderr   num_experts_per_tokr2   r4   r5   expertsr   gate)selfr)   r*   r+   	__class__s       y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/minimax_m2.pyr;   zMiniMaxM2MoE.__init__J   ss    	;==<&222E E E)/)AE E E   !(0BE J J  	0+-<F4EMJJJ, ,D( 0 (66 ,0D(0,,$($@*$6 %&&&
 
 
 %$###
 
 
			    paramloaded_weightreturnc                     |                                  |                                 k    sJ | j                            |                    t          j                             d S N)sizedatacopy_torA   rC   )rM   rN   s     rK   rD   z MiniMaxM2MoE.ebias_weight_loadery   sS    zz||}11333333
))%-8899999rL   hidden_statesc                 >   |j         \  }}|                    d|          }|                     |                    t          j                            \  }}|                     ||          }|}| j        dk    rt          |          }|                    ||          S )N)rV   router_logitsr   )	shapeviewrH   rU   rA   rC   rG   r<   r   )rI   rV   
num_tokens
hidden_dimrY   _final_hidden_statess          rK   forwardzMiniMaxM2MoE.forward~   s    !.!4
J%**2z::  99]%5%5em%D%DEEq"ll'} + 
 
 2<!"BCV"W"W"''
J???rL   )Nr(   )__name__
__module____qualname__r   r   strr;   staticmethodr   r@   rA   TensorrD   r`   __classcell__rJ   s   @rK   r'   r'   I   s         37	-
 -
 -
 )4/-
 	-
 -
 -
 -
 -
 -
^ :2< : :QU : : : \:@U\ @el @ @ @ @ @ @ @ @rL   r'   c                        e Zd Z	 	 	 	 	 	 	 	 	 ddededed	ed
eeef         dz  dedz  dededz  dedede	dz  de
dz  deddf fdZdej        dej        dej        fdZ xZS )MiniMaxM2AttentionN    ư>Fr(   r4   	num_headsnum_kv_heads
rotary_dimrope_parametersattn_window_sizemax_position_embeddingshead_dimrms_norm_epsqkv_biascache_configr*   r+   rO   c                    t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        |p	|| j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        t          || j	        | j        | j        |
|| d          | _        t!          | j        | j	        z  |d|| d          | _        |d|vr|| j	        z  |d<   t%          | j	        ||	          | _        t)          | j        | j	        | j        | j        |||| d
          | _        t-          | j	        | j        z  |	          | _        t-          | j	        | j        z  |	          | _        d S )Nr   r   g      z	.qkv_proj)r8   r*   r+   Fz.o_projpartial_rotary_factor)max_positionrp   z.attn)rn   per_layer_sliding_windowrv   r*   r+   eps)r:   r;   r4   r   total_num_headsrm   total_num_kv_headsmaxrn   rs   q_sizekv_sizescalingrr   r   qkv_projr   o_projr   
rotary_embr   attnr   q_normk_norm)rI   r4   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   r*   r+   r<   rJ   s                  rK   r;   zMiniMaxM2Attention.__init__   sb     	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF I[D4H%Hnt}4(4=8}d*'>$)M #%'''
 
 
 ( 4=0%%%%
 
 
 ''>>7ADM7QO34"M0+
 
 

 NML*%5%%###	
 	
 	
	 -MD00l
 
 
 -MD33
 
 
rL   	positionsrV   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}t	          j        | j        | j        |                                |                                          \  }}| 	                    |||          \  }}| 
                    |||          }|                     |          \  }	}|	S )NrX   )dim)r   splitr   r   r   
forward_qkr   r   
contiguousr   r   r   )
rI   r   rV   qkvr^   qkvattn_outputoutputs
             rK   r`   zMiniMaxM2Attention.forward   s    
 }--Q))T[$,E2)NN1a%0Kallnnallnn
 
1 y!Q//1ii1a((KK,,	rL   )	NNrk   Nrl   FNNr(   )ra   rb   rc   intdictrd   r   floatboolr	   r   r;   rA   rf   r`   rg   rh   s   @rK   rj   rj      sT        26'+'+##+/26R
 R
R
 R
 	R

 R
 c3h$.R
 *R
 "%R
 *R
 R
 R
 "D(R
 )4/R
 R
 
R
 R
 R
 R
 R
 R
h< | 
	       rL   rj   c                        e Zd Z	 	 ddededededz  dedz  ddf fdZd	e	j
        d
e	j
        de	j
        dz  de	j
        fdZ xZS )MiniMaxM2DecoderLayerNr)   r+   model_configrv   r*   rO   c                    t                                                       |j        | _        t          |dd          }t	          |d          r4t          |j        t                    rt          |j	        |j                  }t          |
                    d          d                   }|| _        t          | j        |j        |j        |j        |j        ||j        t          |dd          t          |d	d           ||| d
          | _        t'          ||| d          | _        t+          |j        |j                  | _        t+          |j        |j                  | _        d S )Nrr   rk   max_model_lenr-   )seprX   attention_biasFrs   z
.self_attn)r4   rm   rn   ro   rp   rr   rt   ru   rs   rv   r*   r+   z.mlp)r)   r*   r+   r{   )r:   r;   r4   r?   hasattr
isinstancer   r   r   rr   r   	layer_idxrj   num_attention_headsnum_key_value_headsro   rp   rt   	self_attnr'   block_sparse_moer   input_layernormpost_attention_layernorm)	rI   r)   r+   r   rv   r*   rr   r   rJ   s	           rK   r;   zMiniMaxM2DecoderLayer.__init__   s    	!-")&2KT"R"R6?++ 	
6;OQT0U0U 	&).0D' '#
 --b122	"+(03("2$;,V%5u==VZ66%%(((
 
 
 !-%???!
 !
 !

  'v'9v?RSSS(/F$7)
 )
 )
%%%rL   r   rV   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rV   )r   r   r   r   )rI   r   rV   r   s       rK   r`   zMiniMaxM2DecoderLayer.forward!  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx--m<<h&&rL   NN)ra   rb   rc   r   rd   r
   r	   r   r;   rA   rf   r`   rg   rh   s   @rK   r   r      s         ,026+
 +
 +
 +
 "	+

 "D(+
 )4/+
 
+
 +
 +
 +
 +
 +
Z'<' |' ,%	'
 
' ' ' ' ' ' ' 'rL   r   c                       e Zd ZdZdddedef fdZdej        dej        fd	Z		 ddej        dej        de
d
z  dej        d
z  dej        e
z  f
dZdeeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )MiniMaxM2ModelFr(   r+   vllm_configr+   c                |   t                                                       |j        j        |j        |j        |j        | _        j        | _        t                      j	        r&t          j        j        d | d          | _        nt                      | _        t          j        fd| d          \  | _        | _        | _        t                      j        r!t)          j        j                  | _        nt                      | _        t/          ddgj                  | _        d S )	Nz.embed_tokens)r*   r+   c                 ,    t          |           S )N)r   rv   r*   )r   )r+   rv   r)   r   r*   s    rK   <lambda>z)MiniMaxM2Model.__init__.<locals>.<lambda>U  s&    0)))   rL   z.layersr   r{   rV   r   )r:   r;   r   	hf_configrv   r*   r)   
vocab_sizer   is_first_rankr   r4   embed_tokensr!   r$   num_hidden_layersstart_layer	end_layerlayersis_last_rankr   rt   normr#   make_empty_intermediate_tensors)rI   r   r+   rv   r)   r   r*   rJ   s      @@@@rK   r;   zMiniMaxM2Model.__init__>  s]   )3"/"/"/ +>>' 	1 6!"! ///	! ! !D !/ 0 0D8C$       %%%
9
 
9
 
9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,rL   	input_idsrO   c                 ,    |                      |          S rQ   )r   rI   r   s     rK   embed_input_idszMiniMaxM2Model.embed_input_idsg  s      +++rL   Nr   intermediate_tensorsinputs_embedsc                 b   t                      j        r||}n|                     |          }d }n|J |d         }|d         }| j        | j        | j                 D ]} ||||          \  }}t                      j        st          ||d          S |                     ||          \  }}|S )NrV   r   )rV   r   )	r   r   r   r   r   r   r   r   r   )	rI   r   r   r   r   rV   r   layerr^   s	            rK   r`   zMiniMaxM2Model.forwardj  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H[!1DN!BC 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qrL   c                 H    t          j        | ddd| j        j                  S )Nw1w2w3)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer0   )r   make_expert_params_mappingr)   r=   rI   s    rK   get_expert_mappingz!MiniMaxM2Model.get_expert_mapping  s0    2 $ $"5
 
 
 	
rL   weightsc           	      |   g d}|                                  }t          |                                           }t                      }|D ]p\  }}d|v rt	          | j        |          }|#|D ]r\  }	}
}|
|vrd|v r||vr|                    |
|	          }|                    d          r||vrDt          ||           rU||         }|j	        } ||||            n|D ]U}|\  }	}
}}|
|vr|                    |
|	          }t          ||           r5||         }|j	        } ||||||            nk|                    d          r||vrt          ||          }|t          ||           r1||         }t          |dt                    } |||           |                    |           r|S )N))r   q_projr   )r   k_projr   )r   v_projr   zrotary_emb.inv_freqzmlp.experts.z.bias)shard_id	expert_idrE   )r   r   named_parametersset#get_spec_layer_idx_from_weight_namer)   replaceendswithr"   rE   r   r?   r   add)rI   r   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnamerN   
spec_layer
param_nameweight_namer   rM   rE   mappingr   s                   rK   load_weightszMiniMaxM2Model.load_weights  sq   "
 "
 "
 !% 7 7 9 94002233"%%%#* F	$ F	$D-$,,<T[$OOJ%5K =8 =81
Kd** #d**K0G0G||K<<==)) d+.E.E*466 #D) % 3e]H===4 $8 $8GCJ@JY"$.. <<Z@@D.tT:: ! '-E$)$7M!M%!)"+    E }}W-- !$k2I2I  5T;GGD| .tT:: ! '-E$+0E% %M "M%777d####rL   rQ   )ra   rb   rc   fall_back_to_pt_during_loadr   rd   r;   rA   rf   r   r   r`   listtupler   r   r   r   r   rg   rh   s   @rK   r   r   :  sZ       "'AC '
 '
 '
z '
3 '
 '
 '
 '
 '
 '
R, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   8
DsCc/A)B$C 
 
 
 
UHU33D-E$F U3s8 U U U U U U U UrL   r   c                   R    e Zd Zdg diZdddedef fdZdej        d	ej        fd
Z		 	 ddej        dej        de
dz  dej        dz  d	ej        e
z  f
dZdej        d	ej        dz  fdZdeeeej        f                  d	ee         fdZd	eeeeeef                  fdZ xZS )MiniMaxM2ForCausalLMr   )r   r   r   r(   r   r   r+   c                   t                                                       |j        j        }|j        }|| _        || _        t          |j        d          r|j        j        | j        _        t          |t          |d                    | _
        t                      j        r"t          |j        |j        d           | _        nt#                      | _        t%          |j                  | _        | j
        j        | _        d S )Nr   model)r   r+   )r*   )r:   r;   r   r   r*   r)   r   r   r   r%   r   r   r   r   r   r4   lm_headr!   r   logits_processorr   )rI   r   r+   r)   r*   rJ   s        rK   r;   zMiniMaxM2ForCausalLM.__init__  s    )3"/(;+_== 	O(3(@(NDK%##L,I,I
 
 

 >>& 	,)!6#5D  DLL *++DL /0A B BJ6 	,,,rL   r   rO   c                 6    | j                             |          S rQ   )r   r   r   s     rK   r   z$MiniMaxM2ForCausalLM.embed_input_ids  s    z)))444rL   Nr   r   r   c                 6    |                      ||||          }|S rQ   )r   )rI   r   r   r   r   kwargsrV   s          rK   r`   zMiniMaxM2ForCausalLM.forward	  s)     

y"6
 
 rL   rV   c                 <    |                      | j        |          }|S rQ   )r   r   )rI   rV   logitss      rK   compute_logitsz#MiniMaxM2ForCausalLM.compute_logits  s      &&t|]CCrL   r   c                 J    t          |           }|                    |          S rQ   )r    r   )rI   r   loaders      rK   r   z!MiniMaxM2ForCausalLM.load_weights  s#    "4((""7+++rL   c                 4    | j                                         S rQ   )r   r   r   s    rK   r   z'MiniMaxM2ForCausalLM.get_expert_mapping!  s    z,,...rL   r   )ra   rb   rc   packed_modules_mappingr   rd   r;   rA   rf   r   r   r`   r   r   r   r   r   r   r   r   rg   rh   s   @rK   r   r     s        
 
 
 BD 
 
 
z 
3 
 
 
 
 
 
,5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
+	+   | 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rL   r   r)   r   rO   c                     t          | d          rL| j        dk    rA| j        }t          | j                  D ]%}|                    d||z    d          r||z   c S &d S )Nnum_mtp_modulesr   zmodel.layers.r-   )r   r   r   range
startswith)r)   r   r   is       rK   r   r   %  s     v()) %v/E/I/I,	v-.. 	% 	%A%%&Fi!m&F&F&FGG % 1}$$$%4rL   )@__doc__collections.abcr   typingr   rA   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.distributedr   r   r   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.linear_attnr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr    r!   r"   r#   r$   r%   Moduler'   rj   r   r   r   rd   r   r    rL   rK   <module>r     s  0 & % $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = < < < < < < < < < <         
 : 9 9 9 9 9 8 8 8 8 8 8         
 H G G G G G O O O O O O F F F F F F @ @ @ @ @ @               . - - - - - 0 0 0 0 0 0 0 0               B@ B@ B@ B@ B@29 B@ B@ B@Jb b b b b b b bJD' D' D' D' D'BI D' D' D'N i i i i iRY i i iX;/ ;/ ;/ ;/ ;/29lJ ;/ ;/ ;/|+.4Z     rL   