
    .`i                     F   d Z ddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZmZmZ  ee           Z! e	ddddd           G d de                      Z" G d deej#                  Z$dS )z>Inference-only MiMo model compatible with HuggingFace weights.    )Iterable)isliceN)support_torch_compile)
VllmConfig)get_pp_group)init_logger)LogitsProcessor)ParallelLMHead)default_weight_loadermaybe_remap_kv_scale_name)Qwen2ForCausalLM
Qwen2Model)IntermediateTensors   )PPMissingLayeris_pp_missing_parametermaybe_prefix)	input_ids	positionsintermediate_tensorsinputs_embeds)dynamic_arg_dimsc                       e Zd Z	 	 d
dej        dej        dedz  dej        dz  dej        ez  f
dZdeee	ej        f                  de
e	         fd	ZdS )	MiMoModelNr   r   r   r   returnc                 H   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S ||z   }|S )Nhidden_statesresidual)r   r   )	r   is_first_rankembed_input_idsr   layersstart_layer	end_layeris_last_rankr   )selfr   r   r   r   r   r   layers           s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mimo.pyforwardzMiMoModel.forward>   s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF   &0    weightsc                 z   g d}t          |                     d                    }t                      }|D ]\  }}d|v rd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vrt          ||          }|0t          ||           rB||         }t          |dt                    }	 |	||           |                    |           |S )	N))qkv_projq_projq)r-   k_projk)r-   v_projv)gate_up_proj	gate_projr   )r4   up_projr   F)remove_duplicate
mtp_layerszrotary_emb.inv_freqweight_loaderr   z.bias)dictnamed_parameterssetquant_configget_cache_scalegetattrr   dimaddreplaceendswithr   r9   r   )r&   r+   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr9   
param_nameweight_nameshard_ids                r(   load_weightszMiMoModel.load_weights\   s7   "
 "
 "
 400%0HHII"%%%#* +	$ +	$D-t##$,, ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r*   )NN)__name__
__module____qualname__torchTensorr   r)   r   tuplestrr<   rN    r*   r(   r   r   5   s         <@-1 < < 2D8	
 |d* 
+	+   <6HU33D-E$F 63s8 6 6 6 6 6 6r*   r   c                   N    e Zd ZdddedefdZdej        dej        dz  fd	ZdS )
MiMoForCausalLM )prefixvllm_configrZ   c          	         t           j                            |            |j        j        }|j        }|| _        || _        t          |t          |d                    | _	        t                      j        rJ|j        r| j	        j        | _        nDt          |j        |j        |t          |d                    | _        nt%                      | _        t'          |j                  | _        | j	        j        | _        d S )Nmodel)r[   rZ   lm_head)r=   rZ   )nnModule__init__model_config	hf_configr=   configr   r   r]   r   r%   tie_word_embeddingsembed_tokensr^   r
   
vocab_sizehidden_sizer   r	   logits_processormake_empty_intermediate_tensors)r&   r[   rZ   rd   r=   s        r(   ra   zMiMoForCausalLM.__init__   s    
	4   )3"/(#L,I,I
 
 

 >>& 	,) #z6-%&!-'	::	      *++DL /0A B B J6 	,,,r*   r   r   Nc                 p    | j                             |          }|                     | j        |          }|S )N)r]   normri   r^   )r&   r   logitss      r(   compute_logitszMiMoForCausalLM.compute_logits   s3     
66&&t|]CCr*   )	rO   rP   rQ   r   rU   ra   rR   rS   rn   rV   r*   r(   rX   rX      ss        AC 
 
 
z 
3 
 
 
 
@| 
	     r*   rX   )%__doc__collections.abcr   	itertoolsr   rR   torch.nnr_   vllm.compilation.decoratorsr   vllm.configr   vllm.distributedr   vllm.loggerr   +vllm.model_executor.layers.logits_processorr	   3vllm.model_executor.layers.vocab_parallel_embeddingr
   -vllm.model_executor.model_loader.weight_utilsr   r    vllm.model_executor.models.qwen2r   r   vllm.sequencer   utilsr   r   r   rO   loggerr   r`   rX   rV   r*   r(   <module>r~      s  6 E D $ $ $ $ $ $              = = = = = = " " " " " " ) ) ) ) ) ) # # # # # # G G G G G G N N N N N N        J I I I I I I I - - - - - - H H H H H H H H H H	X		  !	   U U U U U
 U U Up' ' ' ' '&	 ' ' ' ' 'r*   