
    .`iW                        d Z ddlZddlmZmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  G d dej8                  Z9 G d dej8                  Z: G d dej8                  Z;e G d dej8                              Z< G d d ej8        e/e0e.          Z=dS )!zInference-only Mixtral model.    N)CallableIterable)islice)nn)MixtralConfig)	Attention)support_torch_compile)CacheConfig
VllmConfigget_current_vllm_config)get_ep_groupget_pp_group$get_tensor_model_parallel_world_size)FusedMoE)RMSNorm)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                        e Zd ZdZ	 	 	 	 	 	 ddedededed	ej        dz  d
edz  dedz  dedz  dede	f fdZ
dej        dej        fdZ xZS )
MixtralMoEa  A tensor-parallel MoE implementation for Mixtral that shards each expert
    across all ranks.

    Each expert's weights are sharded across all ranks and a fused MoE
    kernel is used for the forward pass, and finally we reduce the outputs
    across ranks.
    N Fnum_expertstop_khidden_sizeintermediate_sizeparams_dtypequant_configtp_sizedp_sizeprefixenable_eplbc                    t                                                       || _        t                      j        | _        t                      j        | _        | j                                        | _	        t                      }|j        }|
| _        || _        || _        |j        j        | _        | j        | j        z   | _        | j        | j	        z  | _        | j        | j        z  | _        | j        | j        z   | _        t-          ||d|d |	 d          | _        t1          |||||dd||||	 d| j        | j                  | _        d S )NFz.gate)biasr.   r/   r2   Tz.experts)r*   r+   r,   r-   r.   reduce_resultsrenormalizer/   r0   r1   r2   r3   num_redundant_experts)super__init__r,   r   device_groupep_grouprank_in_groupep_ranksizeep_sizer   parallel_configr3   n_routed_expertsn_logical_expertseplb_configr8   n_redundant_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endr   gater   experts)selfr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   vllm_configrA   	__class__s                v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.pyr:   zMixtralMoE.__init__S   sa    	&$3#~~3}))++ .//%5& +!,#2#>#T "&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  %%###
 
 
	  ##/%%&&&("&":
 
 
    hidden_statesreturnc                     |j         }|                    d| j                  }|                     |          \  }}|                     ||          }|                    |          S )N)shapeviewr,   rJ   rK   )rL   rQ   
orig_shaperouter_logits_final_hidden_statess         rO   forwardzMixtralMoE.forward   s`    "(
%**2t/?@@99]33q"ll=-HH"''
333rP   )NNNNr)   F)__name__
__module____qualname____doc__inttorchdtyper   strboolr:   Tensorr[   __classcell__rN   s   @rO   r(   r(   J   s          ,026""!<
 <
<
 <
 	<

 <
 kD(<
 )4/<
 t<
 t<
 <
 <
 <
 <
 <
 <
 <
|4U\ 4el 4 4 4 4 4 4 4 4rP   r(   c                        e Zd Z	 	 	 	 ddededededed	edz  d
edz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )MixtralAttention   Nr)   configr,   	num_headsnum_kv_headsmax_positioncache_configr/   r2   rR   c	           
         t                                                       || _        t                      }	|| _        | j        |	z  dk    sJ | j        |	z  | _        || _        | j        |	k    r| j        |	z  dk    sJ n|	| j        z  dk    sJ t          d| j        |	z            | _        t          |dd           | _
        | j
        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        t          || j
        | j        | j        d|| d          | _        t!          | j        | j
        z  |d|| d          | _        t%          | j
        ||j        d	
          | _        t+          | j        | j
        | j        | j        ||| d          | _        d S )Nr   r   head_dimg      Fz	.qkv_proj)r5   r/   r2   z.o_projT)rn   rope_parametersis_neox_stylez.attn)rm   ro   r/   r2   )r9   r:   r,   r   total_num_headsrl   total_num_kv_headsmaxrm   getattrrq   q_sizekv_sizescalingr   qkv_projr   o_projr   rr   
rotary_embr   attn)rL   rk   r,   rl   rm   rn   ro   r/   r2   r0   rN   s             rO   r:   zMixtralAttention.__init__   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
D99=  ,0DDDMnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%"2	
 
 
 NML*%%###
 
 
			rP   	positionsrQ   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )NrT   )dim)r{   splitrx   ry   r}   r~   r|   )
rL   r   rQ   qkvrY   qkvattn_outputoutputs
             rO   r[   zMixtralAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	rP   )rj   NNr)   )r\   r]   r^   r   r`   r
   r   rc   r:   ra   re   r[   rf   rg   s   @rO   ri   ri      s         &+/26A
 A
A
 A
 	A

 A
 A
 "D(A
 )4/A
 A
 
A
 A
 A
 A
 A
 A
F
<
 |
 
	
 
 
 
 
 
 
 
rP   ri   c                        e Zd Z	 	 	 	 ddededz  dedz  deded	df fd
Zde	j
        de	j
        de	j
        dz  d	e	j
        fdZ xZS )MixtralDecoderLayerNr)   Frk   ro   r/   r2   r3   rR   c                    t                                                       |j        | _        t          || j        |j        |j        |j        ||| d          | _        t          |j	        |j
        |j        |j        || d|          | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz
.self_attn)rk   r,   rl   rn   rm   ro   r/   r2   z.block_sparse_moe)r*   r+   r,   r-   r/   r2   r3   eps)r9   r:   r,   ri   num_attention_headsmax_position_embeddingsnum_key_value_heads	self_attnr(   num_local_expertsnum_experts_per_tokr-   block_sparse_moer   rms_norm_epsinput_layernormpost_attention_layernorm)rL   rk   ro   r/   r2   r3   rN   s         rO   r:   zMixtralDecoderLayer.__init__   s     	!-)(073%%(((	
 	
 	
 !+0,*$6%///#!
 !
 !
  'v'9v?RSSS(/F$7)
 )
 )
%%%rP   r   rQ   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rQ   )r   r   r   r   )rL   r   rQ   r   s       rO   r[   zMixtralDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx--m<<h&&rP   )NNr)   F)r\   r]   r^   r   r
   r   rc   rd   r:   ra   re   r[   rf   rg   s   @rO   r   r      s         ,026! 
  
 
 "D( 
 )4/	 

  
  
 
 
  
  
  
  
  
D'<' |' ,%	'
 
' ' ' ' ' ' ' 'rP   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )MixtralModelr)   r2   rM   r2   c                4    t                                                       |j        j        |j        |j        |j        } _         _        j         _        j         _	        t           j        j                   _        |j         _        |j        j         _        t!          j         fd| d          \   _         _         _        t+          j        j                   _        t1          ddgj                   _        d S )Nc                 6    t          | j                  S )N)r/   r2   r3   )r   r3   )r2   ro   rk   r/   rL   s    rO   <lambda>z'MixtralModel.__init__.<locals>.<lambda>A  s)    .) ,   rP   z.layersr   r   rQ   r   )r9   r:   model_config	hf_configro   r/   rA   rk   
vocab_sizeorg_vocab_sizer   r,   embed_tokensr3   rD   r8   r%   num_hidden_layersstart_layer	end_layerlayersr   r   normr$   make_empty_intermediate_tensors)rL   rM   r2   rA   ro   rk   r/   rN   s   `   @@@rO   r:   zMixtralModel.__init__(  s6   )3"/"/%5( +$/2O
 

 +6%4%@%V"8C$       %%%
9
 
9
 
9
5$.$+ F.F4GHHH	/Vj)6+=0
 0
,,,rP   	input_idsrR   c                 ,    |                      |          S N)r   rL   r   s     rO   embed_input_idszMixtralModel.embed_input_idsP  s      +++rP   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )NrQ   r   )rQ   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	rL   r   r   r   r   rQ   r   layerrY   s	            rO   r[   zMixtralModel.forwardS  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qrP   c                 T    t          j        | ddd| j        j        | j                  S )Nw1w2w3)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer*   r8   )r   make_expert_params_mappingrk   r   r8   rL   s    rO   get_expert_mappingzMixtralModel.get_expert_mappingm  s8     2 $ $"5"&"<
 
 
 	
rP   weightsc           
         g d}t          |                                           }t                      }|                                 }|D ]\  }}| j        ~| j                            |          x}rb||         }	t          |	dt                    }
|                                dk    r|n|d         } |
|	|           |	                    |           |D ]\  }}}||vr|
                    ||          }|                    d          s|                    d          r||vrPt          ||           ra|                    d          rt          ||          }|||         }	|	j        }
 |
|	||            n:d}|D ]}|\  }}}}||vrd}|
                    ||          }t          ||           r7|                    d          s|                    d          r||vrf||         }	t          j        t"          d	t$          f         |	j                  }
 |
|	||||d
          }|r|} n|r|                    d          s|                    d          r||vrt          ||           r1t          ||          }|E||         }	t          |	dt                    }
 |
|	|           |	                    |           |S )N))r{   q_projr   )r{   k_projr   )r{   v_projr   weight_loaderr   z.bias_biasscaleFT.)shard_id	expert_idreturn_success)dictnamed_parameterssetr   r/   get_cache_scalerw   r   r   addreplaceendswithr#   r   r   typingcastr   rd   )rL   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
scale_nameparamr   
param_nameweight_namer   is_expert_weightmappingr   name_mappedsuccesss                      rO   load_weightszMixtralModel.load_weightsy  s   "
 "
 "
 4002233"%%% $ 7 7 9 9#* [	$ [	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K L8 L81
Kd**||K<< MM'**.2mmG.D.D+--*466 ==)) !4T;GGD| #D) % 3e]H===#( 4 58 58GCJ@JY"$.. '+$"&,,{J"G"GK /{DAA !  $,,W55!9D9M9Mg9V9V!%[88 '4E$*K d+U-@% %M ,m%#!)"+'+  G  * ( !  g..!26--2H2H!k11 .tT:: ! 4T;GGD| '-E$+0E% %M "M%777d####rP   r   )r\   r]   r^   r   rc   r:   ra   re   r   r   r[   listtupler`   r   r   r   r   rf   rg   s   @rO   r   r   &  sT       AC &
 &
 &
z &
3 &
 &
 &
 &
 &
 &
P, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   4

DsCc/A)B$C 

 

 

 

gHU33D-E$F g3s8 g g g g g g g grP   r   c                   t    e Zd ZdZdg diZdddZddd	ed
ef fdZde	de	ddfdZ
dej        dej        fdZ	 	 ddej        dej        dedz  dej        dz  dej        ez  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZdeeeee	ef                  fdZ xZS )MixtralForCausalLMFr{   )r   r   r   input_embeddingsoutput_embeddings)r   lm_headr)   r   rM   r2   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t          |j
        |j        |t          |d                    | _        | j        j        r| j        j        j        | j        _        t!          |j
                  | _        | j        j        | _        g | _        g | _        d }| j        j        D ]}t-          |t.                    rt-          |t0                    sJ t3          |d          rEt-          |j        t6                    r+|j        }| j                            |j        j                   t=          | j                  | _        |tA          d          |j!        | _"        |j#        | _$        |j%        | _&        |j'        | _(        |j)        | _*        d| _+        d| _,        d S )	Nmodel)rM   r2   r   )r/   r2   r   z+No MixtralMoE layer found  in model.layers.r   r   )-r9   r:   r   r   r/   rk   r   r&   r   r   r   r,   r   tie_word_embeddingsr   weightr   logits_processorr   expert_weights
moe_layersr   
isinstancer"   r   hasattrr   r(   appendrK   lennum_moe_layersRuntimeErrorrC   num_logical_expertsrF   num_physical_expertsrG   num_local_physical_expertsrB   num_routed_expertsrE   r8   num_expert_groupsnum_shared_experts)rL   rM   r2   rk   r/   example_moer   rN   s          rO   r:   zMixtralForCausalLM.__init__  s   )3"/(!#L,I,I
 
 

 &%	22	
 
 
 ;* 	A"&*"9"@DL /0A B BJ6 	, !Z& 	G 	GE%00 e%899999u011 Gj&
7 7 G $4&&u'='EFFF!$/22LMMM#.#@ $/$B!*5*N'"-">%0%D"!""#rP   r   r   rR   Nc                 @   | j         |k    sJ || _        || _         || j        z
  | _        | j        j        D ]f}t          |d          rTt          |j        t                    r:|j        }||_
        ||_        | j        |_        |j                                         gd S )Nr   )r   r   r   r8   r   r   r   r   r   r(   rG   rF   rE   rK   update_expert_map)rL   r   r   r   moes        rO    update_physical_experts_metadataz3MixtralForCausalLM.update_physical_experts_metadata)  s    
 .2LLLLL$8!*D'%9D<T%T"Z& 	0 	0Eu011 0j&
7 7 0 ,/I,)=&*.*D'--///	0 	0rP   r   c                 6    | j                             |          S r   )r   r   r   s     rO   r   z"MixtralForCausalLM.embed_input_ids<  s    z)))444rP   r   r   r   c                 6    |                      ||||          }|S r   )r   )rL   r   r   r   r   rQ   s         rO   r[   zMixtralForCausalLM.forward?  s)     

y"6
 
 rP   rQ   c                 <    |                      | j        |          }|S r   )r   r   )rL   rQ   logitss      rO   compute_logitsz!MixtralForCausalLM.compute_logitsK  s      &&t|]CCrP   r   c                 J    t          |           }|                    |          S r   )r!   r   )rL   r   loaders      rO   r   zMixtralForCausalLM.load_weightsR  s#    "4((""7+++rP   c                 4    | j                                         S r   )r   r   r   s    rO   r   z%MixtralForCausalLM.get_expert_mappingV  s    z,,...rP   )NN)r\   r]   r^   fall_back_to_pt_during_loadpacked_modules_mappingembedding_modulesr   rc   r:   r`   r  ra   re   r   r   r[   r  r   r   r   r   r   r   rf   rg   s   @rO   r   r     s       "' 	 
 
 
 +& 
 BD 3$ 3$ 3$z 3$3 3$ 3$ 3$ 3$ 3$ 3$j0!0 %(0 
	0 0 0 0&5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rP   r   )>r_   r   collections.abcr   r   	itertoolsr   ra   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   r   vllm.distributedr   r   r   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   r    utilsr!   r"   r#   r$   r%   r&   Moduler(   ri   r   r   r    rP   rO   <module>r!     sd  2 $ #  . . . . . . . .              & & & & & & * * * * * * = = = = = = H H H H H H H H H H         
 : 9 9 9 9 9 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - B B B B B B B B B B               N4 N4 N4 N4 N4 N4 N4 N4bN N N N Nry N N Nb7' 7' 7' 7' 7'") 7' 7' 7't y y y y y29 y y yxt/ t/ t/ t/ t/L*>N t/ t/ t/ t/ t/rP   