
    .`iZ                     X   d Z ddlmZ ddlmZ ddlZddlmc mZ	 ddlmZ ddl
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  G d dej8                  Z9 G d dej8                  Z: G d dej8                  Z; G d dej8                  Z<e G d  d!ej8                              Z= G d" d#ej8        e0e/          Z> G d$ d%e>          Z?dS )&zDInference-only BailingMoE model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   |     e Zd Z	 	 	 	 ddededz  dedz  dedef
 fd	Zd
e	j
        de	j
        de	j
        fdZ xZS )BailingAttentionNT configcache_configquant_configreduce_resultsprefixc           
      >   t                                                       |j        | _        |j        | _        |j        | _        t                      }| j        |z  dk    sJ | j        | j        k    sJ | j        |z  | _        |j	        p| j        | j        z  | _	        | j	        | j        z  | _
        t          d| j        |z            | _        | j        | j	        z  | _        | j	        dz  | _        t          |dd          | _        t          |dd          | _        t%          | j        | j	        | j        | j        |j        p|j        || d          | _        | j        r| j        rt-          | j	        |j        	          nt1          j        | j	        d
	          | _        | j        rt-          | j	        |j        	          nt1          j        | j	        d
	          | _        t9          | j        | j	        z  | j        |j        ||| d          | _        t          |d| j	                  }|| j	        z  |j        d<   t?          | j	        |j         |j        d          | _!        tE          | j        | j	        | j        | j        || d          | _#        d S )Nr   r   g      use_qk_normFuse_rmsnormz.query_key_valuebiasr)   r+   epsgư>z.denser0   r)   r*   r+   
rotary_dimpartial_rotary_factorT)max_positionrope_parametersis_neox_stylez.attn)num_kv_headsr(   r+   )$super__init__hidden_sizenum_attention_headstotal_num_headsnum_key_value_headstotal_kv_headsr   	num_headshead_dimq_size_per_rankmaxr9   kv_size_per_rankscalegetattrr-   r.   r   use_biasuse_qkv_biasquery_key_valuer   rms_norm_epsr   	LayerNormquery_layernormkey_layernormr   denser7   r   max_position_embeddings
rotary_embr   attn)	selfr'   r(   r)   r*   r+   tp_sizer4   	__class__s	           z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/bailing_moe.pyr;   zBailingAttention.__init__I   s    	!-%9$8688#g-2222#t':::::-8UD,<@T,T#}t~=4#6'#ABB $ 1DM A]D(
"6=%@@"6=%@@0M /8V%8%... 
  
  
  
	 #;6+>????\$-T:::   #;6+>????\$-T:::  ' 4=0%)$$$
 
 

 V\4=AA
:Dt}:T67"M7"2	
 
 
 NMJ*%###
 
 
			    hidden_statesposition_idsreturnc                 t   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}| j        r|                    d| j        | j                  }|                    d| j        | j                  }| 	                    |          }| 
                    |          }|                    d| j                  }|                    d| j                  }|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rJ   splitrC   rE   r-   viewrA   rB   r9   rM   rN   rQ   rR   rO   )
rS   rX   rY   qkv_qkvcontext_layerattn_outputs
             rV   forwardzBailingAttention.forward   s*   
 %%m44Q))!4#8$:OPVX  
 
1a  	2r4>4=99Ar4,dm<<A$$Q''A""1%%Ar4/00Ar4011A|Q221		!Q**M22QrW   )NNTr&   )__name__
__module____qualname__r   r	   r   boolstrr;   torchTensorrg   __classcell__rU   s   @rV   r%   r%   H   s         ,026#J
 J
 J
 "D(J
 )4/	J

 J
 J
 J
 J
 J
 J
 J
X| l 
	       rW   r%   c                   R     e Zd Z	 	 	 ddedededz  dedz  ded	df fd
Zd Z	 xZ
S )
BailingMLPNTr&   intermediate_sizer'   r)   r*   r+   rZ   c           	         t                                                       t          |j        |gdz  |j        || d          | _        t          ||j        |j        ||| d          | _        t                      | _	        d S )N   z.gate_up_projr/   z
.down_projr3   )
r:   r;   r   r<   rH   gate_up_projr   	down_projr   act_fn)rS   rs   r'   r)   r*   r+   rU   s         rV   r;   zBailingMLP.__init__   s     	6!#%+++
 
 
 +%)(((
 
 
 !llrW   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rv   rx   rw   )rS   xra   s      rV   rg   zBailingMLP.forward   sB      ##1KKNN~~a  1rW   NTr&   )rh   ri   rj   intr   r   rk   rl   r;   rg   ro   rp   s   @rV   rr   rr      s        
 37&*# ## !# )4/	#
 t# # 
# # # # # #4      rW   rr   c                   l     e Zd Z	 	 	 ddedededz  dedz  def
 fd	Zd
e	j
        de	j
        fdZ xZS )
BailingMoENTr&   rs   r'   r)   r*   r+   c                    t                                                       t                      | _        t	                      | _        |j        | _        |j        | _        |j	        | _
        |j        | _        || _        |j        | _        t          |dd           | _        t          |dd           | _        t          |dd           | _        | j        d uo| j        d u| _        t          |dd          | _        t          |dd           }|d | _        n)|dk    rt*          j        | _        nt*          j        | _        t1          j        | j        | j        d| j        	          | _        t          |d
d          rCt1          j        t+          j        |j        ft*          j                            | j        _        nd | j        _        | j        j        | j        j        j        nd | _        | j        /| j        dk    r| j        | j        dk    r| j        
J d            nd| _        | j        dk    rFtA          |d          r|j!        }n|j"        }||j        z  }tG          |||d| d          | _$        nd | _$        tK          | j$        | j        | j        | j        |j"        d| j
        || d| j        | j        j        | j        | j        | j                  | _&        d S )Nscore_functionn_group
topk_grouprouted_scaling_factorg      ?router_dtypefp32F)r0   dtypemoe_router_enable_expert_bias)r   softmaxsigmoidzdscore_function and correction_bias should be in 2 combination (softmax, None) or (sigmoid, not None)r   #moe_shared_expert_intermediate_sizez.shared_experts)rs   r'   r)   r*   r+   z.experts)shared_expertsnum_expertstop_kr<   rs   r*   renormalizer)   r+   scoring_funce_score_correction_biasnum_expert_groupr   use_grouped_topk)'r:   r;   r   rT   r   tp_rankr   num_experts_per_tokr   norm_topk_probnorm_expert_probr<   r)   num_shared_expertsrG   r   r   r   r   r   r   rm   float32bfloat16r   Lineargate	Parameteremptyexpert_biasdatacorrection_biashasattrr   moe_intermediate_sizerr   r   r   experts)rS   rs   r'   r)   r*   r+   r   rU   s          rV   r;   zBailingMoE.__init__   s    	;==577!-/
 & 5!-("(";%f.>EEvy$77!&,== $D 8 XT_TX=X%,V5Lc%R%R"v~t<< $DV## %D %DI#	
 
 
	 6:EBB 	)$&LV/1GGG% %DI!! %)DI! +/)*?*KDI!&&QU 	 *#y00T5I5Q#y00T5I5U5Uv 6V5U #,D"Q&&vDEE A$*$N!!$*$@!!::","3)$ 111# # #D #'D%.(*($: -%&&&,$(I$9!\!2
 
 
rW   rX   rZ   c                    |j         \  }}|                    d|          }|                     |                    | j                            }|                    |j                  }|                     ||          }| j        |\  }}nd }|| j        z  }|||z   }| j	        dk    r| j        
                    |          }|                    ||          S )Nr\   )rX   router_logitsr   )shaper_   r   tor   r   r   r   r   rT   &maybe_all_reduce_tensor_model_parallel)rS   rX   
num_tokensr<   r   final_hidden_statesshared_outputs          rV   rg   zBailingMoE.forward1  s    "/"5
K%**2{;; 		-"2"243D"E"EFF%(()<=="ll'} + 
 
 *1D.M.. Mt99$"5"E<!"&,"U"U## # #''
K@@@rW   r|   )rh   ri   rj   r}   r   r   rk   rl   r;   rm   rn   rg   ro   rp   s   @rV   r   r      s        
 37&*]
 ]
]
 !]
 )4/	]

 t]
 ]
 ]
 ]
 ]
 ]
 ]
~AU\ Ael A A A A A A A ArW   r   c            	            e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        dz  dej	        fdZ
 xZS )BailingMoeBlockNr&   r'   r(   r)   r+   c                    t                                                       t          |                    d          d                   }|| _        |j        }|j        }t          ||j                  | _	        t          |||| d          | _        t          ||j                  | _        ||j        k     rt          }nt          } ||||d| d          | _        d S )N.r\   r1   z
.attentionr+   Tz.mlp)r:   r;   r}   r^   r'   r<   rs   r   rK   input_layernormr%   	attentionpost_attention_layernormfirst_k_dense_replacerr   r   mlp)
rS   r'   r(   r)   r+   	layer_idxr<   rs   	mlp_classrU   s
            rV   r;   zBailingMoeBlock.__init__O  s     	S))"-..	("4&{8KLLL)L,&7L7L7L
 
 
 )0AT(U(U(U% v333"II"I9v|TV///
 
 
rW   rX   rY   residualrZ   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)rX   rY   )r   r   r   r   )rS   rX   rY   r   s       rV   rg   zBailingMoeBlock.forwardl  s     $H 00??MM&*&:&:=(&S&S#M8'% ' 
 

 #'"?"?x"X"Xx//h&&rW   )NNr&   )rh   ri   rj   r   r	   r   rl   r;   rm   rn   rg   ro   rp   s   @rV   r   r   N  s         ,026
 
 
 "D(
 )4/	

 
 
 
 
 
 
:'|' l' ,%	'
 
' ' ' ' ' ' ' 'rW   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )BailingMoeModelr&   r   vllm_configr+   c                6   t                                                       |j        j        |j        |j        | _        j        | _        j        | _	        t          dd          | _        t                      j        s| j        r9t                      j        r&t          | j        | j	        | d          | _        nt#                      | _        t$          j                            j                  | _        t-          j        fd| d          \  | _        | _        | _        t7          dd	gj                  | _        t                      j        r"t;          | j	        j        
          | _        d S t#                      | _        d S )Ntie_word_embeddingsFz.word_embeddingsr)   r+   c                 *    t          |           S )N)r'   r(   r)   r+   )r   )r+   r(   r'   r)   s    rV   <lambda>z*BailingMoeModel.__init__.<locals>.<lambda>  s"    ?))	   rW   z.layersr   rX   r   r1   ) r:   r;   model_config	hf_configr(   r)   r'   
vocab_sizer<   	embed_dimrG   r   r   is_first_rankis_last_rankr   word_embeddingsr   rm   r   Dropoutembedding_dropoutr"   num_hidden_layersstart_layer	end_layerlayersr!   make_empty_intermediate_tensorsr   rK   norm)rS   r   r+   r(   r'   r)   rU   s      @@@rV   r;   zBailingMoeModel.__init__  s    	)3"/"/ ++#*63H%#P#P >>' 
	4$
	4)5)D
	4 $:) 222	$ $ $D   $2#3#3D !&!1!1&2J!K!K8C$      %%%	9
 	9
 	9
5$.$+ 0Wj)6+=0
 0
, >>& 	)F4GHHHDIII&((DIIIrW   	input_idsrZ   c                 ,    |                      |          S rz   )r   rS   r   s     rV   embed_input_idszBailingMoeModel.embed_input_ids  s    ##I...rW   NrY   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S || 	                    |          }n| 	                    ||          \  }}|S )NrX   r   )rX   r   )
r   r   r   r   r   r   r   r   r   r   )	rS   r   rY   r   r   rX   r   layerra   s	            rV   rg   zBailingMoeModel.forward  s    >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88 ~~* 	F&"/XFF    $		- 8 8#'99]H#E#E qrW   c                 H    t          j        | ddd| j        j                  S )N	gate_projrw   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer   )r   make_expert_params_mappingr'   r   rS   s    rV   get_expert_mappingz"BailingMoeModel.get_expert_mapping  s0    8 + +'/
 
 
 	
rW   weightsc           	         ddg}t          |                     d                    }t                      }|                                 }|D ]\  }}t	          | j        d          r(| j        j        rd|v rt          j        |ddd	
          }|D ]s\  }}	}
|	|vrd|v r|	                    |	|          }|
                    d          r||vr@||vrEt          ||           rV||         }|j        } ||||
            n|D ]Z}|\  }}	}}
|	|vr|	                    |	|          }t          ||           r5||vr:||         }|j        } |||||
|            n]|
                    d          r||vr1||vr7t          ||           rI||         }t          |dt                    } |||           |                    |           |S )N)rv   r   r   )rv   r   r   F)remove_duplicate	norm_headzlm_head.weightr   ru   gHz>)r]   pr2   zmlp.expertsz.bias)shard_id	expert_idweight_loader)dictnamed_parameterssetr   r   r'   r   F	normalizereplaceendswithr    r   rG   r   add)rS   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   mappingr   s                  rV   load_weightszBailingMoeModel.load_weights  s    -*"
 400%0HHII"%%% $ 7 7 9 9#* >	$ >	$D-[11QK)Q %,, !MqA4 P P P5K 58 581
Kd** D((||K<<==)) d+.E.E{***466 #D) % 3e]H===4 !8 !8GCJ@JY"$.. <<Z@@D.tT:: ! ;.. '-E$)$7M!M%!)"+    E}}W-- !$k2I2I ;.. .tT:: ! '-E$+0E% %M "M%777d####rW   rz   )rh   ri   rj   r
   rl   r;   rm   rn   r   r   rg   listtupler}   r   r   r   r   ro   rp   s   @rV   r   r     s]        	0) 0) 0)  0) 	0) 0) 0) 0) 0) 0)d/ /%, / / / / .2" "<" l" 2D8	"
 |d*" 
+	+" " " "H
DsCc/A)B$C 
 
 
 
IHU33D-E$F I3s8 I I I I I I I IrW   r   c                   Z    e Zd ZdgddgdZdddeded	d
f fdZdej        d	ej        fdZ		 	 ddej        dej        de
d
z  dej        d
z  d	ej        e
z  f
dZdej        d	ej        d
z  fdZdeeeej        f                  d	ee         fdZd	eeeeeef                  fdZ xZS )BailingMoeForCausalLMrJ   r   r   )rJ   rv   r&   r   r   r+   rZ   Nc          	         t                                                       |j        j                                        }||j        _        |j        }|| _        || _        |j        | _        t          |t          |d                    | _
        t          |dd          | _        t                      j        rc| j        r| j
        j        | _        n0t#          |j        |j        |t          |d                    | _        t)          |j                  | _        nt-                      | _        | j
        j        | _        d S )Nmodel)r   r+   r   Flm_headr   )r:   r;   r   r   get_text_configr)   r'   rP   r   r#   r  rG   r   r   r   r   r  r   r   r<   r   logits_processorr   r   )rS   r   r+   r'   r)   rU   s        rV   r;   zBailingMoeForCausalLM.__init__;  s2    	)3CCEE-3 *"/('-'E$$#L,I,I
 
 

 $+63H%#P#P >>& 	,' #z9-%&!-'	::	      %4F4E$F$FD!!)++DL J6 	,,,rW   r   c                 6    | j                             |          S rz   )r  r   r   s     rV   r   z%BailingMoeForCausalLM.embed_input_idsa  s    z)))444rW   	positionsr   r   c                 6    |                      ||||          }|S rz   )r  )rS   r   r  r   r   model_outputs         rV   rg   zBailingMoeForCausalLM.forwardd  s)     zzy"6
 
 rW   rX   c                 <    |                      | j        |          }|S rz   )r  r  )rS   rX   logitss      rV   compute_logitsz$BailingMoeForCausalLM.compute_logitsp  s      &&t|]CCrW   r   c                 b    t          | | j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   r   r   )rS   r   loaders      rV   r   z"BailingMoeForCausalLM.load_weightsw  s@    "+/+CMJ<<
 
 
 ""7+++rW   c                 4    | j                                         S rz   )r  r   r   s    rV   r   z(BailingMoeForCausalLM.get_expert_mapping~  s    z,,...rW   )NN)rh   ri   rj   packed_modules_mappingr
   rl   r;   rm   rn   r   r   rg   r  r   r   r   r   r   r}   r   ro   rp   s   @rV   r  r  2  s       -.
  	$
 $
 $
  $
 	$

 
$
 $
 $
 $
 $
 $
L5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rW   r  c                       e Zd ZdS )BailingMoeV2ForCausalLMN)rh   ri   rj    rW   rV   r  r    s        DrW   r  )@__doc__collections.abcr   	itertoolsr   rm   torch.nn.functionalr   
functionalr    transformers.configuration_utilsr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r    r!   r"   r#   Moduler%   rr   r   r   r   r  r  r  rW   rV   <module>r-     s  2 K J $ $ $ $ $ $                       = = = = = = * * * * * * = = = = = = / / / / / / / /         
 = < < < < < ? ? ? ? ? ? 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 0 0 0 0 0 0 0 0               d d d d dry d d dN       DzA zA zA zA zA zA zA zAz1' 1' 1' 1' 1'bi 1' 1' 1'h l l l l lbi l l l^M/ M/ M/ M/ M/BIz< M/ M/ M/`	 	 	 	 	3 	 	 	 	 	rW   