
    .`iZ                        d dl Z d dlmZmZ d dl mZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZmZ d d	lmZmZmZmZmZmZ d d
lmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJ d dlKmLZL deMfdZN G d dejO                  ZP G d  d!ejO                  ZQ G d" d#ejO                  ZR G d$ d%ejO                  ZS G d& d'ejO                  ZT G d( d)ejO                  ZUe G d* d+ejO                              ZV G d, d-ejO        e9e8          ZW G d. d/eWe7          ZX G d0 d1eW          ZY G d2 d3eY          ZZ G d4 d5eX          Z[ G d6 d7eX          Z\dS )8    N)CallableIterable)Any)nn)PretrainedConfig)	AttentionAttentionType)support_torch_compile)CacheConfigParallelConfig
VllmConfig)get_ep_groupget_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_sizeget_tp_group tensor_model_parallel_all_gather)
SiluAndMul)StaticSinkAttention)SharedFusedMoE)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)
MLAModulesMultiHeadLatentAttentionWrapper)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixsequence_parallel_chunk)set_weight_attrs)current_platform)IntermediateTensors)set_default_rope_theta)FlashAttentionDiffKVBackendact_fnc                 8    | dk    rt          d|  d          d S )NsiluzUnsupported activation: z!. Only silu is supported for now.)
ValueError)r6   s    x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/openpangu.pycheck_ffn_act_fnr;   Z   s5    PvPPP
 
 	
     c                   v     e Zd Z	 	 	 	 	 ddededededz  d	ed
ededdf fdZdej	        dej	        fdZ
 xZS )OpenPanguMLPNFT hidden_sizeintermediate_size
hidden_actquant_configbiasreduce_resultsprefixreturnc	           
      
   t                                                       t          ||gdz  |||| d          | _        t	          ||||||| d          | _        t          |           t                      | _        d S )N   .gate_up_proj)rD   rC   
disable_tprF   z
.down_proj)rD   rC   rE   rK   rF   )	super__init__r   gate_up_projr   	down_projr;   r   r6   )
selfr@   rA   rB   rC   rD   rE   is_sequence_parallelrF   	__class__s
            r:   rM   zOpenPanguMLP.__init__b   s     	6!#%++++
 
 
 +%)+(((
 
 
 	$$$ llr<   xc                     |                      |                     |                     |          d                             d         S )Nr   )rO   r6   rN   )rP   rS   s     r:   forwardzOpenPanguMLP.forward   s8    ~~dkk$*;*;A*>*>q*ABBCCAFFr<   )NFTFr?   )__name__
__module____qualname__intstrr    boolrM   torchTensorrU   __classcell__rR   s   @r:   r>   r>   a   s         37#"# ## # 	#
 )4/# # # # 
# # # # # #BG G%, G G G G G G G Gr<   r>   c            	       `     e Zd Z	 	 ddedededz  def fdZdej	        d	ej	        fd
Z
 xZS )OpenPanguMoENr?   configparallel_configrC   rF   c           
      n   t                                                       t                      | _        t	                      j        | _        |j        | _        t                      j	        | _
        | j
                                        | _        | j
                                        | _        |j        | _        |j        | _        |j        | _        t'          |j                   t+          |j        |j        dd | d          | _        t1          |d          rI|j        rBt5          j        t9          j        | j        t8          j                            | j        _        nd | j        _        |j         }|j!        | _!        |j"        | _#        | j        | _$        | j$        | j#        z   | _%        | j%        | j        z  | _&        | j        | j&        z  | _'        | j'        | j&        z   | _(        |j        =|j)        |j        z  }tU          |j        ||j        || j        d| d          | _+        nd | _+        tY          di d| j+        d	|j        d
|j-        d|j        d|j)        ddd|j.        d|ddddddd| dddddd| j        j        d| j!        d| j#        d| j        | _/        d S ) NFz.gaterD   rC   rF   router_enable_expert_biasdtypez.shared_experts)r@   rA   rB   rC   rQ   rE   rF   shared_expertsnum_expertstop_kr@   rA   rE   renormalizerC   use_grouped_topkTnum_expert_group   
topk_grouprF   z.expertsscoring_funcsigmoidrouted_scaling_factor      ?e_score_correction_biasenable_eplbnum_redundant_expertsrQ    )0rL   rM   r   tp_sizer   rank_in_grouptp_rankrs   r   device_groupep_grouprankep_ranksizeep_sizen_routed_expertsn_shared_expertsuse_sequence_parallel_moerQ   r;   rB   r   r@   gatehasattrrf   r   	Parameterr\   emptyfloat32ru   eplb_configrv   rw   n_redundant_expertsn_logical_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endmoe_intermediate_sizer>   ri   r   num_experts_per_toknorm_topk_probexperts)rP   rb   rc   rC   rF   r   rA   rR   s          r:   rM   zOpenPanguMoE.__init__   s>    	;==#~~3%+%A"$3}))++}))++%+%<%+%<$3$M!*+++$####
 
 
	 F788	50	5 13D1GGG1 1DI-- 15DI- &1*6#.#D !%!6"&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  ". & <v?V V"."."3!,)%)%>$ 111# # #D #'D% 
 
 
..
//
 ,,
 **	

 %::
 !5
 --
 &
 "T
 Q
 q
 &&&&
 #
 #&#
  %)I$E$E!
" ((#
$ #'":":%
& "&!:!:'
r<   hidden_statesrG   c                 6   |j         \  }}|                    d|          }| j        rt          |          }|                     |          \  }}|                     ||          }|\  }}| j        |J |j        t          j	        k    r|| j
        z  }n| j        |J |d| j
        z  z  }| j        	|J ||z  }| j        rt          |d          }|d |         }n%| j        dk    r| j                            |          }|                    ||          S )N)r   router_logitsrt   r   ro   )shapeviewrQ   r0   r   r   ri   rh   r\   float16rs   r   ry   &maybe_all_reduce_tensor_model_parallel)	rP   r   
num_tokens
hidden_dimr   _fused_moe_outshared_outputfinal_hidden_statess	            r:   rU   zOpenPanguMoE.forward   sp    "/!4
J%**2z::$ 	C3MBBM99]33q'} % 
 
 .;**& (((%-//4#== , ,,,S4#===M* ,,,=0$ 	"B#Q# # #6kzk"B\A"&,"U"U## # #''
J???r<   )Nr?   )rV   rW   rX   r   r   r    rZ   rM   r\   r]   rU   r^   r_   s   @r:   ra   ra      s        
 37V
 V
 V
 (V
 )4/	V

 V
 V
 V
 V
 V
 V
p(@|(@ 
(@ (@ (@ (@ (@ (@ (@ (@r<   ra   c                        e Zd Z	 	 	 	 ddededededed	ed
edz  dedededz  dedz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )OpenPanguMLAAttention    Nr?   rb   r@   	num_headsqk_nope_head_dimqk_rope_head_dim
v_head_dimq_lora_rankkv_lora_rankmax_position_embeddingscache_configrC   rF   rG   c                 T   t                                                       || _        || _        || _        || _        ||z   | _        || _        || _        || _	        t                      | _        || j        z  dk    rt          d| d| j         d          || j        z  | _        | j        dz  | _        |	| _        || _        | j        t#          | j        | j        | j	        | j        z   gd|| dd	          | _        t'          | j        |j        
          | _        t-          || j        | j        z  d|| d          | _        n\t-          | j        | j        | j        z  d|| d          | _        t3          | j        | j	        | j        z   d|| d          | _        t'          | j	        |j        
          | _        t-          | j	        | j        | j        | j        z   z  d|| d          | _        t;          | j        | j        z  | j        d|| d          | _        t?          |d           |j         d         ddddd|	ddd	}tC          ||	|d          | _"        tG          | j        | j        | j"        | j        | j        | j        nd | j        | j        nd | j        | j        nd | j        | j        nd | j        | j        nd d dd           }tI          | j        | j        | j        | j        | j        | j        | j        | j	        ||
||          | _%        d S )Nr   z
num_heads  is not divisible by tp_size .      F.fused_qkv_a_projT)rD   rC   rF   rK   epsz	.q_b_projre   .q_proj.kv_a_proj_with_mqaz
.kv_b_proj.o_proji'  )default_theta
rope_theta    ro   rt   yarndeepseek_yarn)	r   	beta_fast	beta_slowfactormscalemscale_all_dim original_max_position_embeddingstype	rope_typemax_positionrope_parametersis_neox_style)kv_a_layernorm	kv_b_proj
rotary_embo_projfused_qkv_a_projkv_a_proj_with_mqaq_a_layernormq_b_projq_projindexer	is_sparsetopk_indices_buffer)&rL   rM   r@   r   r   r   qk_head_dimr   r   r   r   ry   r9   num_local_headsscalingr   rF   r   r   r   rms_norm_epsr   r   r   r   r   r   r   r   r   r   r4   r   r!   r   r   r   mla_attn)rP   rb   r@   r   r   r   r   r   r   r   r   rC   rF   r   mla_modulesrR   s                  r:   rM   zOpenPanguMLAAttention.__init__  s    	&" 0 0+.>>$&(;==t|#q((TYTTT\TTT    )DL8'-'>$'$> !4#4t7L#LM) 333% % %D! "))9v?R!S!S!SD0!11) +++  DMM / !11) )))  DK '7 !D$99) 555' ' 'D# &d&7V=PQQQ-Nd3doEF%(((
 
 
 (NT_,%%%%
 
 
 	vU;;;; 0>!0G(

 

 #0+	
 
 
 !.n;+ "22'  $66040@0L$,,RV&*&6&BT]]"&"2":4;; $!
 
 
& 8 L!!O
 
r<   	positionsr   c                 .    |                      ||          S N)r   )rP   r   r   s      r:   rU   zOpenPanguMLAAttention.forward  s    
 }}Y666r<   )r   NNr?   )rV   rW   rX   r   rY   r   r    rZ   rM   r\   r]   rU   r^   r_   s   @r:   r   r     s)        (,+/26J
 J
 J
 J
 	J

 J
 J
 J
 4ZJ
 J
 "%J
 "D(J
 )4/J
 J
 
J
 J
 J
 J
 J
 J
X7<7 |7 
	7 7 7 7 7 7 7 7r<   r   c                        e Zd Zddddddej        fdedededed	ed
edz  dedede	dz  de
de
ddf fdZdej        dej        dej        fdZded
edz  ddfdZ xZS )OpenPanguEmbeddedAttentionr   NFr?   rb   r@   r   num_kv_headsr   rC   rD   bias_o_projr   rF   	attn_typerG   c                 6   t                                                       t          |
          }|| _        t	                      }|| _        | j        |z  dk    rt          d| j         d| d          | j        |z  | _        || _        | j        |k    r)| j        |z  dk    rt          d| j         d| d          | j        |k     r)|| j        z  dk    rt          d| d| j         d          t          d| j        |z            | _
        t          |d	d           }|| j        | j        z  }|| _        | j        | j        z  | _        | j
        | j        z  | _        | j        d
z  | _        || _        t#          || j        | j        | j        |||
 d          | _        t'          | j        | j        z  ||||
 d          | _        |                     ||           t-          |d          rn|j        }t1          |t2                    r|}nQt1          |t4                    r|t7          |          z  }||         }n!t          t9          |           d          d }t;          | j        | j        | j        | j
        |	||||
 d	  	        | _        d S )Nr   total_num_heads r   r   CNumber of KV heads is greater than TP size, but total_num_kv_heads z5Number of KV heads is less than TP size, but tp_size z( is not divisible by total_num_kv_heads ro   head_dimr   	.qkv_proj)r@   	head_sizetotal_num_headstotal_num_kv_headsrD   rC   rF   r   
input_sizeoutput_sizerD   rC   rF   )rC   interleaved_sliding_window1 for interleaved_sliding_window is not supported..attn)r   r   rC   per_layer_sliding_windowr   rF   )rL   rM   r+   r@   r   r   r9   r   r   maxr   getattrr   q_sizekv_sizer   r   r   qkv_projr   r   _init_rotary_embr   r   
isinstancerY   listlenr   r   attn)rP   rb   r@   r   r   r   rC   rD   r   r   rF   r   	layer_idxry   r   r   sliding_windowsw_idxrR   s                     r:   rM   z#OpenPanguEmbeddedAttention.__init__  s^    	'//	&688(')Q..:4#7 : :/6: : :   -8"."W,,1H71RVW1W1W :*.*A: :/6: : :   #g--'D<S2SWX2X2X U U U:>:QU U U    4#:g#EFF6:t44'4+??H nt}4(4=8}d*'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 	f<@@@6788 	")/)J&4c:: 	!;6== "S)C%D%DD!;F!C 677 G G G  
 "NNML*%%%3###

 

 

			r<   r   r   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr   dim)r   splitr   r   r   r   r   
rP   r   r   qkvr   qkvattn_outputoutputs
             r:   rU   z"OpenPanguEmbeddedAttention.forward  s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r<   c                     d}|o|                                 dk    }|r|j        dk    rd}t          | j        | j        |j        |          | _        d S )NTggufPanguEmbeddedFr   )get_name
model_typer!   r   r   r   r   )rP   rb   rC   r   is_ggufs        r:   r   z+OpenPanguEmbeddedAttention._init_rotary_emb  sl    
 D<#8#8#:#:f#D 	"v(O;;!M"M5"2'	
 
 
r<   )rV   rW   rX   r	   DECODERr   rY   r    r[   r   rZ   rM   r\   r]   rU   r   r^   r_   s   @r:   r   r     s[        (,26!+/&.b
 b
 b
 b
 	b

 b
 "%b
 )4/b
 b
 b
 "D(b
 b
 b
 
b
 b
 b
 b
 b
 b
H
<
 |
 
	
 
 
 

 
 )4/
 
	
 
 
 
 
 
 
 
r<   r   c                   $    e Zd Zdddddddej        fdedededed	eee	f         dz  d
ede
dz  dedededz  dededdf fdZdej        dej        fdZdej        dej        dej        fdZded	eee	f         dz  de
dz  ddfdZddZ xZS )OpenPanguSinkAttentionNr   Fr?   rb   r@   r   r   r   r   rC   rD   r   r   rF   r   rG   c                 p
   t                                                       t          |          }|| _        t	                      | _        t                      | _        || _        | j        | j        z  dk    r t          d| j         d| j         d          | j        | j        z  | _
        || _        | j        | j        k    r3| j        | j        z  dk    r t          d| j         d| j         d          | j        | j        k     r t          d| j         d| j         d          t          d	| j        | j        z            | _        t          |d
d           | _        t          |dd           | _        t          |dd           | _        | j        | j        z   | _        | j
        | j        z  | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        || _        t          |dd          | _        t          |dd          | _        t          |dd           | _        t          |dd          | _        t9          || j        | j        z  | j        | j        z  | j        | j        z  g||| d          | _        t=          | j        | j        z  ||	|| d          | _        tA          | j        |j!                  | _"        | #                    |||           tI          |d          rn|j%        }tM          |tN                    r|}nQtM          |tP                    r|tS          |          z  }||         }n!t          tU          |           d          d }tW          j,        | j                   t[          | j
        | j        | j        | j        | j        |
|||| dtV          | j                  | _.        | j        dk    rHt^          j0        1                    t_          j2        | j        | j        | j        ftg          j4                    |j5                            | _6        to          | j6        d	| j8        d           | j        rt^          j0        1                    t_          j2        | j        | j        | j        ftg          j4                    |j5                            | _9        to          | j9        d	| j8        d           nDt_          j:        | j        | j        | j        ftg          j4                    |j5                  | _9        | ;                                 d S )Nr   r   r   r   r   zNumber of KV heads z is less than TP size z*, KV heads replication is not support yet.ro   qk_nope_dimqk_rope_dim
v_channelsr   param_sink_numberparam_sink_with_valueFparam_sink_scalarparam_sink_of_head_dimr   )r   output_sizesrD   rC   rF   r   r   r   )r   rC   r   r   r   )	sink_lenr   r   rC   r   r   rF   attn_backendhead_size_v)devicerh   )
output_dimweight_loader)<rL   rM   r+   r@   r   ry   r   r{   r   r9   r   r   r   r   r   r  r  r  r   r   k_sizev_sizer   r   r  r  r  param_sink_of_head_numr   r   r   r   r   r   k_layernormr   r   r   r   rY   r   r   r   r5   set_head_size_vr   r   r\   r   r   r   r2   current_devicetorch_dtypeparam_sink_keyr1   r!  param_sink_valuezerospost_weight_load)rP   rb   r@   r   r   r   r   rC   rD   r   r   rF   r   r   r   r   r   rR   s                    r:   rM   zOpenPanguSinkAttention.__init__$  s    	'//	&;==577($,.!33?4#7 ? ?/3|? ? ?   -=".#dl22'$,6!;; ?*.*A? ?/3|? ? ?  
 $t|33 Td&= T T<T T T    4#:dl#JKK"6=$??"6=$??!&,==(4+;;nt}4'$-7'$/9}d*'>$!(1Da!H!H%,V5Le%T%T"!(1Dd!K!K&-f6NPU&V&V#2"dl*dl*dl*
 %'''

 

 

 (+do=#%%%%
 
 
 #4=f6IJJJO, 	 	
 	
 	
 6788 	")/)J&4c:: 	!;6== "S)C%D%DD!;F!C 677 G G G  
 "N#3DODDD'NML+*%%%3###4
 
 
	 !A%%"'("4"4.)
 ,:<< ,  
# 
#D #"#%)%7    ) (-(:(:K 2 - O
  0>@@$0  
) 
)% !)&')-);     )..)
 ,:<< ,) ) )% 	r<   paramloaded_weightc                    t          |dd           }t          |dd          }t          |dd          }|p|}t          |dd          }t          |dd          }|r|                                |_        |rut          |t          j                  r[t          |j                  }|)||         | j        z  dk    sJ ||         | j        z  ||<   |	                    ||j
                   |j        }	|0|s.|	j        |         }
| j        |
z  }|                    |||
          }t          |j                  dk    r|                    d	          }|	j        |j        k    sJ |	                    |           d S )
Nr   is_sharded_weightFuse_bitsandbytes_4bitis_gguf_weightis_gguf_weight_typer   rg   ro   )r   itemweight_typer   r   UninitializedParameterr   r   ry   materializerh   datar{   narrowr   reshapecopy_)rP   r-  r.  r   r0  r1  r2  r3  final_shape
param_data
shard_size	start_idxs               r:   r!  z$OpenPanguSinkAttention.weight_loader  s   UL$77
#E+>FF '/F N N .F1F !(8%@@%e-BEJJ 	5 - 2 2 4 4E  	Fj0IJJ 	F}233K%":.=BBBB*5j*AT\*QJ'k1DEEEZ
!*;!#)*5Jz1I)00Y
SSM }"##q(()11!44M=#66666'''''r<   r   r   c           
      n   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |                    d| j        | j                            }| 	                    |||          \  }}|                    d| j                  }|                    d| j                  }| 
                    |||t          j        |j        d         |j        d         | j        z  | j        z  g                    }|                     |          \  }	}|	S )Nr   r   r   ro   )output_shape)r   r  r   r"  r#  r%  r   r   r   r   r   r\   Sizer   r  r   r  s
             r:   rU   zOpenPanguSinkAttention.forward  s   
 }--Q))T[$+t{C)LL1aQVVB(94=IIJJy!Q//1FF2t{##FF2t{##iiQWQZ4=84?JK 	   
 
 KK,,	r<   c                 p    d}d| j         | j        z  i}t          | j        | j        ||          | _        d S )NFpartial_rotary_factorr   )r  r   r!   r   r   )rP   rb   r   rC   r   s        r:   r   z'OpenPanguSinkAttention._init_rotary_emb  sH     2D4Dt}4TU"M5+'	
 
 
r<   c                     t          | d          r"| j        |                     | j                  }n| j        }| j                            || j                   d S )Nr%  )r   r%  r)  r   update_sink_kvr*  )rP   r)  s     r:   r,  z'OpenPanguSinkAttention.post_weight_load  s]    4'' 	1D,<,H!--d.ABBNN!0N	  1FGGGGGr<   rG   N)rV   rW   rX   r	   r  r   rY   dictrZ   r   r    r[   r   rM   r   r   r\   r]   r!  rU   r   r,  r^   r_   s   @r:   r  r  #  s        26'+26!+/&.i  i  i  i  	i 
 i  c3h$.i  "%i  )4/i  i  i  "D(i  i  i  
i  i  i  i  i  i V#(2< #( #( #( #( #(J< | 
	   0
 
 c3h$.
 )4/	

 

 
 
 
 H H H H H H H Hr<   r  c                   v     e Zd Zdedededdf fdZdej        dej        d	ej        dz  dej        fd
Z	 xZ
S )OpenPanguDecoderLayerrb   rF   vllm_configrG   Nc                 z   t                                                       ||j        j        }|j        }|j        }|j        }|j        | _        t          |dd          }t          |
                    d          d                   }|| _        t          |d          o/t          |d          ot          |d          ot          |d	          | _        t          |d
          o
|j        dk    | _        | j        rZt!          || j        |j        |j        |j        |j        t          |d          r|j        nd |j        |||| d          | _        n|| j        rt          |dd          pt          |dd          }	|	}
t          |d          r|j        }	t          |dd          rt2          j        }nt7          d|j         d          t          |dd           }|
d|j        d}t=          || j        |j        t          |d|j                  ||||	|
|| d|          | _        nt          |dd          pt          |dd          }	|	}
t          |d          r|j        }	t          |dd          rt2          j        }nt2          j        }tA          || j        |j        t          |d|j                  |||	|
|| d|          | _        t          |dd           '||j!        k    rtE          |||| d          | _#        n;tI          | j        |j%        |j&        |t          |d d          | d!          | _#        t          |d"d#          | _'        |j(        | _(        t          |d$| j(                  | _!        tS          |j        |j*        %          | _+        tS          |j        |j*        %          | _,        t[                      j.        | _/        t          |d&d          | _0        | j0        rBtS          |j        |j*        %          | _1        tS          |j        |j*        %          | _2        d S d S )'Nr   r   r   )sepr   r   r   r   r   r  r   r   z
.self_attn)rb   r@   r   r   r   r   r   r   r   r   rC   rF   attention_biasFrD   qkv_bias	is_causalTz
is_causal=z' is not support for attention with sinkrope_scalingdefault)r   r   num_key_value_heads)rb   r@   r   r   r   r   rC   rD   r   r   rF   r   )rb   r@   r   r   r   rC   rD   r   r   rF   r   r   z.mlp)rb   rc   rC   rF   mlp_bias)r@   rA   rB   rC   rD   rF   rs   rt   first_k_dense_replacer   sandwich_norm)3rL   rM   model_config	hf_configr   rC   rc   r@   r   rY   r  r   r   use_mlar  use_sink_attentionr   num_attention_headsr   r   r   r   r   	self_attnrO  r	   r  r9   rP  r   r  ENCODER_ONLYr   rU  ra   mlpr>   rA   rB   rs   num_hidden_layersr   r   input_layernormpost_attention_layernormr   r|   tp_grouprV  pre_mlp_layernormpost_mlp_layernorm)rP   rb   rF   rK  r   rC   rc   r   r   rN  r   r   r   rR   s                r:   rM   zOpenPanguDecoderLayer.__init__&  s    	> -7F"/"/%5!-")&2KT"R"R--b122	" F.// 0 2330--0 //	 	 F/00QV5MPQ5Q 	 < R	2 , 4!'!8!'!8!,*1&-*H*HRF&&d#0(?)) ,,,  DNN  $ A	$V-=uEE J JN )Kvz** 1!'v{D11 )1		 .!1 . . .   &fndCCO&!*"("3# # 4 , 4$163M  !0(?)#') ,,,#  DNN" %V-=uEE J JN )Kvz** 1!'
 v{D11 7)1		)6	7 , 4$163M  )@)#') ,,,#  DN" F.55AV999# /) 	  DHH $ ,"(":!,)VZ77   DH &-V5Lc%R%R"!'!9%,+T-C&
 &
"  'v'9v?RSSS(/F$7)
 )
 )
% %3$V_eDD 	%,"(;& & &D" '."(;' ' 'D###		 	r<   r   r   residualc                    |*|                                 }|                     |          }n|                     ||          \  }}|                     ||          }| j        :|j        t
          j        k    r%|d| j        z  z  }| j        dk    r|d| j        z  z  }| j        r/| 	                    |          }| 
                    ||          \  }}n| 	                    ||          \  }}|                     |          }| j        <t          | j        t                    r"|j        t
          j        k    r|d| j        z  z  }| j        r|                     |          }||fS )N)r   r   rt   r   )cloner`  r\  rs   rh   r\   r   r   rV  ra  rc  r^  r   r>   rd  )rP   r   r   re  s       r:   rU   zOpenPanguDecoderLayer.forward  s    $**,,H 00??MM&*&:&:=(&S&S#M8' ' 
 
 &2#u}44
 S4#===M~"" C$"<<< 	 99-HHM&*&<&<]H&U&U#M88&*&C&Cx' '#M8
 // &248\22 3#u}44S4#===M 	C 33MBBMh&&r<   )rV   rW   rX   r   rZ   r   rM   r\   r]   rU   r^   r_   s   @r:   rJ  rJ  %  s        V V V  	V
 
V V V V V Vp3'<3' |3' ,%	3'
 
3' 3' 3' 3' 3' 3' 3' 3'r<   rJ  c                       e Zd ZdZdddedef fdZdej        dej        fd	Z		 ddej        dej        de
d
z  dej        d
z  dej        e
z  f
dZdeeeeef                  deeef         dedej        dee         defdZdeeeeeef                  deeef         dedej        dee         deeef         defdZdeeeej        f                  dee         fdZddZ xZS )OpenPanguModelFr?   rF   rK  rF   c                   t                                                       j        j        j        }j        j        }| _        |j        | _        j	        | _
        j        | _        t                      j        sj        r9t                      j        r&t!          j        j        || d          | _        nt'                      | _        t)          j        fd| d          \  | _        | _        | _        t                      j        r!t3          j        j                  | _        nt'                      | _        t9          ddgj                  | _        d S )	Nz.embed_tokensrC   rF   c                 &    t          |           S r   )rJ  )rF   rb   rK  s    r:   <lambda>z)OpenPanguModel.__init__.<locals>.<lambda>  s    0MM r<   z.layersrj  r   r   re  )rL   rM   rW  rX  rC   rc   r   rb   rw   pad_token_idpadding_idx
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr#   r@   embed_tokensr*   r.   r_  start_layer	end_layerlayersr   r   normr-   make_empty_intermediate_tensors)rP   rK  rF   rC   r   rb   rR   s    `   @r:   rM   zOpenPanguModel.__init__  sr   )3"/!1=%0%F"!. +>>' 
	1&
	1+7>>+F
	1 !7!") ///	! ! !D !/ 0 0D8C$MMMMM%%%9
 9
 9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,r<   	input_idsrG   c                 ,    |                      |          S r   )ru  rP   r{  s     r:   embed_input_idszOpenPanguModel.embed_input_ids  s      +++r<   Nr   intermediate_tensorsinputs_embedsc                 ~   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j                  D ]}| j        |         } ||||          \  }} t                      j        st          ||d          S | 	                    ||          \  }}	|S )Nr   re  )r   re  )
r   rr  r~  rangerv  rw  rx  rt  r3   ry  )
rP   r{  r   r  r  r   re  ilayerr   s
             r:   rU   zOpenPanguModel.forward!  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7Ht'88 	P 	PAKNE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qr<   attn_mlp_replace_mappingparams_dictweight_namer.  loaded_paramsc                 4   |D ]\  }}}||vsd|v r||vr|                     ||          }	|dk    r|	|vr4|	}|                    d          r||vrPt          ||           ra||         }
|
j        } ||
||           |                    |            dS dS )Nzmlp.experts.r   .biasTF)replaceendswithr,   r!  add)rP   r  r  r  r.  r  
param_nameorigin_nameshard_idweight_name_mappedr-  r!  s               r:   load_attn_mlp_weightz#OpenPanguModel.load_attn_mlp_weight?  s     2J 	 	-JX+--;..K{4R4R!,!4!4[*!M!M000&k990##G,, K1O1O&{D99 ,E!/MM%999k***44ur<   expert_merge_mapping	flag_dictc           	      B   |D ]}|\  }}	}
}|	|vrd|d<   |                     |	|          }t          ||           r:||         }t          j        t          dt
          f         |j                  } ||||||
d          }|r|}|                    |            dS dS )NTis_expert_weight.)r  	expert_idreturn_successF)r  r,   typingcastr   r[   r!  r  )rP   r  r  r  r.  r  r  mappingr  r  r  r  r  r-  r!  successs                   r:   load_expert_weightz!OpenPanguModel.load_expert_weight`  s     , 	 	G;B8JY+--,0I()!,!4!4[*!M!M&'94@@  23E"Kd(;U=PQQM#m"!##  G  0!!"4555tt ur<   weightsc           	      P   g d}t          | j        d          }|r)t          j        | ddd| j        j        | j                  }t          |                                           }t                      }|D ]\  }}d|v r| j        j	        rd|v rd	|v rt          | j        d
          rw| j        j
        dk    rgt          |                    d          d                             d          d                   }	|	| j        j        z
  }
|
dk    r|
| j        j
        k     rddi}|                     |||||          s|r|                     ||||||          r|d         r|                    d          r||vr
t#          ||          }|                    d          r|                    dd          }|It'          ||           r[||         }t)          |dt*                    } |||           |                    |           |                                  |S )N))r   r   r  )r   z.k_projr  )r   z.v_projr  )r   z	.q_a_projr   )r   r   ro   )rJ   z
.gate_projr   )rJ   z.up_projro   r   	gate_projrO   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerj   rw   zrotary_emb.inv_freqzlm_head.weightrx  num_nextn_predict_layersr   zlayers.r   r   r  Fr  ru   zgate.e_score_correction_biasr!  )r   rb   r   make_expert_params_mappingr   rw   rH  named_parameterssetrs  r  rY   r  r_  r  r  r  r%   r  r,   r   r$   r  r,  )rP   r  r  has_expertsr  r  r  namer.  r   mtp_idxr  r-  r!  s                 r:   load_weightszOpenPanguModel.load_weights  s   $
 $
 $
  dk+=>> 	#1#L$/$/"+ K8&*&@$ $ $  4002233"%%%#* 6	( 6	(D-$,,{. 3Ct3K3K D  DK)CDD ![9A==

9 5 5b 9 ? ? D DQ GHH	#dk&CCa<<Gdk.R$R$R+U3I)),!! %( %( ++(!! %($ /0 ==)) d+.E.E0{CC==!:;; <<13Q D <*466 #D) '@U V Ve]333!!$''''r<   c                     |                                  D ].\  }}|| u r
t          |d          r|                                 /d S )Nr,  )named_modulesr   r,  )rP   r  modules      r:   r,  zOpenPanguModel.post_weight_load  s]     ..00 	* 	*LD&~~v122 *'')))		* 	*r<   r   rG  )rV   rW   rX   fall_back_to_pt_during_loadr   rZ   rM   r\   r]   r~  r3   rU   r   tuplerY   rH  r   r  r[   r  r  r   r  r,  r^   r_   s   @r:   ri  ri    s,       "'AC $
 $
 $
z $
3 $
 $
 $
 $
 $
 $
L, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   <"&uS#s]';"< #s(^ 	
 | 3x 
   B"5c3);#<= #s(^ 	
 | 3x T	? 
   BPHU33D-E$F P3s8 P P P Pd* * * * * * * *r<   ri  c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )OpenPanguModelBase)r   k_projv_projr  r  )r   rN   r?   rj  rK  rF   c          	      z   t                                                       |j        j        }|j        }|| _        || _        t          |d          o|j        d u| _        | j        rddg| j	        d<   t          |t          |d                    | _        t                      j        rSt          |j        |j        |t          |d                    | _        |j        r| j        j        j        | j        _        nt-                      | _        t/          |j                  | _        | j        j        | _        d S )	Nr   q_a_projr   r   modelrK  rF   lm_headrl  )rL   rM   rW  rX  rC   rb   r   r   fuse_qkv_a_projpacked_modules_mappingri  r/   r  r   rt  r"   rq  r@   r  rs  ru  weightr*   r   logits_processorrz  )rP   rK  rF   rb   rC   rR   s        r:   rM   zOpenPanguModelBase.__init__  sC   )3"/( FM**Mv/A/M 	  	$?D'(:;
 $#L,I,I
 
 

 >>& 
	,)!")#FI66	  DL ) E&*j&=&D#)++DL /0A B BJ6 	,,,r<   r{  rG   c                 6    | j                             |          S r   )r  r~  r}  s     r:   r~  z"OpenPanguModelBase.embed_input_ids  s    z)))444r<   Nr   r  r  c                 6    |                      ||||          }|S r   )r  )rP   r{  r   r  r  r   s         r:   rU   zOpenPanguModelBase.forward  s)     

y"6
 
 r<   r   c                 <    |                      | j        |          }|S r   )r  r  )rP   r   logitss      r:   compute_logitsz!OpenPanguModelBase.compute_logits  s      &&t|]CCr<   r  c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r)   rb   rs  r  )rP   r  loaders      r:   r  zOpenPanguModelBase.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r<   )NN)rV   rW   rX   r  r   rZ   rM   r\   r]   r~  r3   rU   r  r   r  r  r  r^   r_   s   @r:   r  r    sq       222$i0 
 BD !
 !
 !
z !
3 !
 !
 !
 !
 !
 !
F5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r<   r  c                   B     e Zd Zdddedef fdZdededd	fd
Z xZS )OpenPanguMoEModelr?   rj  rK  rF   c                p   t                                          ||           |j        j        }g | _        |j        |j        z
  | _        d| _        g | _	        d }| j
        j        D ]t}t          |t                    rt          |t                    sJ t          |j        t                     r+|j        }| j	                            |j        j                   u|t'          d          |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        d S )Nr  ro   z#No MOE layer found in model.layers.)rL   rM   rW  rX  expert_weightsr_  rU  num_moe_layersnum_expert_groups
moe_layersr  rx  r   r*   rJ  r^  ra   appendr   RuntimeErrorr   num_logical_expertsr   num_physical_expertsr   num_local_physical_expertsr   r   r   rw   )rP   rK  rF   rb   example_moer  rR   s         r:   rM   zOpenPanguMoEModel.__init__#  s0   [@@@)3 !$69UU!"Z& 	: 	:E%00 e%:;;;;;%)\22 :#i&&uy'8999DEEE#.#@ $/$B!*5*N' + < + <%0%D"""r<   r  r  rG   Nc                     | j         |k    sJ || _        || _         || j        z
  | _        | j        j        D ]V}t          |j        t                    r:|j        }||_	        ||_
        | j        |_        |j                                         Wd S r   )r  r  r  rw   r  rx  r   r^  ra   r   r   r   r   update_expert_map)rP   r  r  r  moes        r:    update_physical_experts_metadataz2OpenPanguMoEModel.update_physical_experts_metadataB  s    
 .2LLLLL$8!*D'%9D<T%T"Z& 	0 	0E%)\22 0i/I,)=&*.*D'--///	0 	0r<   )	rV   rW   rX   r   rZ   rM   rY   r  r^   r_   s   @r:   r  r  "  s        AC E E Ez E3 E E E E E E>0!0 %(0 
	0 0 0 0 0 0 0 0r<   r  c                   .     e Zd Zdddedef fdZ xZS )OpenPanguEmbeddedModelr?   rj  rK  rF   c                N    t                                          ||           d S )Nr  )rL   rM   )rP   rK  rF   rR   s      r:   rM   zOpenPanguEmbeddedModel.__init__U  s&    [@@@@@r<   )rV   rW   rX   r   rZ   rM   r^   r_   s   @r:   r  r  T  sf        AC A A Az A3 A A A A A A A A A Ar<   r  c                       e Zd ZdS )PanguEmbeddedForCausalLMNrV   rW   rX   rx   r<   r:   r  r  Y          Dr<   r  c                       e Zd ZdS )PanguUltraMoEForCausalLMNr  rx   r<   r:   r  r  ]  r  r<   r  c                       e Zd ZdS )PanguProMoEV2ForCausalLMNr  rx   r<   r:   r  r  a  r  r<   r  )]r  collections.abcr   r   r   r\   r   transformersr   vllm.attention.layerr   r	   vllm.compilation.decoratorsr
   vllm.configr   r   r   vllm.distributedr   r   r   r   r   r   %vllm.model_executor.layers.activationr   :vllm.model_executor.layers.attention.static_sink_attentionr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   r   +vllm.model_executor.layers.logits_processorr   vllm.model_executor.layers.mlar   r   'vllm.model_executor.layers.quantizationr    +vllm.model_executor.layers.rotary_embeddingr!   3vllm.model_executor.layers.vocab_parallel_embeddingr"   r#   -vllm.model_executor.model_loader.weight_utilsr$   r%   %vllm.model_executor.models.interfacesr&   r'   r(    vllm.model_executor.models.utilsr)   r*   r+   r,   r-   r.   r/   r0   vllm.model_executor.utilsr1   vllm.platformsr2   vllm.sequencer3   vllm.transformers_utils.configr4   ,vllm.v1.attention.backends.flash_attn_diffkvr5   rZ   r;   Moduler>   ra   r   r   r  rJ  ri  r  r  r  r  r  r  rx   r<   r:   <module>r     s  0  . . . . . . . .              ) ) ) ) ) ) 9 9 9 9 9 9 9 9 = = = = = = ? ? ? ? ? ? ? ? ? ?                = < < < < <      @ ? ? ? ? ? 8 8 8 8 8 8              H G G G G G V V V V V V V V F F F F F F @ @ @ @ @ @                       
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 7 6 6 6 6 6 + + + + + + - - - - - - A A A A A A T T T T T T
S 
 
 
 
#G #G #G #G #G29 #G #G #GLA@ A@ A@ A@ A@29 A@ A@ A@HR7 R7 R7 R7 R7BI R7 R7 R7j@
 @
 @
 @
 @
 @
 @
 @
FH H H H HRY H H HDL' L' L' L' L'BI L' L' L'^ c* c* c* c* c*RY c* c* c*LD, D, D, D, D,J D, D, D,N/0 /0 /0 /0 /0*,< /0 /0 /0dA A A A A/ A A A
	 	 	 	 	5 	 	 		 	 	 	 	0 	 	 		 	 	 	 	0 	 	 	 	 	r<   