
    .`i8                     4   d Z ddlZddlmZmZ ddlmZ ddlZddl	Z	ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z>m?Z? dede@fdZAdedeBfdZC G d de
jD                  ZE G d d e
jD                  ZF G d! d"e
jD                  ZG G d# d$e
jD                  ZH G d% d&e
jD                  ZI edd'ddd()           G d* d+e
jD                              ZJ G d, d-e
jD        e8e9          ZK G d. d/eKe7          ZL G d0 d1eK          ZM G d2 d3eM          ZN G d4 d5eL          ZOdS )6zAInference-only HunYuan model compatible with HuggingFace weights.    N)CallableIterable)islice)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfigget_current_vllm_config)get_ep_groupget_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)
SiluAndMul)SharedFusedMoE)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parametermake_layersmaybe_prefixconfigreturnc                     t          | dd           }t          |t                    r|dk    S t          |t                    r0|r.t	          d |D                       rt          |          dk    S dS dS )Nnum_expertsr"   c              3   @   K   | ]}t          |t                    V  d S N)
isinstanceint).0es     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/hunyuan_v1.py	<genexpr>z_is_moe.<locals>.<genexpr>U   s,      77az!S!!777777    F)getattrr1   r2   listallmax)r+   r.   s     r5   _is_moer<   O   s    &-66K+s## Q+t$$  77;77777 	{##a''55r7   c                 J    t          | dd          sdS t          | dd          S )Nuse_claFr"   cla_share_factor)r8   )r+   s    r5   _get_cla_factorr@   \   s.    69e,, q6-q111r7   c                   V     e Zd Z	 	 	 	 ddededededz  d	ed
ededdf fdZd Z xZ	S )
HunYuanMLPNF Thidden_sizeintermediate_size
hidden_actquant_configbiasprefixreduce_resultsr,   c                    t                                                       t          ||gdz  ||| d          | _        t	          ||||| d|          | _        |dk    rt          d| d          t                      | _        d S )	N   .gate_up_proj)
input_sizeoutput_sizesrH   rG   rI   z
.down_proj)rN   output_sizerH   rG   rI   rJ   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)	selfrD   rE   rF   rG   rH   rI   rJ   	__class__s	           r5   rS   zHunYuanMLP.__init__c   s     	6"+,q0%+++
 
 
 +(#%((()
 
 
 X:XXX   !llr7   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S r0   )rT   rW   rU   )rX   xgate_up_s       r5   forwardzHunYuanMLP.forward   sD    &&q))
KK  ~~a  1r7   )NFrC   T)
__name__
__module____qualname__r2   strr   boolrS   r^   __classcell__rY   s   @r5   rB   rB   b   s         37## ## # 	#
 )4/# # # # 
# # # # # #@      r7   rB   c                        e Zd Z	 	 	 	 	 	 ddededed	ed
ededz  dededz  dededdf fdZ		 dde
j        de
j        dee
j                 dz  de
j        fdZ xZS )HunYuanAttention    NFrC   r+   rD   	num_headsnum_kv_headsmax_position_embeddingsrG   rH   cache_configrI   layer_idr,   c           
         t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        t          |d          r|j
        r|j
        | _
        n1t          |d          r|j        | _
        n| j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t!          |dd          | _        |
| _        t'          || j
        | j        | j        |||	 d	          | _        t+          | j        | j
        z  ||||	 d
          | _        t/          | j
        ||j        d          | _        t5          | j        | j
        | j        | j        |||	 d          | _        | j        rBt9          | j
        |j                  | _        t9          | j
        |j                  | _        d S d S )Nr   r"   head_dimattention_head_dim      use_qk_normF	.qkv_proj)rD   	head_sizetotal_num_headstotal_num_kv_headsrH   rG   rI   .o_projrN   rP   rH   rG   rI   Tmax_positionrope_parametersis_neox_style.attn)rk   rm   rG   rI   eps) rR   rS   rD   r   rv   rj   rw   r;   rk   hasattrrp   rq   q_sizekv_sizescalingrl   r8   rs   rn   r   qkv_projr   o_projr   r|   
rotary_embr   attnr   rms_norm_epsquery_layernormkey_layernormrX   r+   rD   rj   rk   rl   rG   rH   rm   rI   rn   tp_sizerY   s               r5   rS   zHunYuanAttention.__init__   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF6:&& 	E6? 	E"ODMMV122 	E"5DMM ,0DDDMnt}4(4=8}d*'>$"6=%@@ )#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 #M0"2	
 
 
 NML*%%###
 
 
	  	Q#*4=f>Q#R#R#RD !(F<O!P!P!PD	Q 	Qr7   	positionshidden_states	kv_statesc                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|}	| j        r|                     |                    d| j        | j	                  
                                          }|                     |                    d| j        | j	                  
                                          }|                     |||          }
|
                    |j        d         d          }
|                     |
          \  }}||	|ffS )Nri   dimr   )r   splitr   r   r   rs   r   viewrj   rp   
contiguousr   rk   r   shaper   )rX   r   r   r   qkvr]   qkvori_kattn_outputoutputs               r5   r^   zHunYuanAttention.forward   s3    }--Q))T[$,E2)NN1ay!Q//1 	$$r4>4=99DDFF A ""r4,dm<<GGII A ii1a((!&&qwqz266KK,,	qz!!r7   rh   NFNrC   ri   r0   r_   r`   ra   r   r2   r   rc   r
   rb   rS   torchTensortupler^   rd   re   s   @r5   rg   rg      sG        (,26+/OQ OQ OQ OQ 	OQ
 OQ "%OQ )4/OQ OQ "D(OQ OQ OQ 
OQ OQ OQ OQ OQ OQj 15	" "<" |" &-	"
 
" " " " " " " "r7   rg   c                        e Zd Z	 	 	 	 	 	 ddededed	ed
ededz  dededz  dededdf fdZ		 dde
j        de
j        dee
j                 dz  de
j        fdZ xZS )HunYuanCrossAttentionrh   NFrC   ri   r+   rD   rj   rk   rl   rG   rH   rm   rI   rn   r,   c           
         t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        t          |d          r|j
        | _
        n1t          |d          r|j        | _
        n| j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t!          |dd          | _        |
| _        t'          |||||	 d	          | _        t+          | j        | j
        z  ||||	 d
          | _        t/          | j
        ||j        d          | _        t5          | j        | j
        | j        | j        |||	 dt6          j                  | _        | j        rBt=          | j
        |j                  | _         t=          | j
        |j                  | _!        d S d S )Nr   r"   rp   rq   rr   rs   F.q_projrH   rG   rI   rx   ry   Trz   r~   )rk   rm   rG   rI   	attn_typer   )"rR   rS   rD   r   rv   rj   rw   r;   rk   r   rp   rq   r   r   r   rl   r8   rs   rn   r   q_projr   r   r   r|   r   r   r!   ENCODER_DECODERr   r   r   r   r   r   s               r5   rS   zHunYuanCrossAttention.__init__   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF6:&& 	E"ODMMV122 	E"5DMM ,0DDDMnt}4(4=8}d*'>$"6=%@@ *%%%%
 
 
 (+dm;#%%%%
 
 
 #M0"2	
 
 
 NML*%%####3	
 	
 	
	  	Q#*4=f>Q#R#R#RD !(F<O!P!P!PD	Q 	Qr7   r   r   r   c                 v   |J |\  }}|}|                      |          \  }}t          j        |          }	|                     |||	          \  }}| j        r|                     |                    d| j        | j                  	                                          }| 
                    |                    d| j        | j                  	                                          }|                     |||          }
|
                    |j        d         d          }
|                     |
          \  }}|||ffS )Nri   r   )r   r   
empty_liker   rs   r   r   rj   rp   r   r   rk   r   r   r   )rX   r   r   r   r   r   r   r   r]   k_tmpr   r   s               r5   r^   zHunYuanCrossAttention.forwardF  s0    $$$q{{=))1 ##y!U331 	$$r4>4=99DDFF A ""r4,dm<<GGII A ii1a((!&&qwqz266KK,,	qz!!r7   r   r0   r   re   s   @r5   r   r      sG        (,26+/NQ NQ NQ NQ 	NQ
 NQ "%NQ )4/NQ NQ "D(NQ NQ NQ 
NQ NQ NQ NQ NQ NQh 15	" "<" |" &-	"
 
" " " " " " " "r7   r   c                   h     e Zd Z	 	 	 	 ddededz  deded	ef
 fd
Zde	j
        de	j
        fdZ xZS )HunYuanSparseMoeBlockNri   rC   Fr+   rG   rn   rI   enable_eplbc                    t                                                       t                      | _        t	                      j        | _        t	                      j        | _        | j        	                                | _
        |j        | _        | j        |j        k    r t          d| j         d|j         d          t          |j        t                     r0|dk    sJ t#          |j                  |k    sJ |j        |         }n|j        }|j        }|j        .t          |j        t(                    r|j        n|j        |         }t+                      }|j        j        }	|| _        | j        | _        |	j        | _        | j        | j        z   | _        | j        | j
        z  | _        | j        | j        z  | _        | j        | j        z   | _        tA          |j!        |j        dd | d          | _"        |j#        dk    rt          |j$        t                     r0|dk    sJ t#          |j$                  |k    sJ |j$        |         }
n|j$        }
tK          |j!        |j        |
z  |j&        |d| d	          | _'        nd | _'        tQ          | j'        | j        ||j!        |d|d
k    || d| j        | j                  | _)        d S )NzTensor parallel size z' is greater than the number of experts .r   Fz.gater   z.shared_mlp)rD   rE   rF   rG   rJ   rI   r"   z.experts)shared_expertsr.   top_krD   rE   rJ   renormalizerG   rI   r   num_redundant_experts)*rR   rS   r   r   r   device_groupep_grouprank_in_groupep_ranksizeep_sizer.   n_routed_expertsrV   r1   moe_topkr9   lenrE   moe_intermediate_sizer2   r   parallel_configeplb_configr   n_logical_expertsr   n_redundant_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endr   rD   gateuse_mixed_mlp_moenum_shared_expertrB   rF   
shared_mlpr   experts)rX   r+   rG   rn   rI   r   r   rE   vllm_configr   r   rY   s              r5   rS   zHunYuanSparseMoeBlock.__init__b  s    	;==$3#~~3}))++ & 2<&,,,? ? ?)/);? ? ?   fot,, 	$q====v''(2222OH-EEOE #4'3 f:C@@<,,1(;  .//!1=&!%!6#.#D "&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  %###
 
 
	 #a'' &2D99 =1}}}}6344x????$*$<X$F!!$*$<!("."(":=N"N!,)$ ---  DOO #DO%?-*/ 	%&&&("&":
 
 
r7   r   r,   c                 B   |j         }|j         d         }|                    d|          }|                     |          \  }}|                     ||          }| j        |d         |d         z   }| j        dk    rt          |          }|                    |          S )Nri   )r   router_logitsr   r"   )r   r   r   r   r   r   r   )rX   r   
orig_shape
hidden_dimr   r]   final_hidden_statess          r5   r^   zHunYuanSparseMoeBlock.forward  s    "(
"(,
%**2z::  99]33q"ll'} + 
 
 ?&"5a"8;Nq;Q"Q<!"BCV"W"W"''
333r7   )Nri   rC   F)r_   r`   ra   r   r   r2   rb   rc   rS   r   r   r^   rd   re   s   @r5   r   r   a  s         37!]
 ]
 ]
 )4/]
 	]

 ]
 ]
 ]
 ]
 ]
 ]
 ]
~4U\ 4el 4 4 4 4 4 4 4 4r7   r   c                        e Zd Z	 	 	 	 	 ddededz  dedz  ded	ed
eddf fdZ		 dde
j        de
j        de
j        dz  dee
j                 dz  dee
j        e
j        f         f
dZ xZS )HunYuanDecoderLayerNrC   ri   Fr+   rm   rG   rI   rn   r   r,   c                 h   t                                                       |dk    sJ || _        |j        | _        t	          |j        t                    r|j        n|j        |         | _        t          |dd          }t          |dd          pt          |dd          }t          |          }	|dk    r||	z  dk    rt          j
        nt          j        }
|
t          j        k    r@t          || j        |j        t          |d|j                  ||||| d|	
  
        | _        nb|
t          j
        k    r@t          || j        |j        t          |d|j                  ||||| d|	
  
        | _        nt!          d
|
           t#          |          rt%          |||| d|          | _        n;t)          | j        | j        |j        |t          |dd          | d          | _        t-          |j        |j                  | _        t-          |j        |j                  | _        d S )Nr   rl   rh   attention_biasFrH   num_key_value_headsz
.self_attn)
r+   rD   rj   rk   rl   rG   rH   rm   rI   rn   zUnsupported attention type: z.mlp)r+   rG   rn   rI   r   mlp_bias)rD   rE   rF   rG   rH   rI   r   )rR   rS   rn   rD   r1   rE   r2   r8   r@   r!   r   DECODERrg   num_attention_heads	self_attnr   RuntimeErrorr<   r   mlprB   rF   r   r   input_layernormpost_attention_layernorm)rX   r+   rm   rG   rI   rn   r   rl   r   
cla_factorattention_typerY   s              r5   rS   zHunYuanDecoderLayer.__init__  s    	1}}}} !- &2C884F$$)(3 	
 #*&2KT"R"R )95AA 
WFEF
 F
 %V,,
 1}}J!6!!;!; ))& 	
 ]222- , 4$163M  )@)#) ,,,!  DNN }<<<2 , 4$163M  )@)#) ,,,!  DNN NnNNOOO6?? 	,)! '  DHH " ,"&"8!,)VZ77   DH  'v'9v?RSSS(/F$7)
 )
 )
%%%r7   r   r   residualr   c                    ||}|                      |          }n|                      ||          \  }}|                     |||          \  }}|                     ||          \  }}|                     |          }|||fS )N)r   r   r   )r   r   r   r   )rX   r   r   r   r   ori_kv_statess         r5   r^   zHunYuanDecoderLayer.forward*  s     $H 00??MM&*&:&:=(&S&S#M8'+~~' (6 (
 (
$} #'"?"?x"X"Xx//h55r7   )NNrC   ri   Fr0   )r_   r`   ra   r   r
   r   rb   r2   rc   rS   r   r   r   r^   rd   re   s   @r5   r   r     s#        ,026!R
 R
 R
 "D(R
 )4/	R

 R
 R
 R
 
R
 R
 R
 R
 R
 R
r 156 6<6 |6 ,%	6
 &-6 
u|U\)	*6 6 6 6 6 6 6 6r7   r   ri   )	input_idsr   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                   &    e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        fdZdeeeeeef                  fdZdeeeej        f                  fdZ xZS )HunYuanModelrC   rI   r   rI   c                   t                                                       |j        j        |j        |j        |j        j        }|j        j        |j	        | _	        | _
        | _        j        | _        j        | _        t                      j        sj        r5t                      j        r"t%          | j        j                  | _        nt+                      | _        t-          j        fd| d          \  | _        | _        | _        t                      j        r"t7          j        j                  | _        d S t+                      | _        d S )N)rG   c                 z    t          t          |                     d          d                   |           S )Nr   ri   )r+   rn   rm   rG   rI   r   )r   r2   r   )rI   rm   r+   r   rG   s    r5   <lambda>z'HunYuanModel.__init__.<locals>.<lambda>k  sB    .V\\#..r233))'   r7   z.layersr   r   )rR   rS   model_config	hf_configrm   rG   r   r   r   r   r+   pad_token_idpadding_idx
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   rD   embed_tokensr'   r)   num_hidden_layersstart_layer	end_layerlayersr   r   norm)	rX   r   rI   r   rm   r+   r   rG   rY   s	       @@@@r5   rS   zHunYuanModel.__init__N  sy   )3"/"/!1=!1=%0%F"(!. +>>' 		1&		1+7>>+F		1 !7")! ! !D !/ 0 0D8C$       %%%9
 9
 9
5$.$+ >>& 	) 28KLLLDIII&((DIIIr7   r   r,   c                 ,    |                      |          S r0   )r   rX   r   s     r5   embed_input_idszHunYuanModel.embed_input_idsz  s      +++r7   Nr   r   r   c                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j                  }d }t          t          | j        | j        | j	                            D ];\  }	}
 |
||||          \  }}}t          | j        dd          r|	|z  dk    r|}9d }<t                      j        st          ||d          S |                     ||          \  }}|S )Nr   r   r>   Fr   r   r   )r   r   r  r@   r+   	enumerater   r   r   r   r8   r   r    r   )rX   r   r   r   r   r   r   r   prev_kv_statesilayerr   r]   s                r5   r^   zHunYuanModel.forward}  sG    >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H$T[11
!4; 0$.AA
 
 	& 	&HAu 27	2 2.M8Y t{Iu55 &!j.A:M:M!*!%~~* 	&"/XFF    99]H==qr7   r   c                 F   | j         j        }t          | j         d| j         j                  }||z  }| j         j        }t	          | j         d          r| j         j        }n1t	          | j         d          r| j         j        }n| j         j        |z  }|                    ||dz   ||          }t          j	        ||ddfd          \  }}}	|                    d|          }|                    d|          }|	                    d|          }	t          j
        |||	f          S )Nr   rp   rq   rL   r"   r   ri   )r+   r   r8   rD   r   rp   rq   reshaper   r   concat)
rX   r   r   rk   num_key_value_groupsrD   rq   r   r   r   s
             r5   _split_qkv_weightzHunYuanModel._split_qkv_weight  s2   "k=K.0O
 
  3lBk-4;
++ 	P!%!5T["677 	P!%!?!%!8<O!Okk.24F
 
 +c$8!Q#?QGGG1aIIb+&&IIb+&&IIb+&&|Q1I&&&r7   c                     t          | j                  r)t          j        | ddd| j        j        | j                  S g S )N	gate_projrU   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer.   r   )r<   r+   r   make_expert_params_mappingr.   r   rX   s    r5   get_expert_mappingzHunYuanModel.get_expert_mapping  sQ    4; 	 "<$/$/"+ K3&*&@    Ir7   weightsc           
         t          | j                  }g d}| j        j        }t          | j        d| j        j                  }dddddgd fdd||dz  z   d	|fd
|fd|fg| j        fg}t          |                                           }t                      }|                                 }	|D ]\  }
}d|
v rd|
v r|
	                    dd          }
d|
v r|
	                    dd          }
d|
v sd|
v rH| j        j
        rd|
v rY| j        O| j                            |
          x}r3||         }t          |dt                    }|d         } |||           d}|D ]\  }}}||
vrd|
v r|dk    rbt          j        d|
          }|rKt!          |                    d                              d          d                   }|dk    r
||z  dk    rx|
	                    ||          }
|
                    d          r|
|vrt)          |
|           r||
         }|j        } ||||           |                    |
           d} |r|D ]\  }}}}}||
vr|
	                    ||          }
|
                    d          r|
|vr=t)          |
|           rN|j        d         |z  dk    sJ |j        d         |z  }||
         }|j        }d}|D ]E\  }}|||z  z   }|r || ||          ||         |           n |||||         |           |}F n|
                    d          r|
|vrd}|	D ]}|\  }}}}||
vrd}|
	                    ||          }t)          ||           r7||         }t1          j        t4          dt6          f         |j                  } ||||||d           } | r|}
 no|rt9          |
|          }
|
0t)          |
|           rBd!|
v r|
	                    d"d#          }
||
         }t          |dt                    } |||           |                    |
           |S )$N))rt   r   r   )rt   z.k_projr   )rt   z.v_projr   )rM   z
.gate_projr   )rM   z.up_projr"   r   rM   z.gate_and_up_projrL   )r"   r"   )r   r"   rt   r   r   r   zrotary_emb.inv_freqgate_proj_biaszgate_proj.biasup_proj_biaszup_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightweight_loaderr   Fzmlp.expertsr   zlayers\.\d+r   ri   r"   z.biasT.)shard_id	expert_idreturn_successzmlp.gate.wg.zwg.rC   )r@   r+   r   r8   r  dictnamed_parameterssetr  replacer   rG   get_cache_scaler   researchr2   groupr   endswithr(   r  addr   typingcastr   rc   r   )!rX   r  r   stacked_params_mappingr   rk   split_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
scale_nameparamr  is_found
param_nameweight_namer  matchrn   densplit_paramfuncunitsoffsetnum
new_offsetis_expert_weightmappingr  name_mappedsuccesss!                                    r5   load_weightszHunYuanModel.load_weights  s   $T[11
"
 "
 "
 #k=K.0O
 
 11vv6FM#lQ&66*+c<-@3BUV&	 
 4002233"%%% $ 7 7 9 9#* T	$ T	$D-$,,4''||$46FGG%%||NNCC&$..2IT2Q2Q  {. 3Ct3K3K ,"/??EEE
 - $J/ '@U V V -a 0e]333H5K  1
Kd** D(()++Ind;;E %#&u{{1~~';';C'@'@'D#E#E%>>h.Cq.H.H$||K<<==)) d+.E.E*466 #D) % 3e]H===!!$'''  &[8 [8 d**||K<<==)) d+.E.E*466 $*1-3q8888%+A.#5#D) % 3%0 ( (MHc!'#+!5J Y%!44#6#6vj7H#I8    &e]6*;L-MxXXX'FF ==)) d+.E.E#( 4 48 48GCJ@JY"$..  (,$ #',,{J"G"GK.{DAA ! '4E %+K d+U-@% %M ,m%#!)"+'+  G  * ( ! !4T;GGD| .tT:: ! %--#||E266'-E$+0E% %M "M%777d####r7   r0   )r_   r`   ra   r   rb   rS   r   r   r  r    r^   r  r9   r   r2   r  r   rB  rd   re   s   @r5   r   r   C  so        BD *) *) *)z *)3 *) *) *) *) *) *)X, ,%, , , , , .2) )<$&) <) 2D8	)
 |d*) 
+	+) ) ) )V'U\ ' ' ' '0DsCc/A)B$C    rHU33D-E$F r r r r r r r rr7   r   c                   X    e Zd Zg dddgdZdddedef fd	Z	 	 ddej        dej        de	d
z  dej        d
z  dej        e	z  f
dZ
dej        dej        d
z  fdZdedej        dej        de	fdZdeeeej        f                  dee         fdZdej        dej        fdZ xZS )HunyuanV1ModelBase)r   k_projv_projr  r  )r   rT   rC   r   r   rI   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |d          | _        t                      j	        rt          |j        |j        |t          |d                    | _        |j        r| j        j        j        | j        _        t%          |dd          }t'          |j        |          | _        d S t+                      | _        d S )Nmodelr   rI   lm_head)rG   rI   logit_scaleg      ?)scale)rR   rS   r   r   rG   r+   r   rH  r   r   r   r   rD   r*   rJ  r   r   weightr8   r   logits_processorr'   )rX   r   rI   r+   rG   rK  rY   s         r5   rS   zHunyuanV1ModelBase.__init__  s    )3"/(!k'JJJ
>>& 	,)!")#FI66	  DL ) E&*j&=&D#!&-==K$3!% % %D!!! *++DLLLr7   Nr   r   r   r   r,   c                 6    |                      ||||          }|S r0   )rH  )rX   r   r   r   r   model_outputs         r5   r^   zHunyuanV1ModelBase.forward  s)     zzy"6
 
 r7   r   c                 <    |                      | j        |          }|S r0   )rN  rJ  )rX   r   logitss      r5   compute_logitsz!HunyuanV1ModelBase.compute_logits  s      &&t|]CCr7   
batch_sizedtypedevicec                     t          t          j        || j        j        f||          t          j        || j        j        f||          d          S )N)rU  rV  r  )r    r   zerosr+   rD   )rX   rT  rU  rV  s       r5   make_empty_intermediate_tensorsz2HunyuanV1ModelBase.make_empty_intermediate_tensors  sl     #!&!89v" " " "K!89v  	 	
 	
 		
r7   r  c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r&   r+   r   rB  )rX   r  loaders      r5   rB  zHunyuanV1ModelBase.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r7   c                 6    | j                             |          S r0   )rH  r  r   s     r5   r  z"HunyuanV1ModelBase.embed_input_ids  s    z)))444r7   )NN)r_   r`   ra   packed_modules_mappingr   rb   rS   r   r   r    r^   rS  r2   rU  rV  rY  r   r   r   rB  r  rd   re   s   @r5   rD  rD    s       
 
 
 

 
 BD , , ,z ,3 , , , , , ,< <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   

&+k
;@<
	
 
 
 
,HU33D-E$F ,3s8 , , , ,5 5%, 5 5 5 5 5 5 5 5r7   rD  c                   n     e Zd Zdddedef fdZdededd	fd
Zdee	eeeef                  fdZ
 xZS )HunYuanMoEV1BaserC   r   r   rI   c                J   t                                          ||           g | _        d| _        g | _        d }| j        j        D ]t}t          |t                    rt          |t                    sJ t          |j
        t                    r+|j
        }| j                            |j
        j                   u|t          d          t          | j                  | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        d S )NrI  r"   z*No HunYuanMoE layer found in model.layers.)rR   rS   expert_weightsnum_expert_groups
moe_layersrH  r   r1   r'   r   r   r   appendr   r   r   num_moe_layersr   num_logical_expertsr   num_physical_expertsr   num_local_physical_expertsr   num_routed_expertsr   r   )rX   r   rI   example_layerr  rY   s        r5   rS   zHunYuanMoEV1Base.__init__  s   [@@@ !!"Z& 	: 	:E%00 e%899999%)%:;; : %	&&uy'8999 KLLL!$/22#0#B $1$D!*7*P'"/"@%2%F"""r7   rh  ri  r,   Nc                     | j         |k    sJ || _        || _         || j        z
  | _        | j        j        D ]V}t          |j        t                    r:|j        }||_	        ||_
        | j        |_        |j                                         Wd S r0   )ri  rh  rg  r   rH  r   r1   r   r   r   r   r   r   update_expert_map)rX   rh  ri  r  moes        r5    update_physical_experts_metadataz1HunYuanMoEV1Base.update_physical_experts_metadata  s    
 .2LLLLL$8!*D'%9D<T%T"Z& 	0 	0E%)%:;; 0i/I,)=&*.*D'--///	0 	0r7   c                 4    | j                                         S r0   )rH  r  r  s    r5   r  z#HunYuanMoEV1Base.get_expert_mapping  s    z,,...r7   )r_   r`   ra   r   rb   rS   r2   ro  r9   r   r  rd   re   s   @r5   r`  r`    s        AC G G Gz G3 G G G G G G60!0 %(0 
	0 0 0 0"/DsCc/A)B$C / / / / / / / /r7   r`  c                   .     e Zd Zdddedef fdZ xZS )HunYuanDenseV1BaserC   r   r   rI   c                N    t                                          ||           d S )NrI  )rR   rS   )rX   r   rI   rY   s      r5   rS   zHunYuanDenseV1Base.__init__	  s&    [@@@@@r7   )r_   r`   ra   r   rb   rS   rd   re   s   @r5   rr  rr    sf        AC A A Az A3 A A A A A A A A A Ar7   rr  c                       e Zd ZdS )HunYuanDenseV1ForCausalLMNr_   r`   ra    r7   r5   ru  ru            Dr7   ru  c                       e Zd ZdS )HunYuanMoEV1ForCausalLMNrv  rw  r7   r5   rz  rz    rx  r7   rz  )P__doc__r(  collections.abcr   r   	itertoolsr   regexr#  r   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   r   vllm.distributedr   r   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer    vllm.v1.attention.backendr!   
interfacesr#   r$   r%   utilsr&   r'   r(   r)   r*   rc   r<   r2   r@   ModulerB   rg   r   r   r   r   rD  r`  rr  ru  rz  rw  r7   r5   <module>r     s9  2 H G  . . . . . . . .                  ) ) ) ) ) ) * * * * * * = = = = = = H H H H H H H H H H            = < < < < < ? ? ? ? ? ? 8 8 8 8 8 8              H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 3 3 3 3 3 3 B B B B B B B B B B             
$ 
 
 
 
 
2, 2 2 2 2 2% % % % % % % %Ph" h" h" h" h"ry h" h" h"Vi" i" i" i" i"BI i" i" i"Xq4 q4 q4 q4 q4BI q4 q4 q4hk6 k6 k6 k6 k6") k6 k6 k6\   ! 	 	 	t t t t t29 t t	 	tn	P5 P5 P5 P5 P5L* P5 P5 P5f./ ./ ./ ./ ./)+; ./ ./ ./bA A A A A+ A A A
	 	 	 	 	 2 	 	 		 	 	 	 	. 	 	 	 	 	r7   