
    .`iU                        d Z ddlZddlmZmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z' ddl(m)Z* ddl(m+Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  G d dej8                  Z9 G d dej8                  Z:e G d dej8                              Z; G d dej8        e.e/          Z<dS )zLInference-only K-EXAONE-236B-A22B model compatible with HuggingFace weights.    N)CallableIterable)islice)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfigget_current_vllm_config)get_ep_groupget_pp_group$get_tensor_model_parallel_world_size)FusedMoE)RMSNorm)ReplicatedLinear)LogitsProcessor)QuantizationConfig)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )Exaone4Attention)Exaone4GatedMLP)SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )	ExaoneMoeN Fconfigquant_configprefixenable_eplbc           	      n   t                                                       t                      | _        |j        | _        t                      j        | _        | j                                        | _	        | j        
                                | _        |j        | _        | j        |j        k    r t          d| j         d|j         d          t          |j        |j        dd | d          | _        t%          j        t)          j        |j        t(          j                            | _        t1                      }|j        j        }|| _        | j        | _        |j        |j        nd|_        |j        | _        | j        | j        z   | _        | j        | j        z  | _         | j	        | j         z  | _!        | j!        | j         z   | _"        tG          di d	| j        d
|j$        d|j        d|j%        ddd|j&        d|ddd|j'        d|j(        d| dddd| j        d| j        d| j        d| j        | _)        tU          |dd          dk    rO|j%        |j+        z  }tY          |j        ||j-        || j)        .                                | d          | _/        d S d | _/        d S ) NzTensor parallel size z' is greater than the number of experts .Fz.gate)biasr*   r+   )dtyper   num_expertstop_khidden_sizeintermediate_sizereduce_resultsrenormalizer*   use_grouped_topkTnum_expert_group
topk_groupr+   z.expertsscoring_funcsigmoidrouted_scaling_factore_score_correction_biasr,   num_redundant_expertsnum_shared_expertsz.shared_experts)r3   r4   
hidden_actr*   r5   r+    )0super__init__r   tp_sizer<   r   device_groupep_grouprankep_ranksizeep_sizer1   n_routed_experts
ValueErrorr   r3   gater   	Parametertorchemptyfloat32r=   r   parallel_configeplb_configr,   n_logical_expertsr>   n_redundant_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endr   num_experts_per_tokmoe_intermediate_sizenorm_topk_probn_groupr9   expertsgetattrr?   ExaoneMoeGatedMLPr@   !must_reduce_shared_expert_outputsshared_experts)	selfr)   r*   r+   r,   vllm_configrS   r4   	__class__s	           y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/exaone_moe.pyrC   zExaoneMoe.__init__@   sX    	;==%+%A"$3}))++}))++ & 2<&,,,? ? ?)/);? ? ?  
 %###
 
 
	 (*|K*%-@@@(
 (
$
 .//!1=&!%!6 0< -- 	)
 $/#D "&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	    
 
 
--
,,
 **
 %::	

 !5
 --
 &
 "T
 $^^
 ((
 &&&&
 #
 #'"<"<
 %)$@$@
 ((
  #'":":!
& 6/33a77 & <v?X X"3"."3!,)#|MMOO 111# # #D #'D    hidden_statesreturnc                 j   |j         }|j         d         }|                    d|          }|                     |          \  }}|                     ||          }| j        |                     |          }||z   }| j        dk    r| j                            |          }|                    |          S )N)rh   router_logitsr   )shapeviewrM   r^   rb   rD   &maybe_all_reduce_tensor_model_parallel)rc   rh   
orig_shape
hidden_dimrl   _final_hidden_statesshared_outputs           rf   forwardzExaoneMoe.forward   s    "(
"(,
%**2z::  99]33q"ll'} + 
 
 * //>>M"5"E<!"&,"U"U## # #''
333rg   )Nr(   F)__name__
__module____qualname__r   r   strboolrC   rO   Tensorru   __classcell__re   s   @rf   r'   r'   ?   s         37!U' U' U' )4/U' 	U'
 U' U' U' U' U' U'n4U\ 4el 4 4 4 4 4 4 4 4rg   r'   c                        e Zd Z	 	 	 	 ddededz  dedz  dededdf fd	Zd
e	j
        de	j
        de	j
        dz  dee	j
        e	j
        f         fdZ xZS )ExaoneMoeDecoderLayerNr(   r)   cache_configr*   	mtp_layerr+   ri   c                    t                                                       t          |          }|j        | _        t	          |dd          }t	          |dd          pt	          |dd          }t          || j        |j        t	          |d|j                  ||||| d	  	        | _        |j        |         r|st          ||| d	
          | _
        n;t          | j        |j        |j        |t	          |dd          | d	          | _
        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nmax_position_embeddingsi    attention_biasFr/   num_key_value_headsz
.self_attn)	r)   r3   	num_headsnum_kv_headsr   r*   r/   r   r+   z.mlp)r)   r*   r+   mlp_bias)r3   r4   r@   r*   r/   r+   eps)rB   rC   r!   r3   r_   ExaoneMoeAttentionnum_attention_heads	self_attnis_moe_layerr'   mlpr`   r4   r@   r   rms_norm_epsinput_layernormpost_attention_layernorm)
rc   r)   r   r*   r   r+   	layer_idxr   r   re   s
            rf   rC   zExaoneMoeDecoderLayer.__init__   s    	'//	!-")&2KT"R"R !)95AA 
WFEF
 F
 ,(0 -v/I  %<%%(((
 
 
 y) 	) 	 LF  DHH ) ,"(":!,)VZ77   DH  'v'9v?RSSS(/F$7)
 )
 )
%%%rg   	positionsrh   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rh   )r   r   r   r   )rc   r   rh   r   s       rf   ru   zExaoneMoeDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8 ' ' 
 
 #'"?"?x"X"Xx//h&&rg   )NNNr(   )rv   rw   rx   r   r	   r   rz   ry   rC   rO   r{   tupleru   r|   r}   s   @rf   r   r      s         ,0260
 0
 0
 "D(0
 )4/	0

 0
 0
 
0
 0
 0
 0
 0
 0
d'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rg   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )ExaoneMoeModelr(   r+   rd   r+   c                   t                                                       |j        j        |j        |j        |j        }|j        j        j	        | _	        | _
        | _        |r|j        |j        pdz  nd}j        |z   | _        t                      j        sj        r;t                      j        r(t%          | j        j        j                  | _        nt+                      | _        t-          j        fd| d          \  | _        | _        | _        t                      j        r!t7          j        j                  | _        nt+                      | _        t=          dd	gj                  | _        d S )
Nr   r   )org_num_embeddingsr*   c                 *    t          |           S )N)r)   r   r*   r+   )r   )r+   r   r)   r*   s    rf   <lambda>z)ExaoneMoeModel.__init__.<locals>.<lambda>  s#    0))	   rg   z.layersr   r   rh   r   ) rB   rC   model_config	hf_configr   r*   lora_configrR   rS   r>   r)   lora_extra_vocab_size	max_loras
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r3   embed_tokensr    r$   num_hidden_layersstart_layer	end_layerlayersr   r   normr#   make_empty_intermediate_tensors)	rc   rd   r+   r   
lora_vocabr   r)   r*   re   s	        @@@rf   rC   zExaoneMoeModel.__init__   s   )3"/"/!-'3I 	" ( [.+2G2L1MM 	
 !+j8>>' 
	1&
	1+7>>+F
	1 !7"#)#4)	! ! !D !/ 0 0D8C$      %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,rg   	input_idsri   c                 ,    |                      |          S N)r   rc   r   s     rf   embed_input_idszExaoneMoeModel.embed_input_ids/  s      +++rg   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nrh   r   )rh   r   )
r   r   r   r   r   r   r   r   r   r   )	rc   r   r   r   r   rh   r   layerrr   s	            rf   ru   zExaoneMoeModel.forward2  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qrg   c                 T    t          j        | ddd| j        j        | j                  S )N	gate_proj	down_projup_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer1   r>   )r   make_expert_params_mappingr)   r1   r>   )rc   s    rf   get_expert_mappingz!ExaoneMoeModel.get_expert_mappingQ  s8     2 + +'/"&"<
 
 
 	
rg   weightsc           
      l   g d}d}t          |                                           }t                      }|                                 }|D ]f\  }}|                    d          rd|v r!d|v sd|v r*| j        ~| j                            |          x}	rb||	         }
t          |
dt                    }|	                                dk    r|n|d         } ||
|           |
                    |	           |D ]t\  }}}||vrd	|v r|                    ||          }|                    d
          r||vr@t          ||           rQ||vrV||         }
|
j        } ||
||            n+d}|D ]}|\  }}}}||vrd}|                    ||          }t          ||           r7|                    |          r||vrQ||         }
t          j        t"          dt$          f         |
j                  } ||
||||d          }|r|} n|r|                    d
          r||vr|                    |          r||vrt'          ||          }|t          ||           r'||         }
t          |
dt                    } ||
|           |
                    |           h|S )N))	.qkv_projz.q_projq)r   z.k_projk)r   z.v_projv).gate_up_projz
.gate_projr   )r   z.up_projr   )
.bias_biasz.k_scale_k_scalez.v_scale_v_scalez.weight_scale_weight_scalez.input_scale_input_scalemtp.zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   zmlp.expertsr   FT.)shard_id	expert_idreturn_success)dictnamed_parameterssetr   
startswithr*   get_cache_scaler_   r   dimaddreplaceendswithr"   r   typingcastr   rz   r   )rc   r   stacked_params_mappingignore_suffixesparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
scale_nameparamr   
param_nameweight_namer   is_expert_weightmappingr   name_mappedsuccesss                       rf   load_weightszExaoneMoeModel.load_weights]  s   "
 "
 "

 4002233"%%% $ 7 7 9 9#* h	$ h	$D-v&& $,,&$..2IT2Q2Q  ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K R8 R81
Kd** D((||K<<==)) d+.E.E*466 {**#D) % 3e]H===#( 4 >8 >8GCJ@JY"$..  (,$ #',,{J"G"GK.{DAA !  $,,_==!'{:: '4E %+K d+U-@% %M ,m%#!)"+'+  G  * ( ! }}W-- !$k2I2I }}_55 !$k:Q:Q 4T;GGD| .tT:: ! '-E$+0E% %M "M%777d####rg   r   )rv   rw   rx   r
   ry   rC   rO   r{   r   r   ru   listr   intr   r   r   r   r|   r}   s   @rf   r   r      sY       AC /
 /
 /
z /
3 /
 /
 /
 /
 /
 /
b, ,%, , , , , .2 <$& < 2D8	
 |d* 
+	+   >

DsCc/A)B$C 

 

 

 

DHU33D-E$F D3s8 D D D D D D D Drg   r   c                   <    e Zd Zg dddgdZdddZdgZd	d
dedef fdZde	j
        de	j
        fdZ	 	 dde	j
        de	j
        dedz  de	j
        dz  de	j
        ez  f
dZde	j
        de	j
        dz  fdZdeeee	j
        f                  dee         fdZ xZS )ExaoneMoeForCausalLM)q_projk_projv_projr   r   )qkv_projgate_up_projinput_embeddingsoutput_embeddings)r   lm_headr   r(   r   rd   r+   c                   t                                                       |j        j                                        }|j        }|j        }|| _        || _        || _        t          |t          |d                    | _
        t                      j        r|j        | _        |r| xj        |j        z  c_        t!          | j        |j        |j        |st$          n|j        |          | _        |j        r| j
        j        j        | j        _        t1          |dd          }t3          | j        |j        |          | _        nt7                      | _        | j
        j        | _        d S )Nmodel)rd   r+   )r   padding_sizer*   logit_scaleg      ?)rB   rC   r   r   get_text_configr*   r   r)   r   r%   r   r   r   r   unpadded_vocab_sizer   r   r3   r   lora_vocab_padding_sizer   r   r   weightr_   r   logits_processorr    r   )rc   rd   r+   r)   r*   r   r   re   s          rf   rC   zExaoneMoeForCausalLM.__init__  sv   )3CCEE"/!-&(##00
 
 

 >>& 	,'-'8D$ N((K,MM(()("#)#4 #977 !8)
 
 
DL ) E&*j&=&D#!&-==K$3(&*;[% %D!! *++DL J6 	,,,rg   r   ri   c                 6    | j                             |          S r   )r   r   r   s     rf   r   z$ExaoneMoeForCausalLM.embed_input_ids#  s    z)))444rg   Nr   r   r   c                 6    |                      ||||          }|S r   )r   )rc   r   r   r   r   model_outputs         rf   ru   zExaoneMoeForCausalLM.forward&  s)     zzy"6
 
 rg   rh   c                 <    |                      | j        |          }|S r   )r  r   )rc   rh   logitss      rf   compute_logitsz#ExaoneMoeForCausalLM.compute_logits2  s      &&t|]CCrg   r   c                 p    t          | | j        j        rddgndg          }|                    |          S )Nzlm_head.r   )skip_prefixes)r   r)   r   r   )rc   r   loaders      rf   r   z!ExaoneMoeForCausalLM.load_weights9  sJ    "
 )-(GUV$$fX
 
 
 ""7+++rg   )NN)rv   rw   rx   packed_modules_mappingembedding_modulesembedding_padding_modulesr
   ry   rC   rO   r{   r   r   ru   r	  r   r   r   r   r|   r}   s   @rf   r   r     s       
 
 
 

 
 +&  "+AC )
 )
 )
z )
3 )
 )
 )
 )
 )
 )
V5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   
,HU33D-E$F 
,3s8 
, 
, 
, 
, 
, 
, 
, 
,rg   r   )=__doc__r   collections.abcr   r   	itertoolsr   rO   r   transformersr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.distributedr   r   r   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   exaone4r   r   r   r`   
interfacesr   r   utilsr   r    r!   r"   r#   r$   r%   Moduler'   r   r   r   rA   rg   rf   <module>r#     s     S R  . . . . . . . .              ) ) ) ) ) ) = = = = = = H H H H H H H H H H         
 : 9 9 9 9 9 8 8 8 8 8 8 > > > > > > G G G G G G F F F F F F         
        . - - - - - ; ; ; ; ; ; 9 9 9 9 9 9 0 0 0 0 0 0 0 0                 n4 n4 n4 n4 n4	 n4 n4 n4bI' I' I' I' I'BI I' I' I'X d d d d dRY d d dN_, _, _, _, _,29lJ _, _, _, _, _,rg   