
    .`i*I                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5  G d dej6                  Z7 G d dej6                  Z8 G d dej6                  Z9e G d dej6                              Z: G d  d!ej6        e,e-          Z;dS )"z@Inference-only Exaone model compatible with HuggingFace weights.    )Iterable)isliceN)nn)Exaone4Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)set_default_rope_theta   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   V     e Zd Z	 	 	 	 ddededededz  d	ed
ededdf fdZd Z xZ	S )Exaone4GatedMLPNTF hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsbiasprefixreturnc           	         t                                                       t          ||gdz  ||| d          | _        t	          |||||| d          | _        |dk    rt          d| d          t                      | _        d S )	N   .gate_up_proj)
input_sizeoutput_sizesr-   r+   r.   z
.down_proj)r3   output_sizer-   r+   r,   r.   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)	selfr(   r)   r*   r+   r,   r-   r.   	__class__s	           v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/exaone4.pyr8   zExaone4GatedMLP.__init__E   s     	6"+,q0%+++
 
 
 +(#%)(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r9   r<   r:   )r=   xgate_up_s       r?   forwardzExaone4GatedMLP.forwarde   sD    &&q))
KK  ~~a  1r@   )NTFr'   )
__name__
__module____qualname__intstrr   boolr8   rF   __classcell__r>   s   @r?   r&   r&   D   s         37## ## # 	#
 )4/# # # # 
# # # # # #@      r@   r&   c                        e Zd Z	 	 	 	 	 ddedededed	ed
edz  dededz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )Exaone4Attention    NFr'   configr(   	num_headsnum_kv_headsmax_position_embeddingsr+   r-   cache_configr.   r/   c
                    t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        t          |dd           | _
        | j
        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t          || j
        | j        | j        |||	 d          | _        t#          | j        | j
        z  ||||	 d          | _        t'          | j
        |j        	          | _        t'          | j
        |j        	          | _        d
}||                                dk    rd}t1          |	          }|j        |         dk    }|r|j        nd | _        d|j        v| _        t9          |d           t;          | j
        ||j        |          | _        tA          | j        | j
        | j        | j        ||| j        |	 d          | _!        d S )Nr   r   head_dimg      	.qkv_proj)r(   	head_sizetotal_num_headstotal_num_kv_headsr-   r+   r.   z.o_proj)r3   r5   r-   r+   r.   epsTggufFsliding_attentioni@B )default_theta)max_positionrope_parametersis_neox_stylez.attn)rT   rV   r+   per_layer_sliding_windowr.   )"r7   r8   r(   r   r[   rS   r\   maxrT   getattrrX   q_sizekv_sizescalingrU   r   qkv_projr   o_projr   rms_norm_epsq_normk_normget_namer    layer_typessliding_windowapply_rope_all_layersr   r   rc   
rotary_embr   attn)r=   rR   r(   rS   rT   rU   r+   r-   rV   r.   tp_sizerd   	layer_idx
is_slidingr>   s                 r?   r8   zExaone4Attention.__init__m   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
D99=  ,0DDDMnt}4(4=8}d*'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 dm1DEEEdm1DEEE#(=(=(?(?6(I(I!M'//	'	26II
7AKf33t &9@R%R"vW===="M0"2'	
 
 
 NML*%%%)%8###	
 	
 	
			r@   	positionshidden_statesc                 r   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                    d| j        | j        f          }|                     |          }|                    dd          }|                    d| j	        | j        f          }| 
                    |          }|                    dd          }| j        s| j        r|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rk   splitrh   ri   	unflattenrS   rX   rn   flattenrT   ro   rr   rs   rt   ru   rl   )
r=   ry   rz   qkvrE   qkvattn_outputoutputs
             r?   rF   zExaone4Attention.forward   s   
 }--Q))T[$,E2)NN1aKKT^T];<<KKNNIIb"KKT.>??KKNNIIb" 	4$"< 	4??9a33DAqii1a((KK,,	r@   )rQ   NFNr'   )rG   rH   rI   r   rJ   r   rL   r	   rK   r8   torchTensorrF   rM   rN   s   @r?   rP   rP   l   s        (,26+/U
 U
U
 U
 	U

 U
 "%U
 )4/U
 U
 "D(U
 U
 
U
 U
 U
 U
 U
 U
n< | 
	       r@   rP   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Exaone4DecoderLayerNr'   rR   rV   r+   r.   r/   c                 8   t                                                       |j        | _        t          |dd          }t          |dd          pt          |dd          }t	          || j        |j        t          |d|j                  ||||| d	  	        | _        t          | j        |j        |j	        |t          |d	d          | d
          | _
        t          |j        |j                  | _        t          |j        |j                  | _        d S )NrU   rQ   attention_biasFr-   num_key_value_headsz
.self_attn)	rR   r(   rS   rT   rU   r+   r-   rV   r.   mlp_biasz.mlp)r(   r)   r*   r+   r-   r.   r]   )r7   r8   r(   rg   rP   num_attention_heads	self_attnr&   r)   r*   mlpr   rm   post_attention_layernormpost_feedforward_layernorm)r=   rR   rV   r+   r.   rU   r   r>   s          r?   r8   zExaone4DecoderLayer.__init__   sS    	!-")&2KT"R"R !)95AA 
WFEF
 F
 *(0 -v/I  %<%%(((
 
 
 #($6(%U33???
 
 
 )0F$7)
 )
 )
% +2F$7+
 +
 +
'''r@   ry   rz   residualc                     |}|                      ||          }|                     |          }||z   }|}|                     |          }|                     |          }||z   }||fS )N)ry   rz   )r   r   r   r   )r=   ry   rz   r   s       r?   rF   zExaone4DecoderLayer.forward  s     ! ' ' 
 
 55mDD =0  // 77FF =0h&&r@   )NNr'   )rG   rH   rI   r   r	   r   rK   r8   r   r   tuplerF   rM   rN   s   @r?   r   r      s         ,026*
 *
*
 "D(*
 )4/	*

 *
 
*
 *
 *
 *
 *
 *
X'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r@   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )Exaone4Modelr'   r.   vllm_configr.   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t                      j	        sj
        r5t                      j        r"t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1          ddgj                  | _        d S )N)r+   c                 *    t          |           S )N)rR   rV   r+   r.   )r   )r.   rV   rR   r+   s    r?   <lambda>z'Exaone4Model.__init__.<locals>.<lambda>>  s#    .))	   r@   z.layersr   r]   rz   r   )r7   r8   model_config	hf_configrV   r+   rR   
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r(   embed_tokensr   r#   num_hidden_layersstart_layer	end_layerlayersr   rm   normr"   make_empty_intermediate_tensors)r=   r   r.   rV   rR   r+   r>   s      @@@r?   r8   zExaone4Model.__init__'  sf   )3"/"/( +>>' 		1&		1+7>>+F		1 !7")! ! !D !/ 0 0D8C$      %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,r@   	input_idsr/   c                 ,    |                      |          S rB   )r   r=   r   s     r?   embed_input_idszExaone4Model.embed_input_idsO  s      +++r@   Nry   intermediate_tensorsinputs_embedsc                 h   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    |          }|S )Nrz   r   )rz   r   )
r   r   r   r   r   r   r   r   r   r   )r=   r   ry   r   r   rz   r   layers           r?   rF   zExaone4Model.forwardR  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88 ~~* 	&"/XFF   		-00r@   weightsc                 ~   g d}t          |                                           }t                      }|D ]\  }}d|v rd|v sd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vr t          ||          }|4t          ||           rF||         }t          |dt                    }	 |	||           |                    |           |S )N))rY   z.q_projr   )rY   z.k_projr   )rY   z.v_projr   )r2   z
.gate_projr   )r2   z.up_projr   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   z.bias)dictnamed_parameterssetr+   get_cache_scalerg   r   r}   addreplaceendswithr!   r   r   )r=   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r?   load_weightszExaone4Model.load_weightss  s?   "
 "
 "
 4002233"%%%#* 2	$ 2	$D-$,,&$..2IT2Q2Q  ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r@   rB   )rG   rH   rI   r
   rK   r8   r   r   r   r   rF   r   r   r   r   rM   rN   s   @r?   r   r   %  s"       AC &
 &
 &
z &
3 &
 &
 &
 &
 &
 &
P, ,%, , , , , .2 <$& < 2D8	
 |d* 
+	+   B>HU33D-E$F >3s8 > > > > > > > >r@   r   c                   6    e Zd Zg dddgdZdddZdd	d
edef fdZdej	        dej	        fdZ
	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
dZdej	        dej	        dz  fdZdeeeej	        f                  dee         fdZ xZS )Exaone4ForCausalLM)q_projk_projv_proj	gate_projup_proj)rk   r9   input_embeddingsoutput_embeddings)r   lm_headr'   r   r   r.   c          	      >   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rt          |j        |j        |t          |d                    | _        |j        r| j        j        j        | j        _        t%          |dd          }t'          |j        |          | _        nt+                      | _        | j        j        | _        d S )Nmodel)r   r.   r   )r+   r.   logit_scaleg      ?)scale)r7   r8   r   r   r+   rR   r   r$   r   r   r   r   r   r(   r   r   r   weightrg   r   logits_processorr   r   )r=   r   r.   rR   r+   r   r>   s         r?   r8   zExaone4ForCausalLM.__init__  s   )3"/(!#00
 
 

 >>& 	,)!")#FI66	  DL ) E&*j&=&D#!&-==K$3!% % %D!! *++DL J6 	,,,r@   r   r/   c                 6    | j                             |          S rB   )r   r   r   s     r?   r   z"Exaone4ForCausalLM.embed_input_ids  s    z)))444r@   Nry   r   r   c                 6    |                      ||||          }|S rB   )r   )r=   r   ry   r   r   model_outputs         r?   rF   zExaone4ForCausalLM.forward  s)     zzy"6
 
 r@   rz   c                 <    |                      | j        |          }|S rB   )r   r   )r=   rz   logitss      r?   compute_logitsz!Exaone4ForCausalLM.compute_logits  s      &&t|]CCr@   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rR   r   r   )r=   r   loaders      r?   r   zExaone4ForCausalLM.load_weights  sE    " ,0;+JTJ<<PT
 
 
 ""7+++r@   )NN)rG   rH   rI   packed_modules_mappingembedding_modulesr
   rK   r8   r   r   r   r   rF   r   r   r   r   r   rM   rN   s   @r?   r   r     s       
 
 
 

 
 +& 
 BD 
 
 
z 
3 
 
 
 
 
 
B5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r@   r   )<__doc__collections.abcr   	itertoolsr   r   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configr   
interfacesr   r   utilsr   r   r    r!   r"   r#   r$   Moduler&   rP   r   r   r    r@   r?   <module>r      sG  , G F $ $ $ $ $ $              & & & & & & * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - A A A A A A 0 0 0 0 0 0 0 0                 % % % % %bi % % %Pk k k k kry k k k\H' H' H' H' H'") H' H' H'V K K K K K29 K K K\R, R, R, R, R,L* R, R, R, R, R,r@   