
    .`iyI                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2  G d dej3                  Z4 G d dej3                  Z5 G d dej3                  Z6 G d dej3                  Z7e G d d ej3                              Z8 G d! d"ej3        e*e+          Z9dS )#z@Inference-only Exaone model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )ExaoneGatedMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    t                                                       t          ||gdz  ||| d          | _        t	          ||||| d          | _        |dk    rt          d| d          t                      | _        d S )	N   .gate_up_proj)
input_sizeoutput_sizesr*   r)   r+   z.c_projr0   output_sizer*   r)   r+   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   c_proj
ValueErrorr   act_fn)selfr&   r'   r(   r)   r*   r+   	__class__s          u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/exaone.pyr6   zExaoneGatedMLP.__init__G   s     	6"+,q0%+++
 
 
 ((#%%%%
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r7   r:   r8   )r;   xgate_up_s       r=   forwardzExaoneGatedMLP.forwarde   sB    &&q))
KK  {{1~~1r>   )NFr%   )
__name__
__module____qualname__intstrr   boolr6   rD   __classcell__r<   s   @r=   r$   r$   F   s         37# ## # 	#
 )4/# # # 
# # # # # #<      r>   r$   c                        e Zd Z	 	 	 	 	 ddedededed	ed
edz  dededz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )ExaoneAttention    NFr%   configr&   	num_headsnum_kv_headsmax_position_embeddingsr)   r*   cache_configr+   r,   c
           
         t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        t          |dd           | _
        | j
        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t          || j
        | j        | j        |||	 d          | _        t#          | j        | j
        z  ||||	 d          | _        d	}||                                d
k    rd}t)          | j
        ||j        |          | _        t/          | j        | j
        | j        | j        |||	 d          | _        d S )Nr   r   head_dimg      	.qkv_proj)r&   	head_sizetotal_num_headstotal_num_kv_headsr*   r)   r+   z	.out_projr2   TggufF)max_positionrope_parametersis_neox_style.attn)rR   rT   r)   r+   )r5   r6   r&   r   rY   rQ   rZ   maxrR   getattrrV   q_sizekv_sizescalingrS   r   qkv_projr   out_projget_namer   r]   
rotary_embr   attn)r;   rP   r&   rQ   rR   rS   r)   r*   rT   r+   tp_sizer^   r<   s               r=   r6   zExaoneAttention.__init__m   s3    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
D99=  ,0DDDMnt}4(4=8}d*'>$)#m 0#6%'''
 
 
 *+dm;#%'''
 
 
 #(=(=(?(?6(I(I!M"M0"2'	
 
 
 NML*%%###
 
 
			r>   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)re   splitrb   rc   rh   ri   rf   )
r;   rk   rl   qkvrC   qkvattn_outputoutputs
             r=   rD   zExaoneAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((MM+..	r>   rO   NFNr%   rE   rF   rG   r   rH   r   rJ   r	   rI   r6   torchTensorrD   rK   rL   s   @r=   rN   rN   l   s        (,26+/I
 I
 I
 I
 	I

 I
 "%I
 )4/I
 I
 "D(I
 I
 
I
 I
 I
 I
 I
 I
V
<
 |
 
	
 
 
 
 
 
 
 
r>   rN   c                        e Zd Z	 	 	 	 	 ddedededed	ed
edz  dededz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )ExaoneBlockAttentionrO   NFr%   rP   r&   rQ   rR   rS   r)   r*   rT   r+   r,   c
                     t                                                       t          |||||||||	 d	  	        | _        d S )Nz
.attention	rP   r&   rQ   rR   rS   r)   r*   rT   r+   )r5   r6   rN   	attention)r;   rP   r&   rQ   rR   rS   r)   r*   rT   r+   r<   s             r=   r6   zExaoneBlockAttention.__init__   sY     	(#%$;%%(((

 

 

r>   rk   rl   c                 0    |                      ||          S N)rk   rl   )r   )r;   rk   rl   s      r=   rD   zExaoneBlockAttention.forward   s%    
 ~~'  
 
 	
r>   rw   rx   rL   s   @r=   r|   r|      s         (,26+/
 
 
 
 	

 
 "%
 )4/
 
 "D(
 
 

 
 
 
 
 
2
<
 |
 
	
 
 
 
 
 
 
 
r>   r|   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )ExaoneDecoderLayerNr%   rP   rT   r)   r+   r,   c                 8   t                                                       |j        | _        t          |dd          }t          |dd          pt          |dd          }t	          || j        |j        t          |d|j                  ||||| d	  	        | _        t          | j        |j        |j	        |t          |d	d          | d
          | _
        t          |j        |j                  | _        t          |j        |j                  | _        d S )NrS   rO   attention_biasFr*   num_key_value_headsr_   r~   mlp_biasz.mlp)r&   r'   r(   r)   r*   r+   eps)r5   r6   r&   ra   r|   num_attention_headsri   r$   r'   activation_functionmlpr   layer_norm_epsilonln_1ln_2)r;   rP   rT   r)   r+   rS   r   r<   s          r=   r6   zExaoneDecoderLayer.__init__   s?    	!-")&2KT"R"R !)95AA 
WFEF
 F
 )(0 -v/I  %<%%###
 
 
	 "($61%U33???
 
 
 F.F4MNNN	F.F4MNNN			r>   rk   rl   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS r   )r   ri   r   r   )r;   rk   rl   r   s       r=   rD   zExaoneDecoderLayer.forward  s     $H IIm44MM&*iix&H&H#M8		' " 
 
 #'))M8"D"Dx//h&&r>   )NNr%   )rE   rF   rG   r   r	   r   rI   r6   ry   rz   tuplerD   rK   rL   s   @r=   r   r      s         ,026%O %O %O "D(%O )4/	%O
 %O 
%O %O %O %O %O %ON'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r>   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )ExaoneModelr%   r+   vllm_configr+   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        j        | _        t                      j
        sj        r5t                      j        r"t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1          ddgj                  | _        d S )N)r)   c                 *    t          |           S )N)rP   rT   r)   r+   )r   )r+   rT   rP   r)   s    r=   <lambda>z&ExaoneModel.__init__.<locals>.<lambda>C  s#    -))	   r>   z.hr   r   rl   r   )r5   r6   model_config	hf_configrT   r)   rP   
vocab_sizewter   is_first_ranktie_word_embeddingsis_last_rankr   r&   r   r!   num_hidden_layersstart_layer	end_layerhr   r   ln_fr    make_empty_intermediate_tensors)r;   r   r+   rT   rP   r)   r<   s      @@@r=   r6   zExaoneModel.__init__+  sj   )3"/"/( +$>>' 		(&		(+7>>+F		( .")  DHH &''DH3>$      ===	4
 	4
 	4
0$.$& >>& 	) 28QRRRDII&((DI/Vj)6+=0
 0
,,,r>   	input_idsr,   c                 ,    |                      |          S r@   )r   r;   r   s     r=   embed_input_idszExaoneModel.embed_input_idsT  s    xx	"""r>   Nrk   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nrl   r   )rl   r   )
r   r   r   r   r   r   r   r   r   r   )	r;   r   rk   r   r   rl   r   layerrC   s	            r=   rD   zExaoneModel.forwardW  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDFD$4dnEE 	 	E&+e' '#M88 ~~* 	&"/XFF    99]H==qr>   weightsc                 ~   g d}t          |                                           }t                      }|D ]\  }}d|v rd|v sd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vr t          ||          }|4t          ||           rF||         }t          |dt                    }	 |	||           |                    |           |S )N))rW   z.q_projrr   )rW   z.k_projrs   )rW   z.v_projrt   )r/   z.c_fc_0r   )r/   z.c_fc_1r   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   z.bias)dictnamed_parameterssetr)   get_cache_scalera   r   ro   addreplaceendswithr   r   r   )r;   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r=   load_weightszExaoneModel.load_weightsx  s?   "
 "
 "
 4002233"%%%#* 2	$ 2	$D-$,,&$..2IT2Q2Q  ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r>   r@   )rE   rF   rG   r
   rI   r6   ry   rz   r   r   rD   r   r   r   r   rK   rL   s   @r=   r   r   )  s"       AC '
 '
 '
z '
3 '
 '
 '
 '
 '
 '
R# #%, # # # # .2 <$& < 2D8	
 |d* 
+	+   B>HU33D-E$F >3s8 > > > > > > > >r>   r   c                   6    e Zd Zg dddgdZdddZdd	d
edef fdZdej	        dej	        fdZ
	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
dZdej	        dej	        dz  fdZdeeeej	        f                  dee         fdZ xZS )ExaoneForCausalLM)q_projk_projv_projc_fc_0c_fc_1)re   r7   input_embeddingsoutput_embeddings)r   lm_headr%   r   r   r+   c          	      >   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rt          |j        |j        |t          |d                    | _        |j        r| j        j        j        | j        _        t%          |dd          }t'          |j        |          | _        nt+                      | _        | j        j        | _        d S )Nmodel)r   r+   r   )r)   r+   logit_scaleg      ?)scale)r5   r6   r   r   r)   rP   r   r"   transformerr   r   r   r   r&   r   r   r   weightra   r   logits_processorr   r   )r;   r   r+   rP   r)   r   r<   s         r=   r6   zExaoneForCausalLM.__init__  s!   )3"/(&#00
 
 
 >>& 	,)!")#FI66	  DL ) B&*&6&:&A#!&-==K$3!% % %D!! *++DL < 	,,,r>   r   r,   c                 6    | j                             |          S r@   )r   r   r   s     r=   r   z!ExaoneForCausalLM.embed_input_ids  s    z)))444r>   Nrk   r   r   c                 6    |                      ||||          }|S r@   )r   )r;   r   rk   r   r   model_outputs         r=   rD   zExaoneForCausalLM.forward  s+     ''y"6
 
 r>   rl   c                 <    |                      | j        |          }|S r@   )r   r   )r;   rl   logitss      r=   compute_logitsz ExaoneForCausalLM.compute_logits  s      &&t|]CCr>   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rP   r   r   )r;   r   loaders      r=   r   zExaoneForCausalLM.load_weights  sE    " ,0;+JTJ<<PT
 
 
 ""7+++r>   )NN)rE   rF   rG   packed_modules_mappingembedding_modulesr
   rI   r6   ry   rz   r   r   rD   r   r   r   r   r   rK   rL   s   @r=   r   r     s       
 
 
 

 
 "& 
 BD  
  
  
z  
3  
  
  
  
  
  
D5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r>   r   ):__doc__collections.abcr   	itertoolsr   ry   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r    r!   r"   Moduler$   rN   r|   r   r   r    r>   r=   <module>r      sP  4 G F $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 0 0 0 0 0 0 0 0               # # # # #RY # # #LV V V V Vbi V V Vr"
 "
 "
 "
 "
29 "
 "
 "
J<' <' <' <' <' <' <' <'~ L L L L L") L L L^S, S, S, S, S,	< S, S, S, S, S,r>   