
    .`i3                        d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/  G d dej0                  Z1 G d dej0                  Z2 G d dej0                  Z3e G d dej0                              Z4 G d d ej0        e)          Z5dS )!zCInference-only Orion-14B model compatible with HuggingFace weights.    )Iterable)islice)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   J     e Zd Z	 	 ddededededz  deddf fd	Zd
 Z xZS )OrionMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	N   Fz.gate_up_projbiasr&   r'   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr#   r$   r%   r&   r'   	__class__s         t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/orion.pyr/   zOrionMLP.__init__1   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r0   r3   r1   )r4   xgate_up_s       r6   forwardzOrionMLP.forwardN   sD    &&q))
KK  ~~a  1r7   )Nr"   )	__name__
__module____qualname__intstrr   r/   r=   __classcell__r5   s   @r6   r!   r!   0   s         37# ## # 	#
 )4/# # 
# # # # # #:      r7   r!   c                        e Zd Z	 	 	 	 	 ddedededeeef         dz  ded	edz  d
edz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )OrionAttentionN    r"   r#   	num_headsnum_kv_headsrope_parametersmax_position_embeddingscache_configr&   r'   r(   c	           
      @   t                                                       || _        t                      }	|| _        | j        |	z  dk    sJ | j        |	z  | _        || _        | j        |	k    r| j        |	z  dk    sJ n|	| j        z  dk    sJ t          d| j        |	z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        t          || j	        | j        | j        d|| d          | _        t!          | j        | j	        z  |d|| d          | _        t%          | j	        ||          | _        t)          | j        | j	        | j        | j        ||| d	
          | _        d S )Nr   r   g      Fz	.qkv_projr+   z.o_proj)max_positionrJ   z.attn)rI   rL   r&   r'   )r.   r/   r#   r   total_num_headsrH   total_num_kv_headsmaxrI   head_dimq_sizekv_sizescalingrK   r   qkv_projr   o_projr   
rotary_embr   attn)r4   r#   rH   rI   rJ   rK   rL   r&   r'   tp_sizer5   s             r6   r/   zOrionAttention.__init__V   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*'>$)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0+
 
 

 NML*%%###
 
 
			r7   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rV   splitrS   rT   rX   rY   rW   )
r4   r[   r\   qkvr<   qkvattn_outputoutputs
             r6   r=   zOrionAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r7   )NrG   NNr"   )r>   r?   r@   rA   dictrB   r   r
   r   r/   torchTensorr=   rC   rD   s   @r6   rF   rF   U   s         26'++/26?
 ?
?
 ?
 	?

 c3h$.?
 "%?
 "D(?
 )4/?
 ?
 
?
 ?
 ?
 ?
 ?
 ?
B
<
 |
 
	
 
 
 
 
 
 
 
r7   rF   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        de
ej	        ej	        f         fdZ xZS )OrionDecoderLayerNr"   configrL   r&   r'   r(   c                    t                                                       |j        | _        t          |dd          }t	          | j        |j        |j        |j        |||| d          | _        t          | j        |j
        |j        || d          | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        d S )NrK   rG   z
.self_attn)r#   rH   rI   rJ   rK   rL   r&   r'   z.mlp)r#   r$   r%   r&   r'   eps)r.   r/   r#   getattrrF   num_attention_headsnum_key_value_headsrJ   	self_attnr!   r$   r%   mlpr   	LayerNormrms_norm_epsinput_layernormpost_attention_layernorm)r4   rl   rL   r&   r'   rK   r5   s         r6   r/   zOrionDecoderLayer.__init__   s     	!-")&2KT"R"R'(03"2$;%%(((	
 	
 	
 ($6(%???
 
 
  "|F,>FDWXXX(*F$7)
 )
 )
%%%r7   r[   r\   c                     |}|                      |          }|                     ||          }||z   }|}|                     |          }|                     |          }||z   }|S )N)r[   r\   )rw   rs   rx   rt   )r4   r[   r\   residuals       r6   r=   zOrionDecoderLayer.forward   s     !,,];;' ' 
 

 !=0 !55mDD// =0r7   )NNr"   )r>   r?   r@   r   r
   r   rB   r/   rh   ri   tupler=   rC   rD   s   @r6   rk   rk      s         ,026
 
 
 "D(
 )4/	

 
 

 
 
 
 
 
B< | 
u|U\)	*	       r7   rk   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )
OrionModelr"   r'   vllm_configr'   c                   t                                                       |j        j        |j        |j        | _        j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j        j	        j                  | _        t)          dgj	                  | _        d S )Nc                 *    t          |           S )Nr~   )rk   )r'   rL   rl   r&   s    r6   <lambda>z%OrionModel.__init__.<locals>.<lambda>   s     ,l6   r7   z.layersr~   rn   r\   )r.   r/   model_config	hf_configrL   r&   rl   
vocab_sizer   r#   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   ru   rv   normr   make_empty_intermediate_tensors)r4   r   r'   rL   rl   r&   r5   s      @@@r6   r/   zOrionModel.__init__   s   )3"/"/ +2
 
 9D$      %%%9
 9
 9
5$.$+ L!39LMMM	/V 	0
 0
,,,r7   	input_idsr(   c                 ,    |                      |          S r9   )r   r4   r   s     r6   embed_input_idszOrionModel.embed_input_ids   s      +++r7   Nr[   intermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nr\   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )r4   r   r[   r   r   r\   layers          r6   r=   zOrionModel.forward   s     >>' 	B( - $ 4 4Y ? ?'3330AMDK)94>JJ 	< 	<E!E)];;MM~~* 	&#]  
 		-00r7   weightsc                 (   g d}t          |                                           }t                      }|D ]\  }}|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N))rV   q_projrb   )rV   k_projrc   )rV   v_projrd   )r0   	gate_projr   )r0   up_projr   z.biasweight_loader)
rg   named_parameterssetreplaceendswithr   r   rp   r   add)r4   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r6   load_weightszOrionModel.load_weights  sg   "
 "
 "
 4002233"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r7   r9   )r>   r?   r@   r   rB   r/   rh   ri   r   r   r=   r   r{   r   r   rC   rD   s   @r6   r}   r}      s       AC 
 
 
z 
3 
 
 
 
 
 
8, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   4#HU33D-E$F #3s8 # # # # # # # #r7   r}   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )OrionForCausalLMr"   r~   r   r'   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t          |j
        |j        |t          |d                    | _        | j        j        r| j        j        j        | j        _        t!          |j
                  | _        | j        j        | _        d S )Nmodel)r   r'   lm_head)r&   r'   )r.   r/   r   r   r&   rl   r}   r   r   r   r   r#   r   tie_word_embeddingsr   weightr   logits_processorr   )r4   r   r'   rl   r&   r5   s        r6   r/   zOrionForCausalLM.__init__?  s    )3"/(#L,I,I
 
 

 &%	22	
 
 
 ;* 	A"&*"9"@DL /0A B BJ6 	,,,r7   r   r(   c                 6    | j                             |          S r9   )r   r   r   s     r6   r   z OrionForCausalLM.embed_input_idsU  s    z)))444r7   Nr[   r   r   c                 6    |                      ||||          }|S r9   )r   )r4   r   r[   r   r   r\   s         r6   r=   zOrionForCausalLM.forwardX  s)     

y"6
 
 r7   r\   c                 <    |                      | j        |          }|S r9   )r   r   )r4   r\   logitss      r6   compute_logitszOrionForCausalLM.compute_logitsd  s      &&t|]CCr7   r   c                 J    t          |           }|                    |          S r9   )r   r   )r4   r   loaders      r6   r   zOrionForCausalLM.load_weightsk  s#    "4((""7+++r7   )NN)r>   r?   r@   r   rB   r/   rh   ri   r   r   r=   r   r   r{   r   r   rC   rD   s   @r6   r   r   >  sN       AC 
 
 
z 
3 
 
 
 
 
 
,5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r7   r   )6__doc__collections.abcr   	itertoolsr   typingr   rh   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler!   rF   rk   r}   r    r7   r6   <module>r      s   J I $ $ $ $ $ $                    ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - " " " " " "             " " " " "ry " " "JL L L L LRY L L L^6 6 6 6 6	 6 6 6r ] ] ] ] ] ] ] ]@/, /, /, /, /,ry* /, /, /, /, /,r7   