
    .`i>                        d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ ddlm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'  G d dej(                  Z) G d dej(                  Z*e G d dej(                              Z+ G d dej(        e e!          Z,dS )    )Iterable)islice)AnyN)nn)LlamaConfig)support_torch_compile)get_pp_group)ReLUSquaredActivation)RMSNorm)ColumnParallelLinearRowParallelLinear)LogitsProcessor)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersc                   x     e Zd ZdZ	 	 	 	 ddededed	edz  d
edededdf fdZde	j
        de	j
        fdZ xZS )ArceeMLPzQFeed-forward layer for Arcee using ReLU^2 activation
    (no gating as in LLaMA).NF Thidden_sizeintermediate_size
hidden_actquant_configbiasprefixreduce_resultsreturnc           	         t                                                       t          ||||| d          | _        t	          |||||| d          | _        |dk    rt          d| d          t                      | _        d S )Nz.up_proj)
input_sizeoutput_sizer#   r"   r$   z
.down_proj)r(   r)   r#   r"   r%   r$   relu2zUnsupported activation: z$. Only 'relu2' is supported for AFM.)	super__init__r   up_projr   	down_proj
ValueErrorr
   act_fn)	selfr   r    r!   r"   r#   r$   r%   	__class__s	           t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/arcee.pyr,   zArceeMLP.__init__1   s     	 ,")%&&&
 
 
 +(#%)(((
 
 
   5: 5 5 5  
 ,--    xc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r-   r0   r.   )r1   r5   _s      r3   forwardzArceeMLP.forwardV   s>    ||A1KKNN~~a  1r4   )NFr   T)__name__
__module____qualname____doc__intstrr   boolr,   torchTensorr9   __classcell__r2   s   @r3   r   r   -   s            $(##. #.#. #. 	#.
 Dj#. #. #. #. 
#. #. #. #. #. #.J %,        r4   r   c                        e Zd ZdZ	 	 	 ddededz  dedz  deddf
 fd	Zd
ej	        dej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )ArceeDecoderLayerzLTransformer decoder block for Arcee, with self-attention and
    ReLU^2 MLP.Nr   configcache_configr"   r$   r&   c                    t                                                       |j        | _        t          |dd          }t          |dd          pt          |dd          }|}t	          |d          r|j        }ddlm}  ||| j        |j        t          |d	|j                  |||||| d
t          |dd                    | _	        t          | j        |j        |j        |t          |dd          | d          | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nmax_position_embeddingsi    attention_biasFr#   qkv_biasr   )LlamaAttentionnum_key_value_headsz
.self_attn	attn_typedecoder)rG   r   	num_headsnum_kv_headsrJ   r"   r#   bias_o_projrH   r$   rO   mlp_biasz.mlp)r   r    r!   r"   r#   r$   eps)r+   r,   r   getattrhasattrrL    vllm.model_executor.models.llamarM   num_attention_heads	self_attnr   r    r!   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)
r1   rG   rH   r"   r$   rJ   rK   rS   rM   r2   s
            r3   r,   zArceeDecoderLayer.__init__a   s    	!-")&2KT"R"R )95AA 
WFEF
 F
 %6:&& 	-#_N	
 	
 	
 	
 	
 	
 ((0 -v/I  %<%#%(((Y 
 
 
$ ($6(%U33???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%r4   	positionshidden_statesresidualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r`   ra   )r^   r[   r_   r\   )r1   r`   ra   rb   s       r3   r9   zArceeDecoderLayer.forward   s     $H 00??MM '+&:&:=(&S&S#M8-XX"&"?"?x"X"Xx//h&&r4   )NNr   )r:   r;   r<   r=   r   r   r?   r,   rA   rB   tupler9   rC   rD   s   @r3   rF   rF   ]   s          $(#'5
 5
5
 Dj5
 Dj	5

 5
 
5
 5
 5
 5
 5
 5
n'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r4   rF   c                   J    e Zd ZdZdeddedeej                 ddf fdZ	d	e
j        de
j        fd
Z	 dd	e
j        dz  de
j        dedz  de
j        dz  de
j        ez  ee
j        ee
j                 f         z  f
dZdeeee
j        f                  dee         fdZ xZS )
ArceeModelzeThe transformer model backbone for Arcee (embedding layer + stacked
    decoder blocks + final norm).r   )r$   
layer_typer$   rg   r&   Nc                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t                      j	        sj
        r5t                      j        r"t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1                      | _        t5          ddgj                  | _        d S )N)r"   c                 "     |           S )N)rG   rH   r"   r$    )r$   rH   rG   rg   r"   s    r3   <lambda>z%ArceeModel.__init__.<locals>.<lambda>   s$    ::))	   r4   z.layersr$   rU   ra   rb   )r+   r,   model_config	hf_configrH   r"   rG   
vocab_sizer	   is_first_ranktie_word_embeddingsis_last_rankr   r   embed_tokensr   r   num_hidden_layersstart_layer	end_layerlayersr   r]   normrd   aux_hidden_state_layersr   make_empty_intermediate_tensors)r1   vllm_configr$   rg   rH   rG   r"   r2   s      `@@@r3   r,   zArceeModel.__init__   s    	)6@"/"/( + >>' 		1&		1+7>>+F		1 !7")! ! !D !/ 0 0D 9D$       %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDII&((DI 9>$ 0Wj)6+=0
 0
,,,r4   	input_idsc                 ,    |                      |          S r7   )rs   r1   r|   s     r3   embed_input_idszArceeModel.embed_input_ids   s      +++r4   r`   intermediate_tensorsinputs_embedsc                    t                      j        r||n|                     |          }d }n|
J d            |d         }|d         }g }t          t	          | j        | j        | j                            D ]6\  }}	|| j        v r|	                    ||z               |	|||          \  }}7t                      j
        st          ||d          S |                     ||          \  }}
t          |          dk    r||fS |S )NzAIntermediateTensors must be provided for non-first pipeline ranksra   rb   )ra   rb   r   )r	   rp   r   	enumerater   rw   ru   rv   ry   appendrr   r   rx   len)r1   r|   r`   r   r   ra   rb   aux_hidden_statesidxlayerr8   s              r3   r9   zArceeModel.forward   sa    >>' 	8 !, )))44 
 HH'33S 433 1AM+J7H02#4; 0$.AA
 
 	P 	PJC d222!((!H,   ',eI}h&O&O#M88~~* 	&"/XFF    99]H==q !!A%% "333r4   weightsc                    g d}t          |                                           }t                      }|D ]\  }}d|v rd|v sd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           d|v sd	|v rt          ||          }
|
|
}d
}|D ]\  }}}||vr|
                    ||          }|                    d          r||vrd} nHt          ||           rd} n4||         }|j        }	 |	|||           |                    |           d} |rD|                    d          r||vr_t          ||           rq||         }t          |dt                    }	 |	||           |                    |           |S )z:Load weights, mapping q/k/v projections to fused qkv_proj.))	.qkv_projz.q_projq)r   z.k_projk)r   z.v_projvzrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedNweight_loaderr   scale
zero_pointFz.biasT)dictnamed_parameterssetr"   get_cache_scalerW   r   dimaddr   replaceendswithr   r   )r1   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   remapped_namemapped
param_nameweight_nameshard_ids                  r3   load_weightszArceeModel.load_weights  s   "
 "
 "
 4002233"%%%#* :	$ :	$D-$,,&$..2IT2Q2Q ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---$,$"6"6 9$ L L ($F5K  1
Kd**||K<<==)) d+.E.E!FE*466 !FE#D) % 3e]H===!!$''' }}W%% $k*A*A&tT22 %E#E?<QRRMM%///d####r4   r7   )r:   r;   r<   r=   rF   r?   typer   Moduler,   rA   rB   r   r   rd   listr9   r   r   r   rC   rD   s   @r3   rf   rf      si       % % &74
 4
 4
 	4

 O4
 
4
 4
 4
 4
 4
 4
l, ,%, , , , , .2) )<$&) <) 2D8	)
 |d*) 
+	+eEL$u|BT4T.U	U) ) ) )VGHU33D-E$F G3s8 G G G G G G G Gr4   rf   c                   *    e Zd ZdZdg diZdddeddf fd	Z	 	 dd
ej        dej        de	dz  dej        dz  dej        e	z  f
dZ
dej        dej        dz  fdZd
ej        dej        fdZdeeeej        f                  dee         fdZ xZS )ArceeForCausalLMzKArcee Model for causal language modeling, integrated with vLLM
    runtime.qkv_proj)q_projk_projv_projr   rl   r$   r&   Nc          
      <   t                                                       |j        j        }|| _        t          || d          | _        t                      j        rt          |j
        |j        |j        t          |dd          | d          | _        |j        r)| j                            | j        j                  | _        t          |dd          }t%          |j
        |	          | _        nt)                      | _        | j        j        | _        d S )
Nz.model)r{   r$   lm_head_biasFz.lm_head)r"   r#   r$   logit_scaleg      ?)r   )r+   r,   rm   rn   rG   rf   modelr	   rr   r   ro   r   r"   rW   lm_headrq   tie_weightsrs   r   logits_processorr   rz   )r1   r{   r$   rG   r   r2   s        r3   r,   zArceeForCausalLM.__init__i  s"   )3  K6@Q@Q@QRRR
>>& 	, *!"(5V^U;; ***  DL ) Q#|77
8OPP!&-==K$3!% % %D!!
 *++DL J6 	,,,r4   r|   r`   r   r   c                 8    |                      ||||          }|S )N)r|   r`   r   r   )r   )r1   r|   r`   r   r   model_outputs         r3   r9   zArceeForCausalLM.forward  s0     zz!5'	 " 
 
 r4   ra   c                 <    |                      | j        |          }|S r7   )r   r   )r1   ra   logitss      r3   compute_logitszArceeForCausalLM.compute_logits  s    &&t|]CCr4   c                 6    | j                             |          S r7   )r   r   r~   s     r3   r   z ArceeForCausalLM.embed_input_ids  s    z)))444r4   r   c                 p    t          | | j        j        rdgnddg          }|                    |          S )z[Load weights into the model (delegates to inner model and handles
        tied embeddings).zlm_head.N	gate_proj)skip_prefixesskip_substrs)r   rG   rq   r   )r1   r   loaders      r3   r   zArceeForCausalLM.load_weights  sJ     #+/;+JTJ<<PT%
 
 
 ""7+++r4   )NN)r:   r;   r<   r=   packed_modules_mappingr?   r,   rA   rB   r   r9   r   r   r   rd   r   r   rC   rD   s   @r3   r   r   _  sm         	222 68  
  
  
s  
D  
  
  
  
  
  
L <@-1 < < 2D8	
 |d* 
+	+   EL U\D=P    
5 5%, 5 5 5 5
,HU33D-E$F 
,3s8 
, 
, 
, 
, 
, 
, 
, 
,r4   r   )-collections.abcr   	itertoolsr   typingr   rA   r   transformersr   vllm.compilation.decoratorsr   vllm.distributedr	   %vllm.model_executor.layers.activationr
   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   r   r   rF   rf   r   rj   r4   r3   <module>r      s   % $ $ $ $ $                    $ $ $ $ $ $ = = = = = = ) ) ) ) ) ) G G G G G G 8 8 8 8 8 8 U U U U U U U U G G G G G G               . - - - - - 0 0 0 0 0 0 0 0             - - - - -ry - - -`L' L' L' L' L'	 L' L' L'^ o o o o o o o odM, M, M, M, M,ry,
 M, M, M, M, M,r4   