
    .`iIB                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ de0de1de1fdZ2de1de1de1fdZ3 G d de          Z4 G d d ej5                  Z6e
 G d! d"ej5                              Z7 G d# d$ej5        e'e(e&          Z8dS )%z>Inference-only deci model compatible with HuggingFace weights.    )Iterable)isliceN)nn)LlamaConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group)RMSNorm)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)LlamaAttentionLlamaMLP)IntermediateTensors)AttentionType   )HasNoOpsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixffn_multn_embdreturnc                 R    t          d| z  |z  dz            }t          |d          S )N         )int_find_multiple)r!   r"   intermediate_sizes      {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/nemotron_nas.py_ffn_mult_to_intermediate_sizer,   @   s/    AL61A566+S111    nkc                 .    | |z  dk    r| S | |z   | |z  z
  S )Nr    )r.   r/   s     r+   r)   r)   F   s&    1uzzq5AE?r-   c                        e Zd Zddddddej        fdedededed	ed
edz  dedede	dz  de
de
ddf fdZd
edz  ddfdZ xZS )DeciLMAttention    NF confighidden_size	num_headsnum_kv_headsmax_position_embeddingsquant_configbiasbias_o_projcache_configprefix	attn_typer#   c                 ^    t                                          |||||||||	|
|           d S N)super__init__)selfr6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   	__class__s               r+   rD   zDeciLMAttention.__init__N   sL     	#	
 	
 	
 	
 	
r-   c                     d}t          |d          r	|j        dv}t          | j        | j        |j        |          | _        d S )NTposition_embedding_type)mistral_yarnrope_llama4)max_positionrope_parametersis_neox_style)hasattrrH   r   head_dimr:   rL   
rotary_emb)rE   r6   r;   rM   s       r+   _init_rotary_embz DeciLMAttention._init_rotary_embj   s_     6455 	": C M
 #M5"2'	
 
 
r-   )__name__
__module____qualname__r   DECODERr   r(   r   boolr   strrD   rQ   __classcell__rF   s   @r+   r3   r3   M   s	        (,26!+/&.
 

 
 	

 
 "%
 )4/
 
 
 "D(
 
 
 

 
 
 
 
 
8
 )4/
 
	
 
 
 
 
 
 
 
r-   r3   c                        e Zd Z	 	 	 ddedededz  dedz  deddf fd	Zd
e	j
        de	j
        de	j
        dz  dee	j
        e	j
        f         fdZ xZS )DeciLMDecoderLayerNr5   r6   	layer_idxr>   r;   r?   r#   c                 f   t                                                       |j        |         }|j        j        | _        |j        j        | _        |j        | _        t          |dd          }t          |dd          pt          |dd          }|}	t          |d          r|j        }| j        s_|j        |j        j        z  }
t          || j        |j        |
||||	|| d
  
        | _        t!          |j        |j        	          | _        | j        st          |j        d
          r"|j        j        }t)          ||j                  }n|j        j        }t-          | j        ||j        |t          |dd          | d          | _        t!          |j        |j        	          | _        d S d S )Nr:   r4   attention_biasFr<   qkv_biasz
.self_attn)
r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   epsr!   mlp_biasz.mlp)r7   r*   
hidden_actr;   r<   r?   )rC   rD   block_configs	attentionno_op_is_no_op_attentionffn_is_no_op_ffnr7   getattrrN   r_   num_attention_headsn_heads_in_groupr3   	self_attnr   rms_norm_epsinput_layernormr!   r,   r*   r   rc   mlppost_attention_layernorm)rE   r6   r\   r>   r;   r?   block_configr:   r^   r=   r9   r!   r*   rF   s                r+   rD   zDeciLMDecoderLayer.__init__   s    	+I6#/#9#? )-3!-")&2KT"R"R !)95AA 
WFEF
 F
 %6:&& 	-#_N' 	X*l.D.UU  - , 4)(?)#') ,,,  DN $+6+=6CV#W#W#WD ! 	|'44 G'+4$Bf0% %!! %1$4$F! ,"3!,)VZ77   DH -4"(;- - -D)))#	 	r-   	positionshidden_statesresidualc                    | j         rnJ||}|                     |          }n|                     ||          \  }}|                     ||          }| j        s.|                     ||          \  }}|                     |          }||fS )N)rs   rt   )rg   ro   rm   ri   rq   rp   )rE   rs   rt   ru   s       r+   forwardzDeciLMDecoderLayer.forward   s     # 	( $ 4 4] C C*.*>*>}h*W*W'x NN#+ +  M ! 	4&*&C&Cx' '#M8 !HH]33Mh&&r-   )NNr5   )rR   rS   rT   r   r(   r   r   rW   rD   torchTensortuplerw   rX   rY   s   @r+   r[   r[      s        
 ,026> >> > "D(	>
 )4/> > 
> > > > > >@'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r-   r[   c                       e Zd Zdeddededee         f fdZdej	        dej	        fd	Z
	 ddej	        d
z  dej	        ded
z  dej	        d
z  dej	        ez  f
dZdeeeej	        f                  dee         fdZ xZS )	DeciModelr5   )r?   
layer_typevllm_configr?   r}   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        j	        | _	        t                      j        sj        r5t                      j        r"t          | j	        j                  | _        nt#                      | _        dt$          ffd}t'          j        || d          \  | _        | _        | _        t                      j        r!t1          j        j                  | _        nt#                      | _        t7          ddgj                  | _        d S )	N)r;   r?   c                 v    t          |                     dd          d                   } ||           S )N.r   r;   r?   )r(   rsplit)r?   r\   r>   r6   r}   r;   s     r+   	get_layerz%DeciModel.__init__.<locals>.get_layer   sK    FMM#q11!455I:)   r-   z.layersr?   r`   rt   ru   )rC   rD   model_config	hf_configr>   r;   r6   pad_token_idpadding_idx
vocab_sizer
   is_first_ranktie_word_embeddingsis_last_rankr   r7   embed_tokensr   rW   r   num_hidden_layersstart_layer	end_layerlayersr   rn   normr   make_empty_intermediate_tensors)	rE   r~   r?   r}   r   r>   r6   r;   rF   s	      ` @@@r+   rD   zDeciModel.__init__   s    	)3"/"/(!. +>>' 		1&		1+7>>+F		1 !7")! ! !D !/ 0 0D	c 	 	 	 	 	 	 	 	 	 9D$%%%9
 9
 9
5$.$+
 >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,r-   	input_idsr#   c                 ,    |                      |          S rB   )r   rE   r   s     r+   embed_input_idszDeciModel.embed_input_ids  s      +++r-   Nrs   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }d}t          | j        | j        | j                  D ]/}|j        s ||||          \  }}|dz  } ||||          \  }}0t                      j        st          ||d          S | 
                    ||          \  }}	|S )Nrt   ru   r   r   )rt   ru   )r
   r   r   r   r   r   r   rg   r   r   r   )
rE   r   rs   r   r   rt   ru   kv_cache_indexlayer_s
             r+   rw   zDeciModel.forward  s    >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	T 	TE, T*/%	=(*S*S'x!#*/%	=(*S*S'xx~~* 	&"/XFF    99]H==qr-   weightsc                    g d}t          |                                           }t                      }|D ]\  }}d|v rd|v sd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           d|v sd|v rt          ||          }||D ]i\  }
}}||vr|
                    ||
          }|                    d	          r||vr;t          ||           rL||         }|j        }	 |	|||            nW|                    d	          r||vr;t          ||           rM||         }t          |dt                    }	 |	||           |                    |           |S )
N))	.qkv_projz.q_projq)r   z.k_projr/   )r   z.v_projv).gate_up_projz
.gate_projr   )r   z.up_projr   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   scale
zero_pointz.bias)dictnamed_parameterssetr;   get_cache_scalerj   r   dimaddr   replaceendswithr   r   )rE   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r+   load_weightszDeciModel.load_weights;  sR   "
 "
 "
 4002233"%%%#* 2	$ 2	$D-$,,&$..2IT2Q2Q  ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---$,$"6"60{CC<5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r-   rB   )rR   rS   rT   r[   r	   rW   typerD   rx   ry   r   r   rw   r   rz   r   r   rX   rY   s   @r+   r|   r|      s=        /A4
 4
 4
  4
 	4

 +,4
 4
 4
 4
 4
 4
l, ,%, , , , , .2   <$&  <  2D8	 
 |d*  
+	+       D>HU33D-E$F >3s8 > > > > > > > >r-   r|   c                   l    e Zd Zg dddgdZdddZdd	d
dddddddddddddZdddedef fdZd(dedefdZ	de
j        de
j        fdZ	 	 d)de
j        d e
j        d!edz  d"e
j        dz  de
j        ez  f
d#Zd$e
j        de
j        dz  fd%Zd&eeee
j        f                  dee         fd'Z xZS )*DeciLMForCausalLM)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_projinput_embeddingsoutput_embeddings)r   lm_headzmodel.layersrm   r   r   r   o_projro   rp   	down_projrq   zmodel.embed_tokensr   z
model.norm)r   re   wqwkwvwoattention_normfeed_forwardw1w2w3ffn_normtok_embeddingsoutputr   r5   r   r~   r?   c          	      X   t                                                       |j        j        }|j        }|| _        |                     |t          |d                    | _        t                      j
        rt          |j        |j        |t          |d                    | _        |j        r)| j                            | j        j                  | _        t%          |dd          }t'          |j        |          | _        nt+                      | _        | j        j        | _        d S )Nmodelr~   r?   r   r   logit_scaleg      ?)r   )rC   rD   r   r   r;   r6   _init_modelr    r   r
   r   r   r   r7   r   r   tie_weightsr   rj   r   logits_processorr   r   )rE   r~   r?   r6   r;   r   rF   s         r+   rD   zDeciLMForCausalLM.__init__  s%   )3"/%%#L,I,I & 
 

 >>& 	,)!")#FI66	  DL ) Q#|77
8OPP!&-==K$3!% % %D!! *++DL J6 	,,,r-   c                 $    t          ||          S )Nr   )r|   )rE   r~   r?   s      r+   r   zDeciLMForCausalLM._init_model  s    [@@@@r-   r   r#   c                 6    | j                             |          S rB   )r   r   r   s     r+   r   z!DeciLMForCausalLM.embed_input_ids  s    z)))444r-   Nrs   r   r   c                 6    |                      ||||          }|S rB   )r   )rE   r   rs   r   r   model_outputs         r+   rw   zDeciLMForCausalLM.forward  s)     zzy"6
 
 r-   rt   c                 <    |                      | j        |          }|S rB   )r   r   )rE   rt   logitss      r+   compute_logitsz DeciLMForCausalLM.compute_logits  s      &&t|]CCr-   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   r6   r   r   )rE   r   loaders      r+   r   zDeciLMForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r-   )r5   )NN)rR   rS   rT   packed_modules_mappingembedding_modulesmistral_mappingr	   rW   rD   r   rx   ry   r   r   rw   r   r   rz   r   r   rX   rY   s   @r+   r   r   |  s       222$i0  +&  ! +.. O$ BD 
 
 
z 
3 
 
 
 
 
 
@A Az A3 A A A A5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r-   r   )9__doc__collections.abcr   	itertoolsr   rx   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r    vllm.model_executor.models.llamar   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   r   utilsr   r   r   r   r   r    floatr(   r,   r)   r3   Moduler[   r|   r   r1   r-   r+   <module>r      s  2 E D $ $ $ $ $ $              $ $ $ $ $ $ = = = = = = / / / / / / / / ) ) ) ) ) ) 8 8 8 8 8 8 G G G G G G F F F F F F @ @ @ @ @ @               F E E E E E E E - - - - - - 3 3 3 3 3 3 : : : : : : : : : :               2U 2C 2C 2 2 2 2c c c    /
 /
 /
 /
 /
n /
 /
 /
d\' \' \' \' \' \' \' \'~ Z Z Z Z Z	 Z Z Zz^, ^, ^, ^, ^,	<X ^, ^, ^, ^, ^,r-   