
    .`i	F                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 d Z1 G d dej2                  Z3 G d dej4                  Z5 G d dej4                  Z6 G d dej4                  Z7e
 G d d ej4                              Z8 G d! d"ej4        e(e)          Z9dS )#zBInference-only Nemotron model compatible with HuggingFace weights.    )Iterable)isliceN)nn)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)NemotronConfig   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                      t          j                    s| S t           j        j                            | dt          j                              S )Ncuda)device_typedtype)torchis_autocast_enabledampautocast_mode_castget_autocast_gpu_dtype)argss    w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/nemotron.py_cast_if_autocast_enabledr.   J   sJ    $&& 
y&,,fE,H,J,J - 
 
 	
    c            	            e Zd Z	 	 	 	 	 ddeee         z  ej        z  dededef fdZ		 dd	ej
        d
ej
        dz  dej
        fdZ xZS )NemotronLayerNorm1Ph㈵>TNnormalized_shapeepselementwise_affinebiasc                 T    t                                          ||||||           d S N)super__init__)selfr3   r4   r5   r6   devicer%   	__class__s          r-   r:   zNemotronLayerNorm1P.__init__T   s0     	)30BD&RWXXXXXr/   xresidualreturnc                    |||z   }|}t          || j        | j        dz   | j        | j                  }t
          j                            dd          5  t          j        j	        j
        | }||n||fcd d d            S # 1 swxY w Y   d S )Nr   r#   F)enabled)r.   r3   weightr6   r4   r&   r(   autocastr   
functional
layer_norm)r;   r>   r?   r,   s       r-   forwardzNemotronLayerNorm1P.forward_   s    
 HAH(t$dkAoty$(
 
 Y66 	< 	<#.5A (11q(m	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	<s    BB	B)r2   TTNNr8   )__name__
__module____qualname__intlistr&   Sizefloatboolr:   TensorrG   __classcell__r=   s   @r-   r1   r1   S   s         #'	Y 	YS	/EJ6	Y 	Y !		Y
 	Y 	Y 	Y 	Y 	Y 	Y )-< <<< ,%< 
	< < < < < < < <r/   r1   c                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )NemotronMLPNF hidden_sizeintermediate_size
hidden_actquant_configr6   prefixr@   c                     t                                                       t          ||||| d          | _        t	          ||||| d          | _        t          |          | _        d S )Nz.up_proj
input_sizeoutput_sizer6   rY   rZ   z
.down_proj)r9   r:   r   up_projr   	down_projr   act_fn)r;   rV   rW   rX   rY   r6   rZ   r=   s          r-   r:   zNemotronMLP.__init__p   s     	+")%&&&
 
 
 +(#%(((
 
 
 !,,r/   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S r8   )r_   ra   r`   )r;   r>   up_s       r-   rG   zNemotronMLP.forward   s>    QAKKOO~~a  1r/   )NFrU   )
rH   rI   rJ   rK   strr   rO   r:   rG   rQ   rR   s   @r-   rT   rT   o   s         37- -- - 	-
 )4/- - - 
- - - - - -4      r/   rT   c                        e Zd Z	 	 	 	 	 ddedededed	ed
edz  dededz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )NemotronAttention    NFrU   configrV   	num_headsnum_kv_headsmax_position_embeddingsrY   r6   cache_configrZ   r@   c
           
         t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        t          |dd           | _
        | j
        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t          || j
        | j        | j        |||	 d          | _        t#          | j        | j
        z  ||||	 d          | _        t'          | j
        ||j        	          | _        t-          | j        | j
        | j        | j        |||	 d
          | _        d S )Nr   r   head_dimg      	.qkv_proj)rV   	head_sizetotal_num_headstotal_num_kv_headsr6   rY   rZ   z.o_projr\   )max_positionrope_parametersz.attn)rk   rm   rY   rZ   )r9   r:   rV   r   rr   rj   rs   maxrk   getattrro   q_sizekv_sizescalingrl   r   qkv_projr   o_projr   ru   
rotary_embr   attn)r;   ri   rV   rj   rk   rl   rY   r6   rm   rZ   tp_sizer=   s              r-   r:   zNemotronAttention.__init__   s
    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
D99=  ,0DDDMnt}4(4=8}d*'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 #M0"2
 
 

 NML*%%###
 
 
			r/   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)r{   splitrx   ry   r}   r~   r|   )
r;   r   r   qkvrd   qkvattn_outputoutputs
             r-   rG   zNemotronAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r/   )rh   NFNrU   )rH   rI   rJ   r   rK   r   rO   r   re   r:   r&   rP   rG   rQ   rR   s   @r-   rg   rg      s        (,26+/C
 C
C
 C
 	C

 C
 "%C
 )4/C
 C
 "D(C
 C
 
C
 C
 C
 C
 C
 C
J
<
 |
 
	
 
 
 
 
 
 
 
r/   rg   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )NemotronDecoderLayerNrU   ri   rm   rY   rZ   r@   c                 8   t                                                       |j        | _        t          |dd          }t          |dd          pt          |dd          }t	          || j        |j        t          |d|j                  ||||| d	  	        | _        t          | j        |j        |j	        |t          |d	d          | d
          | _
        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nrl   rh   attention_biasFr6   num_key_value_headsz
.self_attn)	ri   rV   rj   rk   rl   rY   r6   rm   rZ   mlp_biasz.mlp)rV   rW   rX   rY   r6   rZ   r4   )r9   r:   rV   rw   rg   num_attention_heads	self_attnrT   rW   rX   mlpr1   norm_epsinput_layernormpost_attention_layernorm)r;   ri   rm   rY   rZ   rl   r   r=   s          r-   r:   zNemotronDecoderLayer.__init__   sQ    	!-")&2KT"R"R !)95AA 
WFEF
 F
 +(0 -v/I  %<%%(((
 
 
 ($6(%U33???
 
 
  3FO 
  
  
 )<FO)
 )
 )
%%%r/   r   r   r?   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   r   )r   r   r   r   )r;   r   r   r?   s       r-   rG   zNemotronDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&r/   )NNrU   )rH   rI   rJ   r   r   r   re   r:   r&   rP   tuplerG   rQ   rR   s   @r-   r   r      s         ,026)
 )
)
 "D()
 )4/	)

 )
 
)
 )
 )
 )
 )
 )
V'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r/   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )NemotronModelrU   rZ   vllm_configrZ   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t                      j	        sj
        r3t                      j        r t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1          ddgj                  | _        d S )Nc                 *    t          |           S )N)ri   rm   rY   rZ   )r   )rZ   rm   ri   rY   s    r-   <lambda>z(NemotronModel.__init__.<locals>.<lambda>@  s#    /))	   r/   z.layersr   r   r   r?   )r9   r:   model_config	hf_configrm   rY   ri   
vocab_sizer
   is_first_ranktie_word_embeddingsis_last_rankr   rV   embed_tokensr   r    num_hidden_layersstart_layer	end_layerlayersr1   r   normr   make_empty_intermediate_tensors)r;   r   rZ   rm   ri   rY   r=   s      @@@r-   r:   zNemotronModel.__init__)  s]   )3"/"/( +>>' 	1&	1+7>>+F	1 !7"! !D
 !/ 0 0D8C$      %%%	9
 	9
 	9
5$.$+ >>& 	)+F,>FOTTTDII&((DI/Vj)6+=0
 0
,,,r/   	input_idsr@   c                 ,    |                      |          S r8   )r   r;   r   s     r-   embed_input_idszNemotronModel.embed_input_idsP  s      +++r/   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr   r?   )r   r?   )
r
   r   r   r   r   r   r   r   r   r   )	r;   r   r   r   r   r   r?   layerrd   s	            r-   rG   zNemotronModel.forwardS  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&"/XFF    99]H==qr/   weightsc                 b   g d}t          |                                           }t                      }|D ]w\  }}| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vrt          ||          }|&t          ||           r8||         }t          |dt                    }	 |	||           |                    |           y|S )N))rp   z.q_projr   )rp   z.k_projr   )rp   z.v_projr   weight_loaderr   z.bias)dictnamed_parameterssetrY   get_cache_scalerw   r   r   addreplaceendswithr   r   r   )r;   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r-   load_weightszNemotronModel.load_weightsp  s   "
 "
 "
 4002233"%%%#* ,	$ ,	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r/   r8   )rH   rI   rJ   r	   re   r:   r&   rP   r   r   rG   r   r   r   r   rQ   rR   s   @r-   r   r   '  s!       AC %
 %
 %
z %
3 %
 %
 %
 %
 %
 %
N, ,%, , , , , .2 <$& < 2D8	
 |d* 
+	+   :6HU33D-E$F 63s8 6 6 6 6 6 6 6 6r/   r   c                   0    e Zd Zdg diZdddZddded	ef fd
Zdej	        dej	        fdZ
	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
dZdej	        dej	        dz  fdZdeeeej	        f                  dee         fdZ xZS )NemotronForCausalLMr{   )q_projk_projv_projinput_embeddingsoutput_embeddings)r   lm_headrU   r   r   rZ   c          	      l   t                                                       |j        j        }|j        }t          |t                    sJ || _        || _        t          |t          |d                    | _
        t                      j        rt          |j        |j        |t          |d                    | _        |j        r| j
        j        j        | j        _        t)          |dd          }t+          |j        |          | _        nt/                      | _        | j
        j        | _        d S )Nmodel)r   rZ   r   )rY   rZ   logit_scaleg      ?)scale)r9   r:   r   r   rY   
isinstancer   ri   r   r!   r   r
   r   r   r   rV   r   r   r   rC   rw   r   logits_processorr   r   )r;   r   rZ   ri   rY   r   r=   s         r-   r:   zNemotronForCausalLM.__init__  s3   )3"/&.11111("#L,I,I
 
 

 >>& 	,)!")#FI66	  DL ) E&*j&=&D#!&-==K$3!% % %D!! *++DL J6 	,,,r/   r   r@   c                 6    | j                             |          S r8   )r   r   r   s     r-   r   z#NemotronForCausalLM.embed_input_ids  s    z)))444r/   Nr   r   r   c                 6    |                      ||||          }|S r8   )r   )r;   r   r   r   r   model_outputs         r-   rG   zNemotronForCausalLM.forward  s)     zzy"6
 
 r/   r   c                 <    |                      | j        |          }|S r8   )r   r   )r;   r   logitss      r-   compute_logitsz"NemotronForCausalLM.compute_logits  s      &&t|]CCr/   r   c                 J    t          |           }|                    |          S r8   )r   r   )r;   r   loaders      r-   r   z NemotronForCausalLM.load_weights  s#    "4((""7+++r/   )NN)rH   rI   rJ   packed_modules_mappingembedding_modulesr	   re   r:   r&   rP   r   r   rG   r   r   r   r   r   rQ   rR   s   @r-   r   r     s        
 
 
 +& 
 BD !
 !
 !
z !
3 !
 !
 !
 !
 !
 !
F5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r/   r   ):__doc__collections.abcr   	itertoolsr   r&   r   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configsr   
interfacesr   r   utilsr   r   r   r   r    r!   r.   	LayerNormr1   ModulerT   rg   r   r   r    r/   r-   <module>r      sT  2 I H $ $ $ $ $ $              * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - : : : : : : 0 0 0 0 0 0 0 0                
 
 
< < < < <", < < <8    ")   DP P P P P	 P P Pf@' @' @' @' @'29 @' @' @'F ~ ~ ~ ~ ~BI ~ ~ ~BJ, J, J, J, J,")\: J, J, J, J, J,r/   