
    .`i
                     ,   d Z ddlZddlmZmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@ ddlAmBZBmCZCmDZDmEZEmFZFmGZGmHZH ddlImJZJ ddlKmLZL  G d dejM                  ZN G d dejM                  ZO G d  d!ejM                  ZP G d" d#ejM                  ZQ G d$ d%ejM                  ZR G d& d'ejM                  ZS G d( d)ejM                  ZTeRePeTeQd*ZUe G d+ d,ejM                              ZV G d- d.ejM        e:e=e?e;e@e<e>
  
        ZWdS )/zInference-only NemotronH model.    N)CallableIterable)islice)nn)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)ParallelConfig)get_ep_group$get_tensor_model_parallel_world_size) tensor_model_parallel_all_gather)get_pp_group)ReLUSquaredActivation)FusedMoESharedFusedMoE)activation_without_mul)RMSNorm)ColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)MambaMixer2)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)HasInnerStateIsHybridMixtureOfExpertsSupportsLoRASupportsMambaPrefixCaching
SupportsPPSupportsQuant)AutoWeightsLoaderWeightsMapperis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixsequence_parallel_chunk)IntermediateTensors)NemotronHConfigc                   l     e Zd Z	 	 	 	 	 ddededededz  d	ed
edededdf fdZde	j
        fdZ xZS )NemotronHMLPNFT confighidden_sizeintermediate_sizequant_configbiasreduce_resultsis_sequence_parallelprefixreturnc	           
          t                                                       t          |||||| d          | _        t	          ||||||| d          | _        t                      | _        d S )Nz.up_proj
input_sizeoutput_sizer<   r;   
disable_tpr?   z
.down_proj)rC   rD   r<   r;   r=   rE   r?   )super__init__r   up_projr   	down_projr   act_fn)
selfr8   r9   r:   r;   r<   r=   r>   r?   	__class__s
            y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/nemotron_h.pyrG   zNemotronHMLP.__init__U   s     	+")%+&&&
 
 
 +(#%)+(((
 
 
 ,--    xc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rH   rJ   rI   )rK   rO   _s      rM   forwardzNemotronHMLP.forwardu   s>    ||A1KKNN~~a  1rN   )NFTFr7   )__name__
__module____qualname__r4   intr    boolstrrG   torchTensorrS   __classcell__rL   s   @rM   r6   r6   T   s         37#%*. .. . 	.
 )4/. . . #. . 
. . . . . .@        rN   r6   c            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )NemotronHMoENr7   r8   r;   parallel_configr?   c           
         t                                                       t                      | _        |j        | _        t                      j        | _        | j                                        | _	        | j        
                                | _        |j        | _        |j        | _        t          |dd           d u| _        | j        r|j        n|j        | _        |j        | _        t+          |j        |j        dt,          j        d | d          | _        t3          j        t-          j        |j        t,          j                            | j        _        |j        | _        |j        j        | _         | j        | _!        | j!        | j         z   | _"        | j"        | j        z  | _#        | j	        | j#        z  | _$        | j$        | j#        z   | _%        |j        |j        dk    rd | _&        n7|j'        |j        z  }tQ          ||j        ||d| j        | d          | _&        tS          d"i d	| j        s| j&        nd d
|j        d|j*        d| j        d|j+        ddd|j,        d|ddd|j-        d|j.        d| dddd| j        j        dt_          |j0                  ddd| j        d| j         d| j        | _1        | j        rdt+          |j        | j        |j2        || j        | d           | _3        t+          | j        |j        |j2        || j        | d!           | _4        d S d | _3        d | _4        d S )#Nmoe_latent_sizeFz.gate)r<   params_dtyper;   r?   dtyper   z.shared_experts)r8   r9   r:   r;   r=   r>   r?   shared_expertsnum_expertstop_kr9   r:   r=   renormalizer;   use_grouped_topkTnum_expert_group
topk_groupr?   z.expertsscoring_funcsigmoide_score_correction_bias
activationis_act_and_mulenable_eplbnum_redundant_expertsr>   z.fc1_latent_projrB   z.fc2_latent_proj )5rF   rG   r   tp_sizerouted_scaling_factorr   device_groupep_grouprankep_ranksizeep_sizen_routed_expertsn_shared_expertsgetattruse_latent_moerb   r9   moe_hidden_sizeuse_sequence_parallel_moer>   r   rZ   float32gater   	Parameteremptyro   rr   eplb_configrs   n_redundant_expertsn_logical_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endrf   #moe_shared_expert_intermediate_sizer6   r   num_experts_per_tokmoe_intermediate_sizenorm_topk_probn_grouprl   r   mlp_hidden_actexpertsmlp_biasfc1_latent_projfc2_latent_proj)rK   r8   r;   r`   r?   r:   rL   s         rM   rG   zNemotronHMoE.__init__}   s    	;==%+%A"$3}))++}))++%+%<%+%<$+F4Et$L$LTX$X&*&9QF""v?Q 	 %4$M!$####
 
 
	 -/LK/u}EEE-
 -
	) +6#2#>#T !%!6"&"84;S"S(,(?4<(O%%)\D4Q%Q"&)FF 	  "*f.E.J.J"&D :V=TT  #/"."3)$%)%> 111# # #D & 
 
 
 7;6IS4..t
 //	

 ,,
 ,,
 %::
 !5
 --
 &
 "T
 $^^
 ((
 &&&&
 #
  %)I$E$E!
" .f.CDDD#
$ !5%
& (('
( #'":":)
* "&!:!:+
0  	(#3!- 0_)4 222$ $ $D  $4/"._)4 222$ $ $D    $(D #'D   rN   hidden_statesr@   c                 .   |j         \  }}|                    d|          }| j        rt          |          }|                     |                    t          j                            \  }}d }| j        r4| j	        | 	                    |          }| 
                    |          \  }}|                     ||          }| j        r|\  }}n|\  }}|j        t          j        k    r|| j        z  }n| j	        |J |d| j        z  z  }| j        r|                     |          \  }}| j	        	|J ||z  }| j        rt!          |d          }|d |         }n%| j        dk    r| j                            |          }|                    ||          S )Nrd   )r   router_logitsg      ?r      )shapeviewr>   r2   r   torZ   r   r   rf   r   r   re   float16rv   r   r   ru   &maybe_all_reduce_tensor_model_parallel)	rK   r   
num_tokens
hidden_dimr   rR   shared_outputfused_moe_outfinal_hidden_statess	            rM   rS   zNemotronHMoE.forward   s   !.!4
J%**2z::$ 	C3MBBM  99]%5%5EM%5%J%JKKq 	C". $ 3 3M B B#33MBBM1'} % 
 
  	?%2"A""1>.M. %-//4#== , ,,,S4#===M  	O%)%9%9:M%N%N"* ,,,=0$ 	"B#Q# # #6kzk"B\A"&,"U"U## # #''
J???rN   )NNr7   )rT   rU   rV   r4   r    r   rY   rG   rZ   r[   rS   r\   r]   s   @rM   r_   r_   |   s         3715m( m(m( )4/m( ($.	m(
 m( m( m( m( m( m(^5@U\ 5@el 5@ 5@ 5@ 5@ 5@ 5@ 5@ 5@rN   r_   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  dedz  d	e	d
df fdZ
dej        dej        dz  fdZ xZS )NemotronHMLPDecoderLayerNr7   r8   	layer_idxmodel_configcache_configr;   r`   r?   r@   c           	         t                                                       || _        |j        }|d |dz                                d          dz
  }	t          |j        t                    r4t          |j                  dk    r|j        d         }
n|j        |	         }
n|j        }
t          ||j
        |
||j        | d          | _        t          |j
        |j                  | _        d S )Nr   -r   .mixer)r9   r:   r;   r<   r?   eps)rF   rG   r8   hybrid_override_patterncount
isinstancer:   listlenr6   r9   r   mixerr   layer_norm_epsilonnorm)rK   r8   r   r   r   r;   r`   r?   r   	mlp_indexr:   rL   s              rM   rG   z!NemotronHMLPDecoderLayer.__init__%  s     	"("@+Oi!mO<BB3GG!K	f.55 	96+,,11$*$<Q$?!!$*$<Y$G!! & 8!*/%$$$
 
 

 F.F4MNNN			rN   r   residualc                     ||}|                      |          }n|                      ||          \  }}|                     |          }||fS rQ   r   r   rK   r   r   kwargss       rM   rS   z NemotronHMLPDecoderLayer.forwardG  V     $H IIm44MM&*iix&H&H#M8

=11h&&rN   NNNNr7   rT   rU   rV   r4   rW   r
   r	   r    r   rY   rG   rZ   r[   rS   r\   r]   s   @rM   r   r   $  s        
 ,0+/2615 O  O O  O "D(	 O
 "D( O )4/ O ($. O  O 
 O  O  O  O  O  OD'|' ,%' ' ' ' ' ' ' 'rN   r   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  dedz  d	e	d
df fdZ
dej        dej        dz  fdZ xZS )NemotronHMoEDecoderLayerNr7   r8   r   r   r   r;   r`   r?   r@   c                     t                                                       || _        t          |||| d          | _        t          |j        |j                  | _        d S )Nr   )r;   r`   r?   r   )	rF   rG   r8   r_   r   r   r9   r   r   	rK   r8   r   r   r   r;   r`   r?   rL   s	           rM   rG   z!NemotronHMoEDecoderLayer.__init__X  sm     	!%+$$$	
 
 

 F.F4MNNN			rN   r   r   c                     ||}|                      |          }n|                      ||          \  }}|                     |          }||fS rQ   r   r   s       rM   rS   z NemotronHMoEDecoderLayer.forwardn  r   rN   r   r   r]   s   @rM   r   r   W  s        
 ,0+/2615O OO O "D(	O
 "D(O )4/O ($.O O 
O O O O O O,'|' ,%' ' ' ' ' ' ' 'rN   r   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  dedz  d	e	d
df fdZ
dej        dej        dz  fdZ xZS )NemotronHMambaDecoderLayerNr7   r8   r   r   r   r;   r`   r?   r@   c                 `   t                                                       || _        t          |j        |j        |j        |j        |j        z  |j	        |j
        |j        |j        |j        |j        |j        |||| d          | _        t          |j        |j                  | _        d S )Nr   )r9   ssm_state_sizeconv_kernel_sizer:   use_conv_biasuse_biasn_groups	num_headshead_dimrms_norm_epsrp   r   r   r;   r?   r   )rF   rG   r8   r   r9   r   conv_kernelmamba_num_headsmamba_head_dimr   r   r   r   mamba_hidden_actr   r   r   r   s	           rM   rG   z#NemotronHMambaDecoderLayer.__init__  s     	 *!0#/$4v7LL .__,*2.%%%$$$
 
 

$ F.F4MNNN			rN   r   r   c                     ||}|                      |          }n|                      ||          \  }}|                     |          }||fS rQ   r   )rK   r   r   r   outputs        rM   rS   z"NemotronHMambaDecoderLayer.forward  sV     $H IIm44MM&*iix&H&H#M8M**xrN   r   r   r]   s   @rM   r   r   ~  s        
 ,0+/2615O OO O "D(	O
 "D(O )4/O ($.O O 
O O O O O O@ |  ,%               rN   r   c                   |     e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        d	e
j        fdZ xZS )NemotronHAttentionNr7   r8   r   r   r   r;   r?   r@   c           
      ~   t                                                       |j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        |j        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _
        t          |d          r|j        |j        | _        n|j        | j        z  | _        | j        | j        z  | _        | j
        | j        z  | _        | j        dz  | _        t!          |j        | j        | j        | j        d|| d          | _        t%          | j        | j        z  |j        d|| d          | _        t)          | j        | j        | j        | j
        ||| d	
          | _        d S )Nr   r   r   g      Fz	.qkv_proj)r<   r;   r?   z.o_projz.attn)num_kv_headsr   r;   r?   )rF   rG   r9   r   num_attention_headstotal_num_headsr   num_key_value_headstotal_num_kv_headsmaxr   hasattrr   q_sizekv_sizescalingr   qkv_projr   o_projr   attn)	rK   r8   r   r   r   r;   r?   ru   rL   s	           rM   rG   zNemotronHAttention.__init__  s    	!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFF6:&& 	G6?+F"ODMM".$2FFDMnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 NML*%%###
 
 
			rN   r   c                     |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          }|                     |          \  }	}|	S )Nr   )dim)r   splitr   r   r   r   )
rK   r   r   qkvrR   qkvattn_outputr   s
             rM   rS   zNemotronHAttention.forward  sp    
 }--Q))T[$,E2)NN1aii1a((KK,,	rN   )NNNr7   )rT   rU   rV   r4   rW   r
   r	   r    rY   rG   rZ   r[   rS   r\   r]   s   @rM   r   r     s        
 ,0+/26:
 :
:
 :
 "D(	:

 "D(:
 )4/:
 :
 
:
 :
 :
 :
 :
 :
x	|	 
		 	 	 	 	 	 	 	rN   r   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  dedz  d	e	d
df fdZ
dej        dej        dej        dz  fdZ xZS )NemotronHAttentionDecoderLayerNr7   r8   r   r   r   r;   r`   r?   r@   c           	          t                                                       t          |||||| d          | _        t	          |j        |j                  | _        d S )Nr   r?   r   )rF   rG   r   r   r   r9   r   r   r   s	           rM   rG   z'NemotronHAttentionDecoderLayer.__init__  sl     	'$$$
 
 

 F.F4MNNN			rN   	positionsr   r   c                     ||}|                      |          }n|                      ||          \  }}|                     |          }||fS )N)r   r   )rK   r   r   r   r   s        rM   rS   z&NemotronHAttentionDecoderLayer.forward  sX     $H IIm44MM&*iix&H&H#M8


??h&&rN   r   r   r]   s   @rM   r   r     s        
 ,0+/2615O OO O "D(	O
 "D(O )4/O ($.O O 
O O O O O O.'<' |' ,%	' ' ' ' ' ' ' 'rN   r   )Mr   *Ec                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )NemotronHModelr7   r   vllm_configr?   c                0   t                                                       |j        j        |j        |j        |j        |j        | _        j        | _        t          | j        j
                  | _        dj        v | _        dt          ffd}t          t!          j                  || d          \  | _        | _        | _        t)          ddgj
                  | _        t-          j
        j                  | _        d S )	Nr   r?   c           	          t          |                     dd          d                   }t          j        |                  } |||           S )N.r   )r8   r   r   r   r;   r`   r?   )rW   rsplitALL_DECODER_LAYER_TYPESr   )r?   r   layer_classr   r8   r   r`   r;   s      rM   	get_layerz*NemotronHModel.__init__.<locals>.get_layer?  sh    FMM#q11!455I1.y9K ;#))) /   rN   z.layersr   r   r   r   )rF   rG   r   	hf_configr   r;   r`   r8   
vocab_sizer"   r9   embed_tokensr   has_moerY   r0   r   start_layer	end_layerlayersr/   make_empty_intermediate_tensorsr   r   norm_f)
rK   r   r?   r  r   r8   r   r`   r;   rL   s
       @@@@@rM   rG   zNemotronHModel.__init__+  sC   "-":"D"/"/"/%5 +2O
 

 f<<	c 	 	 	 	 	 	 	 	 	 	 9D.//fCUCUCU9
 9
 9
5$.$+ 0Wj)6+=0
 0
, f0f6OPPPrN   	input_idsr@   c                 ,    |                      |          S rQ   )r  rK   r  s     rM   embed_input_idszNemotronHModel.embed_input_idsW  s      +++rN   Nr   intermediate_tensorsinputs_embedsc                 r   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr   r   )r   r   r   )r   r   )
r   is_first_rankr  r   r	  r  r  is_last_rankr3   r  )	rK   r  r   r  r  r   r   layerrR   s	            rM   rS   zNemotronHModel.forwardZ  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e#+!' ' '#M88 ~~* 	&"/XFF    ;;}h??qrN   c                 ~    | j         r5t          j        | ddd| j        j        t          | dd                    }|S g S )NrH   rI   r7   rs   r   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerg   rs   )r  r   make_expert_params_mappingr8   r}   r   )rK   expert_params_mappings     rM   get_expert_mappingz!NemotronHModel.get_expert_mappingz  sW    < 	)$,$G
 $-$/"$ K8&-d4KQ&O&O% % %! )(	rN   weightsc           
      n   g d}|                                  }t          |                                           }t                      }|D ]i\  }}d|v sd|v rt	          ||          }|!|D ]i\  }}	}
|	|vr|                    |	|          }|                    d          r||vr;t          ||           rL||         }|j        } ||||
            nd}|D ]}|\  }}	}}
|	|vrd}|                    |	|          }t          ||           r7||         }t          j
        t          dt          f         |j                  } |||||
|d          }|r|} nA|rt          ||           r*||         }t          |d	t                    } |||           |                    |           k|S )
N))r   q_projr   )r   k_projr   )r   v_projr   scale
zero_pointz.biasFT.)shard_id	expert_idreturn_successweight_loader)r  dictnamed_parameterssetr$   replaceendswithr.   r'  typingcastr   rX   r   r#   add)rK   r  stacked_params_mappingr  params_dictloaded_paramsnameloaded_weight
param_nameweight_namer$  paramr'  is_expert_weightmappingr%  name_mappedsuccesss                     rM   load_weightszNemotronHModel.load_weights  so   "
 "
 "
 !% 7 7 9 94002233"%%%#* I	$ I	$D-$,$"6"60{CC< 6L ?8 ?81
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== $) 4 ,8 ,8GCJ@JY"$..  (,$ #',,{J"G"GK.{DAA ! '4E %+K d+U-@% %M ,m%#!)"+'+  G  * ( ! .tT:: ! '-E$+0E% %M "M%777d####rN   NN)rT   rU   rV   r   rY   rG   rZ   r[   r  r3   rS   r   tuplerW   r  r   r*  r<  r\   r]   s   @rM   r   r   )  sc       AC *Q *Q *Qz *Q3 *Q *Q *Q *Q *Q *QX, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   @DsCc/A)B$C    &VHU33D-E$F V3s8 V V V V V V V VrN   r   c            
       &    e Zd ZU dZeed<    eddiddd          Zd	g d
iZdddZ	e
dddeej        ej        f         fd            Ze
dddeeeef         eeeef         f         fd            Ze
deeef         fd            Zdddedef fdZdededdfdZdej        dej        fdZ	 	 d&dej        dej        dedz  d ej        dz  fd!Zd"ej        dej        dz  fd#Zd$eeeej        f                  dee         fd%Z xZS )'NemotronHForCausalLMTis_non_gated_moebackbonemodelAr  )A_log
embeddings)orig_to_new_prefixorig_to_new_substrr   )r  r   r!  input_embeddingsoutput_embeddings)r  lm_headr   r   r@   c                 j    t          j        |j        j        |j        j        |j        j                  S rQ   )r   mamba2_state_dtyper   re   r   mamba_cache_dtypemamba_ssm_cache_dtype)clsr   s     rM   !get_mamba_state_dtype_from_configz6NemotronHForCausalLM.get_mamba_state_dtype_from_config  s4    
 );$*$6$:
 
 	
rN   c           	          |j         }|j        j        }|j        |j        z  }t          j        ||j        |j        |j        |j        |j	        |j
                  S )a3  Calculate shapes for Mamba's convolutional and state caches.

        Args:
            vllm_config: vLLM config

        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
            - temporal_state_shape: Shape for state space model cache
        )r:   tp_world_sizer   r   r   
state_sizer   )r`   r   r  r   r   r   mamba2_state_shapetensor_parallel_sizer   r   r   )rP  r   r`   r  r:   s        rM   !get_mamba_state_shape_from_configz6NemotronHForCausalLM.get_mamba_state_shape_from_config  si     &5,6	%5	8PP(;/)>'/- /!-
 
 
 	
rN   c                 (    t          j                    S rQ   )r   mamba2_state_copy_func)rP  s    rM   get_mamba_state_copy_funcz.NemotronHForCausalLM.get_mamba_state_copy_func.  s    +BDDDrN   r7   r   r?   c                v   |j         j        }|| _        |j         | _         |j        }|j        | _        t                                                       || _        || _        t          |t          |d                    | _
        t          |j        |j        t          |d                    | _        t          |j                  | _        | j
        j        | _        | j
        j        rg | _        |j        | _        g | _        d }| j
        j        D ]B}t1          |t2                    r+|j        }| j                            |j        j                   Ct;          | j                  | _        |j        | _         |j!        | _"        |j#        | _$        |j%        | _&        |j'        | _(        |j)        | _*        d S d S )NrC  )r   r?   rK  r   )+r   r  r   scheduler_configr;   rF   rG   r8   r   r1   rC  r!   r  r9   rK  r   logits_processorr
  r  expert_weightsr   num_expert_groups
moe_layersr	  r   r   r   appendr   r   num_moe_layersr   num_logical_expertsr   num_physical_expertsr   num_local_physical_expertsr}   num_routed_expertsr~   num_shared_expertsr   rs   )rK   r   r?   r8   r\  example_moer  rL   s          rM   rG   zNemotronHForCausalLM.__init__2  s   )3&'4&7'4 0##L,I,I
 
 

 &	22
 
 
 !00A B B J6 	,
 : 	I"$D%+^D" DOK* @ @e%=>> @ #(+KO**5;+>???"%do"6"6D'2'DD$(3(FD%.9.RD+&1&BD#&1&BD#)4)HD&&&'	I 	IrN   rd  re  Nc                    | j         |k    sJ || _        || _         || j        z
  | _        | j        j        D ]Q}t          |t                    r:|j        }||_	        ||_
        | j        |_        |j                                         Rd S rQ   )re  rd  rc  rs   rC  r	  r   r   r   r   r   r   r   update_expert_map)rK   rd  re  r  moes        rM    update_physical_experts_metadataz5NemotronHForCausalLM.update_physical_experts_metadatad  s    
 .2LLLLL$8!*D'%9D<T%T"Z& 	0 	0E%!9:: 0k/I,)=&*.*D'--///	0 	0rN   r  c                 6    | j                             |          S rQ   )rC  r  r  s     rM   r  z$NemotronHForCausalLM.embed_input_idsu  s    z)))444rN   r   r  r  c                 6    |                      ||||          }|S rQ   )rC  )rK   r  r   r  r  r   r   s          rM   rS   zNemotronHForCausalLM.forwardx  s)     

y"6
 
 rN   r   c                 <    |                      | j        |          }|S rQ   )r]  rK  )rK   r   logitss      rM   compute_logitsz#NemotronHForCausalLM.compute_logits  s      &&t|]CCrN   r  c                 ^    t          | dg          }|                    || j                  S )Nmtp)skip_prefixes)mapper)r,   r<  hf_to_vllm_mapper)rK   r  loaders      rM   r<  z!NemotronHForCausalLM.load_weights  s2    "4w???""743I"JJJrN   r=  ) rT   rU   rV   rA  rX   __annotations__r-   rv  packed_modules_mappingembedding_modulesclassmethodr>  rZ   re   rQ  rW   rW  r   rZ  r   rY   rG   rl  r[   r  r3   rS   rq  r   r*  r<  r\   r]   s   @rM   r@  r@    s         "d!!!%&0%(GG   	 
 
 
 +& 
 
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3m 44	5
 
 
 [
8 E%0BDV0V*W E E E [E BD 0I 0I 0Iz 0I3 0I 0I 0I 0I 0I 0Id0!0 %(0 
	0 0 0 0"5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d*   | 
	   KHU33D-E$F K3s8 K K K K K K K KrN   r@  )X__doc__r-  collections.abcr   r   	itertoolsr   rZ   r   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.config.parallelr   vllm.distributedr   r   !vllm.distributed.communication_opr   vllm.distributed.parallel_stater   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   r   *vllm.model_executor.layers.fused_moe.utilsr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   -vllm.model_executor.layers.mamba.mamba_mixer2r   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr    3vllm.model_executor.layers.vocab_parallel_embeddingr!   r"   -vllm.model_executor.model_loader.weight_utilsr#   r$   %vllm.model_executor.models.interfacesr%   r&   r'   r(   r)   r*   r+    vllm.model_executor.models.utilsr,   r-   r.   r/   r0   r1   r2   vllm.sequencer3   vllm.transformers_utils.configsr4   Moduler6   r_   r   r   r   r   r   r   r   r@  rt   rN   rM   <module>r     s'  & & %  . . . . . . . .              * * * * * * = = = = = = < < < < < < < < < < / / / / / / O O O O O O O O N N N N N N 8 8 8 8 8 8 G G G G G G I I I I I I I I M M M M M M 8 8 8 8 8 8            H G G G G G E E E E E E            G F F F F F                                                 . - - - - - ; ; ; ; ; ;% % % % %29 % % %Pe@ e@ e@ e@ e@29 e@ e@ e@P0' 0' 0' 0' 0'ry 0' 0' 0'f$' $' $' $' $'ry $' $' $'N.  .  .  .  .  .  .  . bF F F F F F F FR&' &' &' &' &'RY &' &' &'T 
$	!	'	!	   y y y y yRY y y yxiK iK iK iK iKIiK iK iK iK iKrN   