
    .`iS              
       h   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@  G d dejA                  ZB G d dejA                  ZC G d d ejA                  ZDeDeCd!ZEe G d" d#ejA                              ZF G d$ d%ejA        e5e7e9e6e8          ZG G d& d'eG          ZHdS )(zInference-only Jamba model.    )Iterable)isliceN)nn)JambaConfig)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)$get_tensor_model_parallel_world_size)get_pp_group)FusedMoE)RMSNorm)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)
MambaMixer)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)DispatchPooler)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)LlamaMLP)IntermediateTensors   )HasInnerStateIsHybridSupportsLoRASupportsMambaPrefixCaching
SupportsPP)AutoWeightsLoaderWeightsMapperis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                        e Zd Z	 	 	 	 	 	 ddededz  dedz  dej        dz  dedz  dedz  d	ef fd
Z	dej
        dej
        fdZ xZS )JambaMoEN confignum_expertstop_kparams_dtypetp_sizequant_configprefixc                    t                                                       |p|j        | _        |p|j        | _        |j        | _        |j        | _        | j        dk    r't          | j        | j        dd || d          | _	        t          | j        | j        | j        | j        ||ddd|| d          | _        d S )Nr    Fz.router)biasr4   r2   r5   Tz.experts)r3   r2   reduce_resultsrenormalizeuse_grouped_topkr4   r5   )super__init__r0   num_total_expertsnum_experts_per_tokr1   hidden_sizeintermediate_sizer   routerr   experts)	selfr/   r0   r1   r2   r3   r4   r5   	__class__s	           t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/jamba.pyr<   zJambaMoE.__init__<   s     	!,!B0B8f8
!-!'!9!A%%* &!) )))  DK  "J"%"%&&&
 
 
    hidden_statesreturnc                 @   |j         }|                    d| j                  }| j        dk    r|                     |          \  }}n.t          j        |j         d         df|j        |j                  }| 	                    ||          }|                    |          S )Nr    r   )devicedtype)
shapeviewr?   r=   rA   torchonesrK   rL   rB   )rC   rG   
orig_shaperouter_logits_s        rE   forwardzJambaMoE.forwardd   s    "(
%**2t/?@@!A%%#{{=99M11!J$Q'+$+#)  M
 ]MBB!!*---rF   )NNNNNr.   )__name__
__module____qualname__r   intrO   rL   r   strr<   TensorrT   __classcell__rD   s   @rE   r-   r-   ;   s         #' +/"26&
 &
&
 4Z&
 Tz	&

 kD(&
 t&
 )4/&
 &
 &
 &
 &
 &
 &
P.U\ .el . . . . . . . .rF   r-   c                        e Zd Z	 	 	 	 	 ddedededz  dedz  dedz  d	edz  d
e	ddf fdZ
dej        dej        dz  fdZ xZS )JambaMambaDecoderLayerNFr.   r/   	layer_idxmodel_configcache_configr4   is_lora_enabledr5   rH   c                 T   t                                                       || _        || _        t	          |j        |j        |j        |j        |j        z  |j	        |j
        |j        d|j        |j        | j        ||| d          | _        |j        |         }	|	dk    rt!          ||| d          | _        n+t%          |j        |j        |j        || d          | _        t)          |j        |j                  | _        t)          |j        |j                  | _        d S )NTz.mixer)r?   ssm_state_sizeconv_kernel_sizer@   time_step_rankuse_conv_biasuse_biasuse_rms_normrms_norm_eps
activationrb   r`   ra   r5   r    .feed_forwardr4   r5   eps)r;   r<   r/   rb   r   r?   mamba_d_statemamba_d_convmamba_expandmamba_dt_rankmamba_conv_biasmamba_proj_biasrj   
hidden_actmambalayers_num_expertsr-   feed_forwardJambaMLPr@   r   input_layernormpre_ff_layernorm)rC   r/   r_   r`   ra   r4   rb   r5   kwargsr0   rD   s             rE   r<   zJambaMambaDecoderLayer.__init__u   sU    	.*!/#0$1F4FF!/ 0+,( 0%%$$$
 
 

" /	:?? () ///! ! !D !)"(!) ///! ! !D  'v'9v?RSSS '(:@S T T TrF   rG   residualc                     ||}|                      |          }n|                      ||          \  }}t          j        |          }|                     ||           |                     ||          \  }}|                     |          }||fS N)r{   rO   
empty_likerw   r|   ry   )rC   rG   r~   r}   outputs        rE   rT   zJambaMambaDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8!-00

=&)))"&"7"7"I"Ix))-88h&&rF   )NNNFr.   )rU   rV   rW   r   rX   r
   r	   r   boolrY   r<   rO   rZ   rT   r[   r\   s   @rE   r^   r^   t   s        
 ,0+/26',/U /U/U /U "D(	/U
 "D(/U )4//U /U /U 
/U /U /U /U /U /Ub'|' ,%' ' ' ' ' ' ' 'rF   r^   c                        e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        de
j        d	e
j        fdZde
j        de
j        de
j        dz  fdZ xZS )JambaAttentionDecoderLayerNr.   r/   r_   r`   ra   r4   r5   rH   c           
      f   t                                                       |j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        |j        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _
        |j        | j        z  | _        | j        | j        z  | _        | j
        | j        z  | _        | j        dz  | _        t          |j        | j        | j        | j        d|| d          | _        t#          | j        | j        z  |j        d|| d          | _        t'          | j        | j        | j        | j
        || d	          | _        |j        |         }	|	dk    rt-          ||| d
          | _        n+t1          |j        |j        |j        || d
          | _        t7          |j        |j                  | _        t7          |j        |j                  | _        d S )Nr   r    g      Fz	.qkv_proj)r7   r4   r5   z.o_projz.attn)num_kv_headsra   r5   rl   rm   rn   )r;   r<   r?   r   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_kv_headsmaxr   head_dimq_sizekv_sizescalingr   qkv_projr   o_projr   attnrx   r-   ry   rz   r@   rv   r   rj   r{   r|   )rC   r/   r_   r`   ra   r4   r5   r}   r3   r0   rD   s             rE   r<   z#JambaAttentionDecoderLayer.__init__   s    	!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFF*d.BBnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 NML*%###
 
 
	 /	:?? () ///! ! !D !)"(!) ///! ! !D  'v'9v?RSSS '(:@S T T TrF   	positionsrG   c                     |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          }	|                     |	          \  }
}|
S )NrJ   )dim)r   splitr   r   r   r   )rC   r   rG   r}   qkvrS   qkvattn_outputr   s              rE   self_attentionz)JambaAttentionDecoderLayer.self_attention  sp     }--Q))T[$,E2)NN1aii1a((KK,,	rF   r~   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rG   )r{   r   r|   ry   )rC   r   rG   r~   r}   s        rE   rT   z"JambaAttentionDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8++' , 
 

 #'"7"7x"P"Px))-88h&&rF   )NNNr.   )rU   rV   rW   r   rX   r
   r	   r   rY   r<   rO   rZ   r   rT   r[   r\   s   @rE   r   r      s1       
 ,0+/26IU IUIU IU "D(	IU
 "D(IU )4/IU IU 
IU IU IU IU IU IUV
<
 |

 

 
 
 
'<' |' ,%	' ' ' ' ' ' ' 'rF   r   )	attentionrw   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
deeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )
JambaModelr.   r5   vllm_configr5   c                   t                                                       |j        j        |j        |j        |j        | _        j        | _        t          | j        j	                  | _
        dt          |j                  idt          ffd}t          j        || d          \  | _        | _        | _        t'          ddgj	                  | _        t+          j	        j                  | _        d S )	Nrb   r5   c                     t          |                     dd          d                   }t          j        |                  } ||f| dS )N.r    rm   )rX   rsplitALL_DECODER_LAYER_TYPESlayers_block_type)r5   r_   layer_classra   r/   extra_kwargsr`   r4   s      rE   	get_layerz&JambaModel.__init__.<locals>.get_layerD  sq    FMM#q11!455I1&2J92UVK;	
 *    rF   z.layersr   rG   r~   rn   )r;   r<   r`   	hf_configra   r4   r/   
vocab_sizer   r?   embed_tokensr   lora_configrY   r*   num_hidden_layersstart_layer	end_layerlayersr)   make_empty_intermediate_tensorsr   rj   final_layernorm)
rC   r   r5   r   ra   r/   r   r`   r4   rD   s
       @@@@@rE   r<   zJambaModel.__init__1  s<   )3"/"/"/ +2O
 

 *40G+H+HI	c 	 	 	 	 	 	 	 	 	 	 9D$i68J8J8J9
 9
 9
5$.$+ 0Wj)6+=0
 0
,  'v'9v?RSSSrF   	input_idsrH   c                 ,    |                      |          S r   )r   rC   r   s     rE   embed_input_idszJambaModel.embed_input_idsZ  s      +++rF   Nr   intermediate_tensorsinputs_embedsc                 r   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )NrG   r~   )r   rG   r~   )rG   r~   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	rC   r   r   r   r   rG   r~   layerrS   s	            rE   rT   zJambaModel.forward]  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e#=8' ' '#M88 ~~* 	&"/XFF    //xHHqrF   c                 H    t          j        | ddd| j        j                  S )N	gate_proj	down_projup_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer0   )r   make_expert_params_mappingr/   r0   rC   s    rE   get_expert_mappingzJambaModel.get_expert_mapping{  s2     2 + +'/
 
 
 	
rF   weightsc           	         g d}t          |                                           }t                      }|                                 }|D ]<\  }}d|v r|D ]n\  }}	}
|	|vrd|v r|                    |	|          }|                    d          r||vr@t          ||           rQ||         }|j        } ||||
            n|D ]S\  }}	}}
|	|vrt          ||           r|                    |	|          }||         }|j        } |||||
|            nU|                    d          r||vrt          ||           r||         }t          |dt                    } |||           |
                    |           >|S )N))r   q_projr   )r   k_projr   )r   v_projr   ).gate_up_projz
.gate_projr   )r   z.up_projr    zrotary_emb.inv_freqrB   z.bias)shard_id	expert_idweight_loader)dictnamed_parameterssetr   replaceendswithr(   r   getattrr   add)rC   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   r   s                 rE   load_weightszJambaModel.load_weights  s3   "
 "
 "
 4002233"%%% $ 7 7 9 9#* 6	$ 6	$D-$,,5K 28 281
Kd**$$||K<<==)) d+.E.E*466 #D) % 3e]H=== +!8 !8 "$.. .tT:: ! <<Z@@D'-E$)$7M!M%!)"+    E }}W-- !$k2I2I .tT:: ! '-E$+0E% %M "M%777d####rF   NN)rU   rV   rW   r   rY   r<   rO   rZ   r   r   rT   listtuplerX   r   r   r   r   r[   r\   s   @rE   r   r   /  s\       AC 'T 'T 'Tz 'T3 'T 'T 'T 'T 'T 'TR, ,%, , , , , <@-1 < < 2D8	
 |d* 
   <	
DsCc/A)B$C 	
 	
 	
 	
DHU33D-E$F D3s8 D D D D D D D DrF   r   c            
       B    e Zd Z eddd          Zg dddgdgd	Zd
ddZdddedef fdZ	de
j        de
j        fdZ	 	 d&de
j        de
j        dedz  de
j        dz  fdZd ZdefdZedddee
j        e
j        f         fd            Zedddeeeef         eeef         f         fd            Zedeeef         fd             Zd!e
j        de
j        dz  fd"Zd#eeee
j        f                  dee         fd$Zdeeeeeef                  fd%Z xZS )'JambaForCausalLMr   z.A)z.self_attn.z.A_log)orig_to_new_substr)r   r   r   r   r   in_proj)r   gate_up_projr   input_embeddingsoutput_embeddings)r   lm_headr.   r   r   r5   c                   |j         j        }|j        }t                                                       || _        || _        |j         | _         || _        t          |t          |d                    | _	        t          |j        |j        t          |d                    | _        t          |j                  | _        | j	        j        | _        d S )Nmodelr   r5   r   r   )r`   r   scheduler_configr;   r<   r/   r   r   r+   r   r   r   r?   r   r   logits_processorr   )rC   r   r5   r/   r   rD   s        rE   r<   zJambaForCausalLM.__init__  s    )3&7&'4 0#L,I,I
 
 

 &	22
 
 
 !00A B B J6 	,,,rF   r   rH   c                 6    | j                             |          S r   )r   r   r   s     rE   r   z JambaForCausalLM.embed_input_ids  s    z)))444rF   Nr   r   r   c                 6    |                      ||||          }|S r   )r   )rC   r   r   r   r   r}   rG   s          rE   rT   zJambaForCausalLM.forward  s)     

y"6
 
 rF   c                 (     | j         j        |fi |S r   )mamba_cachecopy_inputs_before_cuda_graphs)rC   input_buffersr}   s      rE   r   z/JambaForCausalLM.copy_inputs_before_cuda_graphs  s!    >t>}WWPVWWWrF   
batch_sizec                 6    | j                             |          S r   )r   "get_seqlen_agnostic_capture_inputs)rC   r   s     rE   r   z3JambaForCausalLM.get_seqlen_agnostic_capture_inputs  s    BB:NNNrF   r   c                 j    t          j        |j        j        |j        j        |j        j                  S r   )r   mamba1_state_dtyper`   rL   ra   mamba_cache_dtypemamba_ssm_cache_dtype)clsr   s     rE   !get_mamba_state_dtype_from_configz2JambaForCausalLM.get_mamba_state_dtype_from_config  s4    
 );$*$6$:
 
 	
rF   c                     |j         }|j        j        }|j        }t	          j        |j        |j        |z  |j        |j	                  S )N)tp_world_sizer@   
state_sizeconv_kernel)
parallel_configr`   r   r?   r   mamba1_state_shapetensor_parallel_sizerr   rp   rq   )r  r   r	  r   r?   s        rE   !get_mamba_state_shape_from_configz2JambaForCausalLM.get_mamba_state_shape_from_config#  sV    
 &5,6	+(;)>'4{B .!.	
 
 
 	
rF   c                 (    t          j                    S r   )r   mamba1_state_copy_func)r  s    rE   get_mamba_state_copy_funcz*JambaForCausalLM.get_mamba_state_copy_func3  s    +BDDDrF   rG   c                 <    |                      | j        |          }|S r   )r   r   )rC   rG   logitss      rE   compute_logitszJambaForCausalLM.compute_logits7  s      &&t|]CCrF   r   c                 X    t          |           }|                    || j                  S )N)mapper)r&   r   hf_to_vllm_mapper)rC   r   loaders      rE   r   zJambaForCausalLM.load_weights>  s+    "4((""743I"JJJrF   c                 4    | j                                         S r   )r   r   r   s    rE   r   z#JambaForCausalLM.get_expert_mappingB  s    z,,...rF   r   ) rU   rV   rW   r'   r  packed_modules_mappingembedding_modulesr   rY   r<   rO   rZ   r   r   rT   r   rX   r   classmethodr   rL   r  r  r   r  r  r   r   r   r   r   r[   r\   s   @rE   r   r     s        &+.$??  
 
 

 %i0;  +& 
 BD 
 
 
z 
3 
 
 
 
 
 
45 5%, 5 5 5 5 <@-1 < < 2D8	
 |d*   X X XOS O O O O 
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3h/	0
 
 
 [
 E%0BDV0V*W E E E [E| 
	   KHU33D-E$F K3s8 K K K K/DsCc/A)B$C / / / / / / / /rF   r   c                   2     e Zd ZdZdddedef fdZ xZS )JambaForSequenceClassificationTr.   r   r   r5   c                N   t                                          ||           |j        j        }|j        }t          |dd          }t          j        |j        |||j        j	                  | _
        |j        j        }|J t          j        || j
                  | _        d S )Nr   
score_biasF)r7   rL   )
classifier)r;   r<   r`   r   
num_labelsr   r   Linearr?   
head_dtypescorepooler_configr   for_seq_clspooler)rC   r   r5   r/   r   r  r$  rD   s          rE   r<   z'JambaForSequenceClassification.__init__I  s    [@@@)3 +
"6<??

 Y*5	
 
 

 $0>((($04:VVVrF   )rU   rV   rW   is_pooling_modelr   rY   r<   r[   r\   s   @rE   r  r  F  sl        AC W W Wz W3 W W W W W W W W W WrF   r  )I__doc__collections.abcr   	itertoolsr   rO   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.distributedr   vllm.distributed.parallel_stater   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.mamba_mixerr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   !vllm.model_executor.layers.poolerr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr    vllm.model_executor.models.llamar   rz   vllm.sequencer   
interfacesr!   r"   r#   r$   r%   utilsr&   r'   r(   r)   r*   r+   Moduler-   r^   r   r   r   r   r   rF   rE   <module>rA     s   " ! $ $ $ $ $ $              $ $ $ $ $ $ * * * * * * = = = = = = < < < < < < < < < < A A A A A A 8 8 8 8 8 8 9 9 9 9 9 9 8 8 8 8 8 8         
 H G G G G G C C C C C C            = < < < < < F F F F F F        P O O O O O A A A A A A - - - - - -                            6. 6. 6. 6. 6.ry 6. 6. 6.rC' C' C' C' C'RY C' C' C'Ll' l' l' l' l' l' l' l'` ,#   Z Z Z Z Z Z Z Zzv/ v/ v/ v/ v/Iv/ v/ v/rW W W W W%5 W W W W WrF   