
    .`iE                     6   d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;  G d dej<                  Z= G d dej<                  Z> G d dej<                  Z?e?e>dZ@e
 G d  d!ej<                              ZA G d" d#ej<        e0e2e4e1e5e3	  	        ZBdS )$zInference-only Bamba model.    )IterableN)nn)BambaConfig)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)$get_tensor_model_parallel_world_size)get_pp_group)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)MambaMixer2)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )HasInnerStateIsHybridSupportsLoRASupportsMambaPrefixCaching
SupportsPPSupportsQuant)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   H     e Zd Z	 	 	 ddededz  dededdf
 fd	Zd
 Z xZ	S )BambaMLPNF configquant_configbiasprefixreturnc                 T   t                                                       t          |j        |j        gdz  ||| d          | _        t          |j        |j        ||| d          | _        |j        dk    rt          d|j         d          t                      | _        d S )	N   z.gate_up_proj)
input_sizeoutput_sizesr/   r.   r0   z
.down_proj)r4   output_sizer/   r.   r0   siluzUnsupported activation: z!. Only silu is supported for now.)super__init__r   hidden_sizeintermediate_sizegate_up_projr   	down_proj
hidden_act
ValueErrorr   act_fn)selfr-   r.   r/   r0   	__class__s        t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/bamba.pyr9   zBambaMLP.__init__;   s     	6) 23a7%+++
 
 
 +/*%(((
 
 
 &&26+< 2 2 2   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r<   r@   r=   )rA   x_s      rC   forwardzBambaMLP.forwardX   sB      ##1KKNN~~a  1rD   )NFr,   )
__name__
__module____qualname__r   r   boolstrr9   rI   __classcell__rB   s   @rC   r+   r+   :   s         37# ## )4/# 	#
 # 
# # # # # #:      rD   r+   c                        e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        de
j        dz  fdZ xZS )BambaMixerDecoderLayerNr,   r-   	layer_idxmodel_configcache_configr.   r0   r1   c                    t                                                       || _        t          |j        |j        |j        |j        |j        z  |j        |j	        |j
        |j        |j        |j        |j        |||| d          | _        t!          ||| d          | _        t%          |j        |j                  | _        t%          |j        |j                  | _        d S )Nz.mixer)r:   ssm_state_sizeconv_kernel_sizer;   use_conv_biasuse_biasn_groups	num_headshead_dimrms_norm_eps
activationrT   rU   r.   r0   .feed_forwardr.   r0   eps)r8   r9   r-   r   r:   mamba_d_statemamba_d_convmamba_expandmamba_conv_biasmamba_proj_biasmamba_n_groupsmamba_n_headsmamba_d_headr^   r>   mambar+   feed_forwardr   input_layernormpre_ff_layernorm)rA   r-   rS   rT   rU   r.   r0   rB   s          rC   r9   zBambaMixerDecoderLayer.__init__`   s     	 *!/#0$1F4FF 0+**(,(%%%$$$
 
 

$ %6N6N6N
 
 
  'v'9v?RSSS '(:@S T T TrD   hidden_statesresidualc                     ||}|                      |          }n|                      ||          \  }}|                     |          }|                     ||          \  }}|                     |          }||fS rF   )rn   rl   ro   rm   )rA   rp   rq   kwargsoutputs        rC   rI   zBambaMixerDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8M**"&"7"7"I"Ix))-88h&&rD   NNNr,   )rJ   rK   rL   r   intr	   r   r   rN   r9   torchTensorrI   rO   rP   s   @rC   rR   rR   _   s        
 ,0+/26!U !U!U !U "D(	!U
 "D(!U )4/!U !U 
!U !U !U !U !U !UF'|' ,%' ' ' ' ' ' ' 'rD   rR   c                        e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        de
j        d	e
j        fdZde
j        de
j        de
j        dz  fdZ xZS )BambaAttentionDecoderLayerNr,   r-   rS   rT   rU   r.   r0   r1   c           
         t                                                       t          |dd          }|j        | _        t	                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        |j        | _	        | j	        |k    r| j	        |z  dk    sJ n|| j	        z  dk    sJ t          d| j	        |z            | _        |j        | j        z  | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        || _        t          |d| j                  }	|	| j        z  |j        d<   t%          | j        ||j        dt'          j                    	          | _        t-          |j        | j        | j        | j	        d
|| d          | _        t1          | j        | j        z  |j        d
|| d          | _        t5          | j        | j        | j        | j        || d          | _        t9          ||| d          | _        t=          |j        |j                  | _         t=          |j        |j                  | _!        d S )Nmax_position_embeddingsi    r   r   g      attn_rotary_embpartial_rotary_factorT)	head_sizemax_positionrope_parametersis_neox_styledtypeFz	.qkv_proj)r/   r.   r0   z.o_projz.attn)num_kv_headsrU   r0   r`   ra   rb   )"r8   r9   getattrr:   r   num_attention_headstotal_num_headsr\   num_key_value_headstotal_num_kv_headsmaxr   r]   q_sizekv_sizescalingr|   r   r   rw   get_default_dtype
rotary_embr   qkv_projr   o_projr   attnr+   rm   r   r^   rn   ro   )rA   r-   rS   rT   rU   r.   r0   r|   tp_size
rotary_dimrB   s             rC   r9   z#BambaAttentionDecoderLayer.__init__   s    	")&2KT"R"R!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFF*d.BBnt}4(4=8}d*'>$V%6FF
:Dt}:T67"m0"2)++
 
 
 *M #%'''
 
 
 ( 4=0%%%%
 
 
 NML*%###
 
 
	 %6N6N6N
 
 
  'v'9v?RSSS '(:@S T T TrD   	positionsrp   c                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }	|                     |	          \  }
}|
S )N)dim)r   splitr   r   r   r   r   )rA   r   rp   rs   qkvrH   qkvattn_outputrt   s              rC   self_attentionz)BambaAttentionDecoderLayer.self_attention   s     }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	rD   rq   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rp   )rn   r   ro   rm   )rA   r   rp   rq   rs   s        rC   rI   z"BambaAttentionDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8++' , 
 

 #'"7"7x"P"Px))-88h&&rD   ru   )rJ   rK   rL   r   rv   r	   r   r   rN   r9   rw   rx   r   rI   rO   rP   s   @rC   rz   rz      s1       
 ,0+/26IU IUIU IU "D(	IU
 "D(IU )4/IU IU 
IU IU IU IU IU IUV< |
 
   '<' |' ,%	' ' ' ' ' ' ' 'rD   rz   )	attentionrl   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
deeeej        f                  dee         fdZ xZS )
BambaModelr,   r0   vllm_configr0   c                   t                                                       |j        j        |j        |j        |j        | _        j        | _        t          | j        j	                  | _
        dt          ffd}t          j        || d          \  | _        | _        | _        t#          ddgj	                  | _        t'          j	        j                  | _        d S )Nr0   c                     t          |                     dd          d                   }t          j        |                  } |||           S )N.r   ra   )rv   rsplitALL_DECODER_LAYER_TYPESlayers_block_type)r0   rS   layer_classrU   r-   rT   r.   s      rC   	get_layerz&BambaModel.__init__.<locals>.get_layer   s`    FMM#q11!455I1&2J92UVK;)   rD   z.layersr   rp   rq   rb   )r8   r9   rT   	hf_configrU   r.   r-   
vocab_sizer   r:   embed_tokensrN   r(   num_hidden_layersstart_layer	end_layerlayersr'   make_empty_intermediate_tensorsr   r^   final_layernorm)	rA   r   r0   r   rU   r-   rT   r.   rB   s	       @@@@rC   r9   zBambaModel.__init__  s   )6@"/"/"/ +2O
 


	c 
	 
	 
	 
	 
	 
	 
	 
	 
	 9D$i68J8J8J9
 9
 9
5$.$+ 0Wj)6+=0
 0
,  'v'9v?RSSSrD   	input_idsr1   c                 ,    |                      |          S rF   )r   rA   r   s     rC   embed_input_idszBambaModel.embed_input_ids5  s      +++rD   Nr   intermediate_tensorsinputs_embedsc                 d   t                      j        r||}n|                     |          }d }n|J |d         }|d         }d }t          | j                  D ]\  }} ||||          \  }}t                      j        st          ||d          S |                     ||          \  }}	|S )Nrp   rq   )r   rp   rq   )rp   rq   )r   is_first_rankr   	enumerater   is_last_rankr   r   )
rA   r   r   r   r   rp   rq   ilayerrH   s
             rC   rI   zBambaModel.forward8  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H!$+.. 	 	HAu&+e#+!' ' '#M88 ~~* 	&"/XFF    //xHHqrD   weightsc                    g d}t          |                                           }t                      }|D ]\  }}d|v rd|v r|                    dd          }d|v r|                    dd          }|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
d	t                    } ||
|           |	                    |           |S )
N))r   q_projr   )r   k_projr   )r   v_projr   )r<   	gate_projr   )r<   up_projr   zrotary_emb.inv_freqA_logAz.self_attn.z
.self_attnr,   z.biasweight_loader)
dictnamed_parameterssetreplaceendswithr&   r   r   r   add)rA   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               rC   load_weightszBambaModel.load_weightsY  s   "
 "
 "
 4002233"%%%#* #	$ #	$D-$,,$||GS11$$||L"555K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####rD   NN)rJ   rK   rL   r
   rN   r9   rw   rx   r   r   rI   r   tupler   r   rO   rP   s   @rC   r   r     s%       AC $T $T $Tz $T3 $T $T $T $T $T $TL, ,%, , , , , <@-1 < < 2D8	
 |d* 
   B0HU33D-E$F 03s8 0 0 0 0 0 0 0 0rD   r   c            
           e Zd Zg dddgdZdddZedd	d
eej        ej        f         fd            Z	edd	d
eee
e
f         ee
e
e
f         f         fd            Zed
eeef         fd            Zdddedef fdZdej        d
ej        fdZ	 	 ddej        dej        dedz  dej        dz  fdZdej        d
ej        dz  fdZdeeeej        f                  d
ee         fdZ xZS )BambaForCausalLM)r   r   r   r   r=   )r   r<   input_embeddingsoutput_embeddings)r   lm_headr   r
   r1   c                 j    t          j        |j        j        |j        j        |j        j                  S rF   )r   mamba2_state_dtyperT   r   rU   mamba_cache_dtypemamba_ssm_cache_dtype)clsr   s     rC   !get_mamba_state_dtype_from_configz2BambaForCausalLM.get_mamba_state_dtype_from_config  s4    
 );$*$6$:
 
 	
rD   c           	          |j         }|j        j        }|j        |j        z  }t          j        ||j        |j        |j	        |j
        |j        |j                  S )a3  Calculate shapes for Mamba's convolutional and state caches.

        Args:
            vllm_config: vLLM config

        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
            - temporal_state_shape: Shape for state space model cache
        )r;   tp_world_sizer[   r\   r]   
state_sizeconv_kernel)parallel_configrT   r   rf   r:   r   mamba2_state_shapetensor_parallel_sizeri   rj   rk   rd   re   )r   r   r   r   r;   s        rC   !get_mamba_state_shape_from_configz2BambaForCausalLM.get_mamba_state_shape_from_config  si     &5,6	%2Y5JJ(;/)>--+ .!.
 
 
 	
rD   c                 (    t          j                    S rF   )r   mamba2_state_copy_func)r   s    rC   get_mamba_state_copy_funcz*BambaForCausalLM.get_mamba_state_copy_func  s    +BDDDrD   r,   r   r0   c                   |j         j        }|| _        |j         | _         |j        }|j        | _        t                                                       || _        || _        t          |t          |d                    | _
        t          |j        |j        t          |d                    | _        t          |j                  | _        | j
        j        | _        d S )Nmodel)r   r0   r   r   )rT   r   r   scheduler_configr.   r8   r9   r-   r   r)   r   r   r   r:   r   r   logits_processorr   )rA   r   r0   r-   r   rB   s        rC   r9   zBambaForCausalLM.__init__  s    )3&'4&7'4 0#L,I,I
 
 

 &	22
 
 
 !00A B B J6 	,,,rD   r   c                 6    | j                             |          S rF   )r   r   r   s     rC   r   z BambaForCausalLM.embed_input_ids  s    z)))444rD   Nr   r   r   c                 6    |                      ||||          }|S rF   )r   )rA   r   r   r   r   rs   rp   s          rC   rI   zBambaForCausalLM.forward  s)     

y"6
 
 rD   rp   c                 <    |                      | j        |          }|S rF   )r   r   )rA   rp   logitss      rC   compute_logitszBambaForCausalLM.compute_logits  s      &&t|]CCrD   r   c                 J    t          |           }|                    |          S rF   )r%   r   )rA   r   loaders      rC   r   zBambaForCausalLM.load_weights  s#    "4((""7+++rD   r   )rJ   rK   rL   packed_modules_mappingembedding_modulesclassmethodr   rw   r   r   rv   r   r   r   r
   rN   r9   rx   r   r   rI   r   r   r   r   rO   rP   s   @rC   r   r     sB       
 
 

 #K0  +& 
 
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3m 44	5
 
 
 [
8 E%0BDV0V*W E E E [E BD 
 
 
z 
3 
 
 
 
 
 
65 5%, 5 5 5 5 <@-1 < < 2D8	
 |d*   | 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rD   r   )C__doc__collections.abcr   rw   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   r
   vllm.distributedr   vllm.distributed.parallel_stater   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   -vllm.model_executor.layers.mamba.mamba_mixer2r   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r    r!   r"   r#   r$   utilsr%   r&   r'   r(   r)   Moduler+   rR   rz   r   r   r    rD   rC   <module>r     s   " ! % $ $ $ $ $        $ $ $ $ $ $ * * * * * * = = = = = = < < < < < < < < < < A A A A A A 8 8 8 8 8 8 < < < < < < 8 8 8 8 8 8         
 H G G G G G E E E E E E            G F F F F F @ @ @ @ @ @        P O O O O O - - - - - -                            " " " " "ry " " "J4' 4' 4' 4' 4'RY 4' 4' 4'nn' n' n' n' n' n' n' n'd ,#   { { { { { { { {|y, y, y, y, y,Iy, y, y, y, y,rD   