
    .`i'                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, e-ej.        ej.        f         Z/ G d dej0                  Z1e G d dej0                              Z2 G d dej0        e"e#e$          Z3dS )zPyTorch MAMBA2 model.    )IterableN)nn)MambaConfig)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group)RMSNorm)LogitsProcessor)MambaMixer2)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)HasInnerStateIsAttentionFreeSupportsMambaPrefixCaching)IntermediateTensors   )AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   ~     e Zd Z	 	 	 	 ddededz  dedz  dedz  deddf fd	Zd
e	j
        de	j
        dz  fdZ xZS )Mamba2DecoderLayerN configmodel_configcache_configquant_configprefixreturnc                 ~   t                                                       || _        t          |j        |j        |j        t          |d|j        |j        z            |j	        |j
        |j        |j        |j        |j        |j        |||| d          | _        t#          |j        |j                  | _        d S )Nintermediate_sizez.mixer)hidden_sizessm_state_sizeconv_kernel_sizer*   use_conv_biasuse_biasn_groups	num_headshead_dimrms_norm_eps
activationr$   r%   r&   r'   eps)super__init__r#   r   r+   
state_sizeconv_kernelgetattrexpandr.   r/   r0   r1   r2   layer_norm_epsilon
hidden_actmixerr   norm)selfr#   r$   r%   r&   r'   	__class__s         u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mamba2.pyr8   zMamba2DecoderLayer.__init__0   s     	 *!,#/%+V]V=O-O  !.__&_2(%%%$$$#
 
 

( F.F4MNNN			    hidden_statesresidualc                     ||}|                      |          }n|                      ||          \  }}|                     |          }||fS N)r@   r?   )rA   rE   rF   kwargsoutputs        rC   forwardzMamba2DecoderLayer.forwardP   sV     $H IIm44MM&*iix&H&H#M8M**xrD   )NNNr"   )__name__
__module____qualname__r   r   r   r   strr8   torchTensorrK   __classcell__rB   s   @rC   r!   r!   /   s         ,0+/26O OO "D(O "D(	O
 )4/O O 
O O O O O O@ |  ,%               rD   r!   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
deeeej        f                  dee         fdZ xZS )Mamba2Modelr"   r'   vllm_configr'   c                   t                                                       |j        j        |j        |j        |j        |j        }t          |          }|rJ | _        j	        | _	        t          | j	        j                  | _        t          j        fd| d          \  | _        | _        | _        t%          j        j                  | _        t+          ddgj                  | _        d S )Nc                 ,    t          |           S )N)r$   r%   r&   r'   )r!   )r'   r%   r#   r$   r&   s    rC   <lambda>z&Mamba2Model.__init__.<locals>.<lambda>x   s&    -)))   rD   z.layersrV   r5   rE   rF   )r7   r8   r$   	hf_configr%   r&   lora_configboolr#   
vocab_sizer   r+   
embeddingsr   num_hidden_layersstart_layer	end_layerlayersr   r=   norm_fr   make_empty_intermediate_tensors)
rA   rW   r'   r\   is_lora_enabledr%   r#   r$   r&   rB   s
        @@@@rC   r8   zMamba2Model.__init__b   s+   )3"/"/"/!-{++"""" +0O
 

 9D$       %%%
9
 
9
 
9
5$.$+ f0f6OPPP/Vj)6+=0
 0
,,,rD   	input_idsr(   c                 ,    |                      |          S rH   )r_   rA   rg   s     rC   embed_input_idszMamba2Model.embed_input_ids   s    y)))rD   N	positionsintermediate_tensorsinputs_embedsc                 `   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j                  D ]\  }} ||||          \  }}t                      j        st          ||d          S |                     ||          \  }}	|S )NrE   rF   )rk   rE   rF   )rE   rF   )r
   is_first_rankrj   	enumeraterc   is_last_rankr   rd   )
rA   rg   rk   rl   rm   rE   rF   ilayer_s
             rC   rK   zMamba2Model.forward   s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H!$+.. 	 	HAu&+e#=8' ' '#M88 ~~* 	&"/XFF    ;;}h??qrD   weightsc                 |   t          |                                           }t                      }|D ]\  }}d|v r|                    dd          }|                    d          r||vr9t          ||           rJ||         }t          |dt                    } |||           |                    |           |S )NA_logAz.biasweight_loader)	dictnamed_parameterssetreplaceendswithr   r;   r   add)rA   ru   params_dictloaded_paramsnameloaded_weightparamry   s           rC   load_weightszMamba2Model.load_weights   s    4002233"%%%#* 	$ 	$D-$||GS11 }}W%% $k*A*A&tT22 %E#E?<QRRMM%///d####rD   NN)rL   rM   rN   r	   rO   r8   rP   rQ   rj   r   rK   r   tupler|   r   rR   rS   s   @rC   rU   rU   `   s       AC #
 #
 #
z #
3 #
 #
 #
 #
 #
 #
J* *%, * * * * <@-1 < < 2D8	
 |d* 
   @HU33D-E$F 3s8        rD   rU   c            
           e Zd Zedddeej        ej        f         fd            Zedddeeeef         eeeef         f         fd            Z	edee
e
f         fd            Zddded	ef fd
Zdej        dej        fdZ	 	 ddej        dej        dedz  dej        dz  fdZd ZdefdZdej        dej        fdZdeeeej        f                  dee         fdZ xZS )Mamba2ForCausalLMrW   r	   r(   c                 j    t          j        |j        j        |j        j        |j        j                  S rH   )r   mamba2_state_dtyper$   dtyper%   mamba_cache_dtypemamba_ssm_cache_dtype)clsrW   s     rC   !get_mamba_state_dtype_from_configz3Mamba2ForCausalLM.get_mamba_state_dtype_from_config   s4    
 );$*$6$:
 
 	
rD   c           	          |j         }|j        j        }|j        |j        z  }t          j        ||j        |j        |j	        |j
        |j        |j                  S )a3  Calculate shapes for Mamba's convolutional and state caches.

        Args:
            vllm_config: vLLM config

        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
            - temporal_state_shape: Shape for state space model cache
        )r*   tp_world_sizer0   r1   r2   r9   r:   )parallel_configr$   r[   r<   r+   r   mamba2_state_shapetensor_parallel_sizer0   r1   r2   r9   r:   )r   rW   r   r[   r*   s        rC   !get_mamba_state_shape_from_configz3Mamba2ForCausalLM.get_mamba_state_shape_from_config   si     &5,6	%,y/DD(;/)>')' +!-
 
 
 	
rD   c                 (    t          j                    S rH   )r   mamba2_state_copy_func)r   s    rC   get_mamba_state_copy_funcz+Mamba2ForCausalLM.get_mamba_state_copy_func   s    +BDDDrD   r"   rV   r'   c                
   |j         j        }|j        }t                                                       || _        || _        || _        |j         | _         t          |t          |d                    | _	        t          |j        |j        t          |d                    | _        |j        r)| j                            | j	        j                  | _        t#          |j                  | _        | j	        j        | _        d S )Nbackbone)rW   r'   lm_headrV   )r$   r[   scheduler_configr7   r8   r#   rW   rU   r   r   r   r^   r+   r   tie_word_embeddingstie_weightsr_   r   logits_processorre   )rA   rW   r'   r#   r   rB   s        rC   r8   zMamba2ForCausalLM.__init__   s    )3&7& 0'4##L,L,L
 
 
 &	22
 
 

 % 	N<33DM4LMMDL /0A B B M9 	,,,rD   rg   c                 6    | j                             |          S rH   )r   rj   ri   s     rC   rj   z!Mamba2ForCausalLM.embed_input_ids	  s    },,Y777rD   Nrk   rl   rm   c                 6    |                      ||||          }|S rH   )r   )rA   rg   rk   rl   rm   rI   rE   s          rC   rK   zMamba2ForCausalLM.forward  s)     y"6
 
 rD   c                 (     | j         j        |fi |S rH   )mamba_cachecopy_inputs_before_cuda_graphs)rA   input_buffersrI   s      rC   r   z0Mamba2ForCausalLM.copy_inputs_before_cuda_graphs  s!    >t>}WWPVWWWrD   
batch_sizec                 6    | j                             |          S rH   )r   "get_seqlen_agnostic_capture_inputs)rA   r   s     rC   r   z4Mamba2ForCausalLM.get_seqlen_agnostic_capture_inputs  s    BB:NNNrD   rE   c                 <    |                      | j        |          }|S rH   )r   r   )rA   rE   logitss      rC   compute_logitsz Mamba2ForCausalLM.compute_logits   s    &&t|]CCrD   ru   c                 J    t          |           }|                    |          S rH   )r   r   )rA   ru   loaders      rC   r   zMamba2ForCausalLM.load_weights$  s#    "4((""7+++rD   r   )rL   rM   rN   classmethodr   rP   r   r   intr   r   r   r	   rO   r8   rQ   rj   r   rK   r   r   r   r   r|   r   rR   rS   s   @rC   r   r      s0        
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3m 44	5
 
 
 [
8 E%0BDV0V*W E E E [E BD 
 
 
z 
3 
 
 
 
 
 
88 8%, 8 8 8 8 <@-1 < < 2D8	
 |d*   X X XOS O O O OEL U\    ,HU33D-E$F ,3s8 , , , , , , , ,rD   r   )4__doc__collections.abcr   rP   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   r	   vllm.distributed.parallel_stater
   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   -vllm.model_executor.layers.mamba.mamba_mixer2r   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   r   r   vllm.sequencer   utilsr   r   r   r   r   r   rQ   KVCacheModuler!   rU   r    rD   rC   <module>r      s     $ $ $ $ $ $        $ $ $ $ $ $ = = = = = = < < < < < < < < < < 8 8 8 8 8 8 8 8 8 8 8 8 G G G G G G E E E E E E            G F F F F F        P O O O O O         
 . - - - - -              el*
+.  .  .  .  .  .  .  . b Z Z Z Z Z") Z Z Zzh, h, h, h, h,I}o/Ih, h, h, h, h,rD   