
    .`i%'              	          d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/ e0ej1        ej1        f         Z2 G d dej3                  Z4e
 G d dej3                              Z5 G d dej3        e$e%e'e&          Z6dS )zPyTorch MAMBA model.    )Iterable)isliceN)nn)MambaConfig)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group)RMSNorm)LogitsProcessor)
MambaMixer)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)HasInnerStateIsAttentionFreeSupportsMambaPrefixCaching
SupportsPP)IntermediateTensors   )AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                        e Zd Z	 	 	 	 	 ddededz  dedz  dedz  dedz  d	ed
df fdZ	de
j        de
j        dz  fdZ xZS )MambaDecoderLayerNF configmodel_configcache_configquant_configis_lora_enabledprefixreturnc                    t                                                       || _        |j        dk    | _        || _        | j        r|j        nd }t          |j        |j	        |j
        |j        |j        |j        |j        | j        | j         ||j        | j        ||| d          | _        t#          |j        |j                  | _        d S )Nfalcon_mambaz.mixer)hidden_sizessm_state_sizeconv_kernel_sizeintermediate_sizetime_step_rankuse_conv_biasuse_biasuse_rms_normrms_norm_has_weightrms_norm_eps
activationr)   r&   r'   r*   eps)super__init__r%   
model_typeis_falcon_mambar)   mixer_rms_epsr   r.   
state_sizeconv_kernelr1   r2   r3   r4   
hidden_actmixerr   layer_norm_epsilonnorm)	selfr%   r&   r'   r(   r)   r*   r?   	__class__s	           t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mamba.pyr<   zMambaDecoderLayer.__init__2   s     	%0NB.040DN,,$*!,#/$6!0 ._-$($8 8&( 0%%$$$
 
 

$ F.F4MNNN			    hidden_statesresidualc                     ||}|                      |          }n|                      ||          \  }}t          j        |          }|                     ||           ||fS N)rE   torch
empty_likerC   )rF   rJ   rK   kwargsoutputs        rH   forwardzMambaDecoderLayer.forwardT   si     $H IIm44MM&*iix&H&H#M8!-00

=&)))xrI   )NNNFr$   )__name__
__module____qualname__r   r	   r   r   boolstrr<   rN   TensorrR   __classcell__rG   s   @rH   r#   r#   1   s         ,0+/26', O  O O "D( O "D(	 O
 )4/ O  O  O 
 O  O  O  O  O  OD |  ,%               rI   r#   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
deeeej        f                  dee         fdZ xZS )
MambaModelr$   r*   vllm_configr*   c                   t                                                       |j        j        |j        |j        |j        |j        }t          |          | _        j	        | _	        t          | j	        j                  | _        t          j        fd| d          \  | _        | _        | _        t%          j        j                  | _        t+          ddgj                  | _        d S )Nc                 .    t          |           S )N)r&   r'   r(   r)   r*   )r#   )r*   r'   r%   r)   r&   r(   s    rH   <lambda>z%MambaModel.__init__.<locals>.<lambda>|   s)    ,))) /   rI   z.layersr]   r9   rJ   rK   )r;   r<   r&   	hf_configr'   r(   lora_configrV   r%   
vocab_sizer   r.   
embeddingsr    num_hidden_layersstart_layer	end_layerlayersr   rD   norm_fr   make_empty_intermediate_tensors)
rF   r^   r*   rc   r'   r%   r)   r&   r(   rG   s
       @@@@@rH   r<   zMambaModel.__init__g   s%   )3"/"/"/!-{++ +0O
 

 9D$        %%%9
 9
 9
5$.$+ f0f6OPPP/Vj)6+=0
 0
,,,rI   	input_idsr+   c                 ,    |                      |          S rM   )re   rF   rl   s     rH   embed_input_idszMambaModel.embed_input_ids   s    y)))rI   N	positionsintermediate_tensorsinputs_embedsc                 r   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )NrJ   rK   )rp   rJ   rK   )rJ   rK   )
r   is_first_rankro   r   ri   rg   rh   is_last_rankr   rj   )	rF   rl   rp   rq   rr   rJ   rK   layer_s	            rH   rR   zMambaModel.forward   s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e#=8' ' '#M88 ~~* 	&"/XFF    ;;}h??qrI   weightsc                 |   t          |                                           }t                      }|D ]\  }}d|v r|                    dd          }|                    d          r||vr9t          ||           rJ||         }t          |dt                    } |||           |                    |           |S )NA_logAz.biasweight_loader)	dictnamed_parameterssetreplaceendswithr   getattrr   add)rF   rx   params_dictloaded_paramsnameloaded_weightparamr|   s           rH   load_weightszMambaModel.load_weights   s    4002233"%%%#* 	$ 	$D-$||GS11}}W%% $k*A*A&tT22 %E#E?<QRRMM%///d####rI   NN)rS   rT   rU   r
   rW   r<   rN   rX   ro   r   rR   r   tupler   r   rY   rZ   s   @rH   r\   r\   e   s       AC #
 #
 #
z #
3 #
 #
 #
 #
 #
 #
J* *%, * * * * <@-1 < < 2D8	
 |d* 
   <HU33D-E$F 3s8        rI   r\   c            
           e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  fdZ
edddeej        ej        f         fd            Zedddeeeef         eeef         f         fd            Zedeeef         fd            Zd ZdefdZdej        dej        fdZdeeeej        f                  dee         fdZ xZS )MambaForCausalLMr$   r]   r^   r*   c                   |j         j        }|j        | _        t                                                       || _        || _        |j         | _         t          |t          |d                    | _	        |j
        r| j	        j        | _        n/t          |j        |j        t          |d                    | _        t!          |j                  | _        | j	        j        | _        d S )Nbackbone)r^   r*   lm_headr]   )r&   rb   scheduler_configr;   r<   r%   r^   r\   r!   r   tie_word_embeddingsre   r   r   rd   r.   r   logits_processorrk   )rF   r^   r*   r%   rG   s       rH   r<   zMambaForCausalLM.__init__   s    )3 + <&'4"#L,L,L
 
 
 % 	=3DLL)!"#FI66  DL !00A B B M9 	,,,rI   rl   r+   c                 6    | j                             |          S rM   )r   ro   rn   s     rH   ro   z MambaForCausalLM.embed_input_ids   s    },,Y777rI   Nrp   rq   rr   c                 6    |                      ||||          }|S rM   )r   )rF   rl   rp   rq   rr   rP   rJ   s          rH   rR   zMambaForCausalLM.forward   s)     y"6
 
 rI   r
   c                 j    t          j        |j        j        |j        j        |j        j                  S rM   )r   mamba1_state_dtyper&   dtyper'   mamba_cache_dtypemamba_ssm_cache_dtype)clsr^   s     rH   !get_mamba_state_dtype_from_configz2MambaForCausalLM.get_mamba_state_dtype_from_config   s4    
 );$*$6$:
 
 	
rI   c                     |j         }|j        j        }t          j        |j        |j        |j        |j                  S )N)tp_world_sizer1   r@   rA   )	parallel_configr&   rb   r   mamba1_state_shapetensor_parallel_sizer1   r@   rA   )r   r^   r   rb   s       rH   !get_mamba_state_shape_from_configz2MambaForCausalLM.get_mamba_state_shape_from_config   sI    
 &5,6	(;)>'9 +!-	
 
 
 	
rI   c                 (    t          j                    S rM   )r   mamba1_state_copy_func)r   s    rH   get_mamba_state_copy_funcz*MambaForCausalLM.get_mamba_state_copy_func
  s    +BDDDrI   c                 (     | j         j        |fi |S rM   )mamba_cachecopy_inputs_before_cuda_graphs)rF   input_buffersrP   s      rH   r   z/MambaForCausalLM.copy_inputs_before_cuda_graphs  s!    >t>}WWPVWWWrI   
batch_sizec                 6    | j                             |          S rM   )r   "get_seqlen_agnostic_capture_inputs)rF   r   s     rH   r   z3MambaForCausalLM.get_seqlen_agnostic_capture_inputs  s    BB:NNNrI   rJ   c                 <    |                      | j        |          }|S rM   )r   r   )rF   rJ   logitss      rH   compute_logitszMambaForCausalLM.compute_logits  s    &&t|]CCrI   rx   c                 J    t          |           }|                    |          S rM   )r   r   )rF   rx   loaders      rH   r   zMambaForCausalLM.load_weights  s#    "4((""7+++rI   r   )rS   rT   rU   r
   rW   r<   rN   rX   ro   r   rR   classmethodr   r   r   intr   r   r   r   r   r   r   r   r   rY   rZ   s   @rH   r   r      s-        BD 
 
 
z 
3 
 
 
 
 
 
88 8%, 8 8 8 8 <@-1 < < 2D8	
 |d*    
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3h/	0
 
 
 [
 E%0BDV0V*W E E E [EX X XOS O O O OEL U\    ,HU33D-E$F ,3s8 , , , , , , , ,rI   r   )7__doc__collections.abcr   	itertoolsr   rN   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r	   r
   vllm.distributed.parallel_stater   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.mamba_mixerr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   r   r   r   vllm.sequencer   utilsr   r   r   r    r!   r   rX   KVCacheModuler#   r\   r    rI   rH   <module>r      s     $ $ $ $ $ $              $ $ $ $ $ $ = = = = = = < < < < < < < < < < 8 8 8 8 8 8 8 8 8 8 8 8 G G G G G G C C C C C C            G F F F F F        P O O O O O            . - - - - -              el*
+1  1  1  1  1 	 1  1  1 h W W W W W W W WtZ, Z, Z, Z, Z,I}oz;UZ, Z, Z, Z, Z,rI   