
    .`iI              
       T   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZmZ d dlmZ d dlmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=  G d dej>                  Z? G d dej>                  Z@ G d dej>                  ZA G d dej>                  ZBe G d d ej>                              ZC G d! d"ej>        e1e3e4e2e5          ZDdS )#    )Iterable)isliceN)
Lfm2Config)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)	ShortConv)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )HasInnerStateIsHybridSupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   r     e Zd Z	 	 ddedededededz  dedz  d	ef fd
Zde	j
        de	j
        fdZ xZS )Lfm2MLPN dimff_dimmultiple_ofauto_adjust_ff_dimffn_dim_multiplierquant_configprefixc                 X   t                                                       |r7t          d|z  dz            }|t          ||z            }|||z   dz
  |z  z  }t          ||gdz  d|| d          | _        t          ||d|| d          | _        t                      | _        d S )	N      r   F.w1)
input_sizeoutput_sizesbiasr3   r4   z.w2r9   output_sizer;   r3   r4   )	super__init__intr   w1r   w2r   act_fn)	selfr.   r/   r0   r1   r2   r3   r4   	__class__s	           s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/lfm2.pyr?   zLfm2MLP.__init__3   s     	 	OVa((F!-/&899 Vk%9A%=+$MNF, A%>>>
 
 
 $%>>>
 
 
 !ll    xreturnc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rA   rC   rB   )rD   rH   gate_up_s       rF   forwardzLfm2MLP.forwardU   s>    WWQZZ
KK  wwqzz1rG   )Nr-   )__name__
__module____qualname__r@   boolfloatr   strr?   torchTensorrN   __classcell__rE   s   @rF   r,   r,   2   s         37 #  # #  # 	 #
 ! # "DL # )4/ #  #  #  #  #  #  #D %,        rG   r,   c                        e Zd Z	 	 	 	 ddededededed	ed
edz  dedz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )Lfm2Attention    Nr-   config	layer_idxhidden_size	num_headsnum_kv_headsmax_position_embeddingscache_configr3   r4   rI   c
           
         t                                                       || _        || _        || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        t          | j        | j
        | j        | j        d||	 d          | _        t#          | j        | j
        z  | j        d||	 d          | _        t'          | j
        | j        |j        d	
          | _        t-          | j        | j
        | j        | j        ||	 d          | _        t1          | j
        |j                  | _        t1          | j
        |j                  | _        d S )Nr   r   g      F	.qkv_proj)r^   	head_sizetotal_num_headstotal_num_kv_headsr;   r3   r4   z	.out_projr<   T)max_positionrope_parametersis_neox_stylez.attn)r`   rb   r4   eps)r>   r?   r]   r^   r`   r   rf   r_   rg   maxhead_dimq_sizekv_sizescalingra   r   qkv_projr   out_projr   ri   
rotary_embr   attnr   norm_epsq_layernormk_layernorm)rD   r\   r]   r^   r_   r`   ra   rb   r3   r4   tp_sizerE   s              rF   r?   zLfm2Attention.__init__]   s=    	"&(688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF(D,@@nt}4(4=8}d*'>$)(m 0#6%'''
 
 
 *+dm;(%'''
 
 
 #M5"2	
 
 
 NML*%###
 
 
	 #4=foFFF"4=foFFFrG   	positionshidden_statesc                    |j         \  }}|                     |          \  }}|                    | j        | j        | j        gd          \  }}}|                    || j        | j                                                  }|                    || j	        | j                                                  }| 
                    |          }|                     |          }|                     |||          \  }}|                    || j        | j        z            }|                    || j	        | j        z            }|                     |||          }	|                     |	          \  }
}|
S )N)r.   )shaperr   splitro   rp   viewr_   rn   
contiguousr`   rw   rx   rt   ru   rs   )rD   rz   r{   n_tokensrM   qkvqkvattn_outputoutputs              rF   rN   zLfm2Attention.forward   s?   
 $)!}--Q))T[$,E2)NN1aFF8T^T];;FFHHFF8T.>>IIKKQQy!Q//1FF8T^dm;<<FF8T.>??ii1a((MM+..	rG   )r[   NNr-   )rO   rP   rQ   r   r@   r   r   rT   r?   rU   rV   rN   rW   rX   s   @rF   rZ   rZ   \   s        (,+/26DG DGDG DG 	DG
 DG DG "%DG "D(DG )4/DG DG 
DG DG DG DG DG DGL< | 
	       rG   rZ   c                        e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        de
j        de
j        dz  d	ee
j        e
j        f         fdZ xZS )Lfm2AttentionDecoderLayerNr-   r\   r]   model_configrb   r3   r4   rI   c                    t                                                       || _        || _        || _        t          |dd          }t          |||j        |j        |j	        |||| d	  	        | _
        t          |j        |j        |j        |j        |j        || d          | _        t%          |j        |j                  | _        t%          |j        |j                  | _        d S )Nra   r[   z
.self_attn)	r\   r]   r^   r_   r`   ra   rb   r3   r4   .feed_forwardr.   r/   r0   r1   r2   r3   r4   rk   )r>   r?   r4   r\   r]   getattrrZ   r^   num_attention_headsnum_key_value_heads	self_attnr,   	block_dimblock_ff_dimblock_multiple_ofblock_auto_adjust_ff_dimblock_ffn_dim_multiplierfeed_forwardr   rv   operator_normffn_norm)	rD   r\   r]   r   rb   r3   r4   ra   rE   s	           rF   r?   z"Lfm2AttentionDecoderLayer.__init__   s    	"")&2KT"R"R&*03$;%%(((

 

 

 $ &0%>%>%+++
 
 
 %V%7V_MMM 2HHHrG   rz   r{   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          |fS )N)rz   r{   )r   r   r   r   )rD   rz   r{   r   kwargss        rF   rN   z!Lfm2AttentionDecoderLayer.forward   s     $H ..}==MM&*&8&8&Q&Q#M8-XX"&--x"H"Hx  //99rG   NNNr-   )rO   rP   rQ   r   r@   r	   r   r   rT   r?   rU   rV   tuplerN   rW   rX   s   @rF   r   r      s       
 ,0+/26&I &I&I &I "D(	&I
 "D(&I )4/&I &I 
&I &I &I &I &I &IP:<: |: ,%	: 
u|U\)	*: : : : : : : :rG   r   c                        e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Z	de
j        de
j        dz  fdZ xZS )Lfm2ShortConvDecoderLayerNr-   r\   r]   r   rb   r3   r4   rI   c           
         t                                                       || _        t          ||j        |||| d          | _        t          |j        |j        |j	        |j
        |j        || d          | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz.conv)r\   r.   r]   r   rb   r4   r   r   rk   )r>   r?   r]   r   conv_dim
short_convr,   r   r   r   r   r   r   r   r^   rv   r   r   )rD   r\   r]   r   rb   r3   r4   rE   s          rF   r?   z"Lfm2ShortConvDecoderLayer.__init__   s     	"#%%###
 
 
 $ &0%>%>%+++
 
 
 %V%7V_MMM 2HHHrG   r{   r   c                     ||}|                      |          }n|                      ||          \  }}t          j        |          }|                     ||           |                     ||          \  }}|                     |          }||fS rK   )r   rU   
empty_liker   r   r   )rD   r{   r   r   r   s        rF   rN   z!Lfm2ShortConvDecoderLayer.forward  s     $H ..}==MM&*&8&8&Q&Q#M8!-00	
 	
 	
 #'--"A"Ax))-88h&&rG   r   )rO   rP   rQ   r   r@   r	   r   r   rT   r?   rU   rV   rN   rW   rX   s   @rF   r   r      s        
 ,0+/26I II I "D(	I
 "D(I )4/I I 
I I I I I I@'|' ,%' ' ' ' ' ' ' 'rG   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
deeeej        f                  dee         fdZ xZS )	Lfm2Modelr-   r4   vllm_configr4   c                J    t                                                       |j        j        |j        |j        |j         _        j         _        t           j        j	        j                   _
        dt          f fd}t          j        || d          \   _         _         _        t#          ddgj	                   _        t'                      j        r"t+          j	        j                   _        d S t1                       _        d S )	N)org_num_embeddingsr4   c                     t          |           }j        j        |         dk    }|rt          nt          } |||           S )Nfull_attentionr3   r4   )r&   r\   layer_typesr   r   )	r4   r]   is_attnlayer_classrb   r\   r   r3   rD   s	       rF   	get_layerz%Lfm2Model.__init__.<locals>.get_layer9  sf    +F33Ik-i8<LLG-4S)):S  ;)   rG   z.layersr   r{   r   rk   )r>   r?   r   	hf_configrb   r3   r\   
vocab_sizer   r^   embed_tokensrT   r)   num_hidden_layersstart_layer	end_layerlayersr(   make_empty_intermediate_tensorsr   is_last_rankr   rv   embedding_normr%   )	rD   r   r4   r   rb   r\   r   r3   rE   s	   `   @@@@rF   r?   zLfm2Model.__init__)  sP   )3"/"/"/ +2OV/FDU
 
 
	c 	 	 	 	 	 	 	 	 	 	 9D$i68J8J8J9
 9
 9
5$.$+ 0Wj)6+=0
 0
, >>& 	3")&*<&/"R"R"RD"0"2"2DrG   	input_idsrI   c                 ,    |                      |          S rK   )r   rD   r   s     rF   embed_input_idszLfm2Model.embed_input_idsT  s      +++rG   Nrz   intermediate_tensorsinputs_embedsc                 r   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr{   r   )rz   r{   r   )r{   r   )
r   is_first_rankr   r   r   r   r   r   r   r   )	rD   r   rz   r   r   r{   r   layerrM   s	            rF   rN   zLfm2Model.forwardW  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e#+!' ' '#M88
 ~~* 	&"/XFF    ..}hGGqrG   weightsc                    g d}t          |                                           }t                      }|D ]\  }}d|v r|                    ddd          }|D ]O\  }}}	||vr|                    ||          }t	          ||           r2||         }
|
j        } ||
||	            n;t	          ||           r||         }
t          |
dt                    } ||
|           |                    |           |S )N))rd   z.q_projr   )rd   z.k_projr   )rd   z.v_projr   )r8   r8   r   )r8   z.w3r   z.conv.z.short_conv.r   weight_loader)	dictnamed_parameterssetreplacer'   r   r   r   add)rD   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               rF   load_weightszLfm2Model.load_weightsv  sC   "
 "
 "
 4002233"%%%#* 	$ 	$D-4||Hna@@5K 4 41
Kd**||K<<*466 #D) % 3e]H===*466 #D) '@U V Ve]333d####rG   NN)rO   rP   rQ   r
   rT   r?   rU   rV   r   r   rN   r   r   r   r   rW   rX   s   @rF   r   r   '  s       AC )3 )3 )3z )33 )3 )3 )3 )3 )3 )3V, ,%, , , , , <@-1 < < 2D8	
 |d* 
   > HU33D-E$F  3s8                rG   r   c                       e Zd Zg dddgdgdZdddZed	d
deej        df         fd            Z	ed	d
deee
e
f                  fd            Zedee         fd            Zddd	ededdf fdZdej        dej        fdZ	 	 ddej        dej        dedz  dej        dz  dej        f
dZdej        dej        fdZdeeeej        f                  dee         fdZ xZS ) Lfm2ForCausalLM)q_projk_projv_projrA   w3in_proj)rr   rA   r   input_embeddingsoutput_embeddings)r   lm_headr   r
   rI   .c                 T    t          j        |j        j        |j        j                  S rK   )r   short_conv_state_dtyper   dtyperb   mamba_cache_dtype)clsr   s     rF   !get_mamba_state_dtype_from_configz1Lfm2ForCausalLM.get_mamba_state_dtype_from_config  s+    
 )?$*$6
 
 	
rG   c                 t    |j         }|j        j        }t          j        |j        |j        |j                  S )zCalculate shapes for LFM2's convolutional cache.

        Args:
            vllm_config: vLLM config

        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
        )tp_world_sizeintermediate_sizeconv_kernel)parallel_configr   r   r   short_conv_state_shapetensor_parallel_sizer   conv_L_cache)r   r   r   r   s       rF   !get_mamba_state_shape_from_configz1Lfm2ForCausalLM.get_mamba_state_shape_from_config  sC     &5,6	(?)>'0!.
 
 
 	
rG   c                 (    t          j                    S rK   )r   short_conv_state_copy_func)r   s    rF   get_mamba_state_copy_funcz)Lfm2ForCausalLM.get_mamba_state_copy_func  s    +FHHHrG   r-   r   r4   Nc          	      Z   |j         j        }|j        }|j        }|j        dk    rt          d          t                                                       || _        t          |t          |d                    | _        t                      j        rZt          |j        |j        |t          |d                    | _        | j                            | j        j                  | _        nt)                      | _        t+          |j                  | _        | j        j        | _        d S )NallzcLfm2 currently does not support 'all' prefix caching, please use '--mamba-cache-mode=align' insteadmodel)r   r4   r   r   )r   r   r3   rb   mamba_cache_modeNotImplementedErrorr>   r?   r\   r   r*   r   r   r   r   r   r^   r   tie_weightsr   r%   r   logits_processorr   )rD   r   r4   r\   r3   rb   rE   s         rF   r?   zLfm2ForCausalLM.__init__  s#   )3"/"/(E11%@  
 	#L,I,I
 
 

 >>& 		,)!")#FI66	  DL  <33DJ4KLLDLL)++DL /0A B B J6 	,,,rG   r   c                 6    | j                             |          S rK   )r   r   r   s     rF   r   zLfm2ForCausalLM.embed_input_ids  s    z)))444rG   rz   r   r   c                 6    |                      ||||          }|S rK   )r   )rD   r   rz   r   r   r   r{   s          rF   rN   zLfm2ForCausalLM.forward  s)     

y"6
 
 rG   r{   c                 <    |                      | j        |          }|S rK   )r   r   )rD   r{   logitss      rF   compute_logitszLfm2ForCausalLM.compute_logits  s    &&t|]CCrG   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r$   r\   tie_word_embeddingsr   )rD   r   loaders      rF   r   zLfm2ForCausalLM.load_weights	  sC    "+/;+JTJ<<PT
 
 
 ""7+++rG   r   )rO   rP   rQ   packed_modules_mappingembedding_modulesclassmethodr   rU   r   r   r@   r   r   r   r
   rT   r?   rV   r   r   rN   r  r   r   r   rW   rX   s   @rF   r   r     s?       
 
 
 
 ;  +& 
 
!
 
u{C	 
 
 
 [
 
!
 
uS#X	
 
 
 [
, I%0B*C I I I [I BD 
 
 
z 
3 
 
 
 
 
 
 
B5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
   EL U\    ,HU33D-E$F ,3s8 , , , , , , , ,rG   r   )Ecollections.abcr   	itertoolsr   rU   torch.nnnntransformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   +vllm.model_executor.layers.mamba.short_convr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r    r!   r"   r#   utilsr$   r%   r&   r'   r(   r)   r*   Moduler,   rZ   r   r   r   r    rG   rF   <module>r!     s   % $ $ $ $ $              # # # # # # * * * * * * = = = = = = < < < < < < < < < < O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G            B A A A A A F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - X X X X X X X X X X X X X X                 ' ' ' ' 'bi ' ' 'TX X X X XBI X X Xv7: 7: 7: 7: 7:	 7: 7: 7:t3' 3' 3' 3' 3'	 3' 3' 3'l n n n n n	 n n nbu, u, u, u, u,I}lJ-u, u, u, u, u,rG   