
    .`i#              	          d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z= ddl>m?Z?m@Z@mAZAmBZB ddlCmDZDmEZEmFZFmGZGmHZH ddlImJZJ ddlKmLZL ddlMmNZN dd lOmPZP dd!lQmRZR dd"lSmTZT  G d# d$e          ZUd%eUd&eVd'eWfd(ZX ejY        d)           G d* d+e&e                      ZZd,ej[        d-ej[        d.e\d'dfd/Z]d,ej[        d-ej[        d.e\d'dfd0Z^ ePd)e]d-ge^1            G d2 d3ej_                  Z` G d4 d5ej_                  Za G d6 d7ej_                  Zb G d8 d9ej        j_                  Zce G d: d;ej        j_                              Zd G d< d=ej        j_        e?eAeBe@          ZedS )>zInference-only PLaMo2 model.    )Iterable)isliceN)nn)PretrainedConfig)	Attention)support_torch_compile)
VllmConfigget_current_vllm_config)divide$get_tensor_model_parallel_world_size)get_pp_group)ForwardContextget_forward_context)CustomOp)
SiluAndMul)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)	MambaBase)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)causal_conv1d_fncausal_conv1d_update)selective_state_update) mamba_chunk_scan_combined_varlen)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)composed_weight_loaderdefault_weight_loadersharded_weight_loader)HasInnerStateIsHybridSupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)set_weight_attrs)current_platform)IntermediateTensors)direct_register_custom_op)AttentionMetadata)Mamba2AttentionMetadatac                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   dS )Plamo2Configplamo2
model_typehidden_sizenum_hidden_layersrms_norm_epsnum_attention_headshidden_size_per_headnum_key_value_headsmamba_d_statemamba_d_convmamba_num_heads
mamba_stepintermediate_size
vocab_sizeN)__name__
__module____qualname__r:   str__annotations__intfloat     u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/plamo2.pyr8   r8   L   s         JOOOOOOOOrO   r8   configireturnc                     | j         dk    sJ | j        | j         dz  k    r|| j        dz
  k    S || j         z  | j         dz  k    S )N      )rD   r<   )rQ   rR   s     rP   is_mambarW   a   sZ    q    F$5$:;;F,q000!!v'8A'=>>rO   plamo2_mamba_mixerc                   ,    e Zd Zdddededdf fdZd Zd	ej        d
ej        fdZ	d	ej        d
ej        fdZ
d	ej        d
ej        fdZdeej        ej        f         fdZdeeedf         eedf         f         fdZedefd            Z xZS )Plamo2MambaMixer prefixvllm_configr]   rS   Nc          	      	   t                                                       |j        j        | _        |j        | _        |j        | _        |j        | _        t          |j                  | _	        | j        j
        | _
        | j        j        | _        | j        j        | _        | j        j        | j        j        z  | _        t%                      | _        | j        j        | _        | j        j        | _        t-          d| j
        dz            | _        t1          | j        | j        d| dd          | _        | j        j        j                            d          | j        j        _        t;          | j
        | j        gdz  d| j        | dd	          | _        t?          | j        | j        | j        dz  z   d| j        | d
d	          | _         t1          | j        | j        d| j        | dd	          | _!        tE          j#        tI          j%        tM          | j        | j                  tH          j'                            | _(        tE          j#        tI          j)        tM          | j        | j                                      | _*        tE          j#        tI          j)        tM          | j        | j                                      | _+        tY          | j*        dt[          d          i           t]          t[          d          d           }tY          | j(        d|i           tY          | j+        dt[          d          i           t?          | j        | j
        dd| j        | dd          | _/        d| _0        tc          | j        | j        j2                  | _3        tc          | j        | j        j2                  | _4        tc          | j        | j        j2                  | _5        | j        j6        | _7        tq                      j9        }||j:        v rtw          d|           | |j:        |<   tI          j<        g           tI          j<        g           f| _=        | j7        dk    s
J d            || _>        d S )N@      Fz.conv1d)
input_sizeoutput_sizebiasr]   return_biasrU   rV   z.in_proj)rd   quant_configr]   re   z
.bcdt_projz.dt_proj)dtypeweight_loaderr   c                 P    t          j        |                                            S N)torchexprM   )xs    rP   <lambda>z+Plamo2MambaMixer.__init__.<locals>.<lambda>   s    177991E1E0E rO   Tz	.out_proj)rd   input_is_parallelrf   r]   re   siluepszDuplicate layer name: zchunk_size must be set for v1)?super__init__model_config	hf_configrQ   cache_configrf   boollora_configis_lora_enabledr;   rA   ssm_state_sizerB   conv_kernel_sizerC   r?   rE   r   tp_sizehead_dim	num_headsmaxtime_step_rankr   conv1dweightdata	unsqueezer   in_projr   	bcdt_projdt_projr   	Parameterrk   emptyr   float32AonesDdt_biasr1   r'   r%   out_proj
activationr   r=   dt_normB_normC_normmamba_chunk_size
chunk_sizer
   compilation_configstatic_forward_context
ValueErrortensorkv_cacher]   )selfr^   r]   kwargsa_weight_loaderr   	__class__s         rP   ru   zPlamo2MambaMixer.__init__r   s7   !.8'4'4'4#K$;<<;2"k7 $ 8K'$+*JJ 	 <==84!"d&6"&<==*,.%%%
 
 
 #'+"4"9"C"CA"F"F1#$q(*&&&
 
 
 +"$"5"99*(((
 
 
 ,N*&&&
 
 
 Kt~t|44m  
 
 ej)M)MNNOO|EJvdndl/S/S$T$TUU/3H3K3K!LMMM0!!$$&E&E
 
 	/?!CDDD9Nq9Q9Q'RSSS)""*'''
 
 
 !t28PQQQd1t{7OPPPd1t{7OPPP+6466I'>>>>f>>???<@1&9b))5<+;+;<"$$$&E$$$rO   c                    | j         r(|                     |                                          }n|                     |          }t          j        || j        | j        | j        gd          \  }}}|                     |                                          }|                     |                                          }| 	                    |                                          }| 
                    |          }|||fS Nrs   dim)r{   r   
contiguousrk   splitr|   r   r   r   r   r   )r   hidden_statesssm_parametersBC	time_stepdts          rP   _project_ssm_parametersz(Plamo2MambaMixer._project_ssm_parameters   s     	;!^^M,D,D,F,FGGNN!^^M::N+ $"5t7JK
 
 
1i LL!5!5!7!788	KK''KK''\\)$$!RxrO   r   outputc                     d S rj   rN   r   r   r   r   s       rP   forward_nativezPlamo2MambaMixer.forward_native   s	     	rO   c                 \    t           j        j                            ||| j                   d S rj   )rk   opsvllmrX   r]   r   s       rP   forwardzPlamo2MambaMixer.forward   s5     		))K	
 	
 	
 	
 	
rO   c                     t                      }|j        }|t          |t                    sJ || j                 }t          |t
                    sJ | j        |j                 }|d                             dd          }|d         }|j	        }	|j
        }
|j        }|j        }|j        }|j        }|j        }|j        }|                     |          }|                    dd          \  }}| j        j                            | j        j                            d          | j        j                            d                    }|j|                    dd                                                              dd                                          }|                     |          |d d <   d S |j        }|j        }|j        }|dk    }|dk    }||z   }t;          j        |d |         ||gd          \  }}t;          j        |d |         ||gd          \  }}t;          j        |	||gd          \  }}t;          j        ||z   | j         | j!        z  | j"        z  g|j#        |j$                  } t;          j        | ||gd          \  }!}"|r|                    dd          }#tK          |#|| j        j&        | j'        ||
|||	  	        }|                    dd          }|d |         }|                                }| (                    |          \  }$}%}&d }'|
*|r(t;          j)        |
d d d d d f         ||         d          }'tU          |                    || j         | j!        z  | j"                  |&| j+        |$                    |dd          |%                    |dd          f|| j,        |                    || j         | j!        z  | j"                  | j-        |||||'d	d
t]          d          f|"                    |d| j"                  |j#        d}(|(||<   |rt_          |||| j        j&        | j'        |          }ta          j1                    r|                                }| (                    |          \  }$}%}&| j+        d d d df         d d d d d f         2                    d| j"        | j3        j4                  })|&d d d d d f         2                    dd| j"                  }&| j-        d d d df         2                    d| j"                  }*| j,        d d d df         2                    d| j"                  }+|$5                    d          }$|%5                    d          }%|                    d| j         | j!        z  | j"                  }tm          |||&|)|$|%|+|7                    |d| j"                  |*d	||!                    |d| j"                             |                     |           |d |<   d S )Nr   rs   rU   rV   r   )rg   device)r   conv_stateshas_initial_statecache_indicesmetadataquery_start_locTg        inf)r   r   zr   seq_idx
cu_seqlenscu_chunk_seqlenslast_chunk_indicesinitial_statesdt_softplusdt_limitoutstate_dtype)conv_state_indices.)r   r   r   state_batch_indicesr   )8r   attn_metadata
isinstancedictr]   r6   r   virtual_engine	transposestate_indices_tensorhas_initial_states_pprep_initial_statesr   	seq_idx_pquery_start_loc_pcu_chunk_seqlen_plast_chunk_indices_pr   chunkr   r   viewsizecloner   r   num_prefillsnum_decode_tokensnum_prefill_tokensrk   r   r   r   r~   r   rg   r   r   rd   r   r   wherer    r   r   r   rM   r   r2   is_rocmexpandrQ   rA   r   r   reshape),r   r   r   r   forward_contextr   self_kv_cache
conv_state	ssm_stater   r   r   r   r   r   r   r   projected_statesgateconv_weightsr   num_decodesr   has_prefill
has_decodenum_actual_tokenshidden_states_dhidden_states_pgate_dgate_pstate_indices_tensor_dstate_indices_tensor_ppreallocated_ssm_outpreallocated_ssm_out_dpreallocated_ssm_out_prm   r   r   r   r   varlen_stater   r   r   s,                                               rP   forward_cudazPlamo2MambaMixer.forward_cuda   s    .//
 ,;+H$mT22222)$+6Mm-DEEEEE M/*HIM&q)33B;;J%a(I#0#E #0#E "/"C&1J%/I - ? - ?#0#E   <<66.44QB4??m {)..K##A&&(:(?(?(B(B
 
   ''1--3355??1EEjll  m44F111IF$1#5*="Q& 1_
.< ,1;,,,-,-,
 ,
 ,
(
 ###${4F&GQ
 
 
 :? ,':
 :
 :
6 6  %{"[04</4=@  % ' 
  
  
 :? ,-:
 :
 :
6 6  >	=  ))!Q//A. ?&"64& 1
 
 
O .771==O-.A/A.ABO .88::O33ODDHAq" "N#/4G/!&(D$)<=45" " <$$&$,(F  )1b11)1b11 &&++&$,(F  !,!2#7- uU||,*//0BBVV%O-  L6 1=I,-  3	2 #9  O  ')) ?"1"<"<">">33ODDHAq" qqq$|$QQQ4Z077DM4;#< A AAAqqq$J&&r2t}==Bl111dC<077DMJJGqqq$|$++B>>AAAAA-22DNdl2DM O #..b$-@@ $:*//ROO     &*]]3G%H%H!!!"""rO   c                     | j         J | j        J t          j        | j         j        | j        j        | j        j                  S rj   )rv   rx   r   mamba2_state_dtyperg   mamba_cache_dtypemamba_ssm_cache_dtyper   s    rP   get_state_dtypez Plamo2MambaMixer.get_state_dtype  sP     ,,, ,,,(;#/3
 
 	
rO   .c           	          t          j        | j        t                      d| j        | j        | j        | j                  S )Nr   rE   tp_world_sizen_groupsr   r   
state_sizeconv_kernel)r   mamba2_state_shaperE   r   r   r   r|   r}   r   s    rP   get_state_shapez Plamo2MambaMixer.get_state_shape  sD    (;"4>@@n]*-
 
 
 	
rO   c                     dS )Nmamba2rN   r   s    rP   
mamba_typezPlamo2MambaMixer.mamba_type  s    xrO   )rG   rH   rI   r	   rJ   ru   r   rk   Tensorr   r   r   tuplerg   r   rL   r  propertyr  __classcell__r   s   @rP   rZ   rZ   n   s        BD e e eJ e3 eRV e e e e e eN  &|    

|

 

 

 

 

OI|OI OI OI OI OIb
u{EK'?!@ 
 
 
 
	
uS#Xc3h'G!H 	
 	
 	
 	
 C    X    rO   rZ   r   r   
layer_namec                 j    t                      }|j        |         }|                    | |           d S )N)r   r   )r   no_compile_layersr   )r   r   r  r   r   s        rP   rX   rX     s;    
 ':&;&;O,Z8DM&AAAAArO   c                     d S rj   rN   )r   r   r  s      rP   plamo2_mamba_mixer_faker    s	    
 FrO   )op_nameop_funcmutates_args	fake_implc            	       `     e Zd Z	 	 d
dededz  deddf fdZdej        dej        fd	Z	 xZ
S )DenseMLPNr[   rQ   rf   r]   rS   c                 B   t                                                       |j        | _        |j        | _        t	          | j        | j        gdz  d| d|d          | _        t                      | _        t          | j        | j        d| d|d          | _	        d S )NrV   Fz.gate_up_proj)rd   r]   rf   re   z
.down_proj)
rt   ru   r;   rE   r   gate_up_projr   actr   	down_proj)r   rQ   rf   r]   r   s       rP   ru   zDenseMLP.__init__  s     	!-!'!96#$q(+++%
 
 
 <<*"(((%
 
 
rO   r   c                     |                      |          }|                     |          }|                     |          S rj   )r  r  r  )r   r   hs      rP   r   zDenseMLP.forward   s6    m,,HHQKK~~a   rO   )Nr[   )rG   rH   rI   r8   r!   rJ   ru   rk   r	  r   r  r  s   @rP   r  r    s         37	
 

 )4/
 	

 

 
 
 
 
 
6!U\ !el ! ! ! ! ! ! ! !rO   r  c                   d     e Zd Zdddededdf fdZdej        d	ej        dej        fd
Z xZ	S )Plamo2AttentionMixerr[   r\   r^   r]   rS   Nc          
         t                                                       |j        j        }|j        }|j        }|j        | _        t                      }|j        | _	        | j	        |z  dk    sJ | j	        |z  | _
        |j        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        |j        | _        | j
        | j        z  | _        | j        | j        z  | _        | j        dz  | _        t)          |j        | j        | j	        | j        d|| d          | _        t-          | j	        | j        z  |j        d|| d          | _        |j        }t3          |j        d          r9t5          |j        j        t8                    rt;          ||j        j                  }t=          | j        ||j        	          | _         tC          |j        |j"        
          | _#        tH          j%        &                    tI          j'        | j
        |j        f                    | j#        _(        tS          | j#        j(        dtU          d          i           tC          |j        |j"        
          | _+        tH          j%        &                    tI          j'        | j        |j        f                    | j+        _(        | j        dk    r)tS          | j+        j(        dtU          d          i           tY          | j
        | j        | j        | j        || d          | _-        d S )Nr   rU   g      Fz	.qkv_proj)rd   rf   r]   z.o_projmax_model_len)max_positionrope_parametersrq   rh   z.attn)num_kv_headsrx   r]   ).rt   ru   rv   rw   rx   rf   r;   r   r>   total_num_headsr   r@   total_num_kv_headsr   r%  r?   r   q_sizekv_sizescalingr   qkv_projr   o_projmax_position_embeddingshasattrr   r"  rL   minr"   r$  
rotary_embr   r=   q_normrk   r   r   r   r   r1   r'   k_normr   attn)
r   r^   r]   r   rQ   rx   rf   r~   r#  r   s
            rP   ru   zPlamo2AttentionMixer.__init__'  s]   )3"/"/!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFF3nt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 5;+_== 	U*$2CC
 C
 	U |[-E-STTL"M%"2
 
 

 f9v?RSSS"X//J(CDEE
 
 	K2G2J2J K	
 	
 	
 f9v?RSSS"X//J)6+FGHH
 
 "a''"_6KA6N6N$O   NML*%###
 
 
			rO   	positionsr   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|j        }	|                    |	d d         | j        j        j        z             }| j                            |                              |	          }|j        }
|                    |
d d         | j	        j        j        z             }| j	                            |                              |
          }| 
                    |||          \  }}|                     |||          }|                     |          \  }}|S r   )r+  r   r(  r)  shaper   r1  r   r   r2  r0  r3  r,  )r   r4  r   r   qkv_qkvq_shapek_shapeattn_outputr   s                rP   r   zPlamo2AttentionMixer.forwardx  s)    }--Q))T[$,E2)NN1a'IIgcrclT[%7%==>>K&&q))11'::'IIgcrclT[%7%==>>K&&q))11'::y!Q//1ii1a((KK,,	rO   
rG   rH   rI   r	   rJ   ru   rk   r	  r   r  r  s   @rP   r   r   &  s        AC O
 O
 O
z O
3 O
RV O
 O
 O
 O
 O
 O
b< |
 
       rO   r   c            	       l     e Zd Z	 ddedededdf fdZdej        d	ej        d
ej        dz  fdZ	 xZ
S )Plamo2DecoderLayerr[   r^   	layer_idxr]   rS   Nc                 @   t                                                       |j        j        }|j        }t          ||          | _        | j        rt          || d          | _        nt          || d          | _        t          ||| d          | _
        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz.mixerr^   r]   z.mlp)rQ   rf   r]   rq   )rt   ru   rv   rw   rf   rW   rZ   mixerr   r  mlpr   r;   r=   pre_mixer_normpost_mixer_normpre_mlp_normpost_mlp_norm)r   r^   rB  r]   r   rQ   rf   r   s          rP   ru   zPlamo2DecoderLayer.__init__  s.    	)3"/ 33= 	)'60A0A0A  DJJ .'60A0A0A  DJ ___
 
 
 &f&8f>QRRR&v'9v?RSSS#F$6F<OPPP$V%7V=PQQQrO   r4  r   residualc                    ||}|                      |          }n|                      ||          \  }}| j        rt          j        |          }d|i}nd|i} | j        dd|i|}| j        r|}|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS )Nr   r4  r   rN   )	rG  rW   rk   
empty_likerE  rH  rI  rF  rJ  )r   r4  r   rK  r   r   mixer_kwargss          rP   r   zPlamo2DecoderLayer.forward  s    $H //>>MM&*&9&9-&R&R#M8= 		%m44F&LL
 YL #
 
 
'

 
 = 	#"M,,];;"&"3"3M8"L"Lx//**=99h&&rO   )r[   )rG   rH   rI   r	   rL   rJ   ru   rk   r	  r   r  r  s   @rP   rA  rA    s        EGR R%R25R?BR	R R R R R R2"'<"' |"' ,%	"' "' "' "' "' "' "' "'rO   rA  c                   x     e Zd Zdddededdf fdZdej        d	ej        d
ej        dz  dej        fdZ xZ	S )Plamo2Decoderr[   r\   r^   r]   rS   Nc                   t                                                       j        j        }dt	          j                  idt          ffd}t          |j        || d          \  | _	        | _
        | _        d S )Nr{   r]   c                 v    t          |                     dd          d                   }t          d|| dS )N.rU   )r^   rB  r]   rN   )rL   rsplitrA  )r]   rB  extra_kwargsr^   s     rP   	get_layerz)Plamo2Decoder.__init__.<locals>.get_layer  sR    FMM#q11!455I% '#  	  rO   .layersr\   )rt   ru   rv   rw   ry   rz   rJ   r/   r<   start_layer	end_layerlayers)r   r^   r]   rQ   rV  rU  r   s    `   @rP   ru   zPlamo2Decoder.__init__  s    )3)40G+H+HI	c 	 	 	 	 	 	 	 9D$i68J8J8J9
 9
 9
5$.$+++rO   r4  r   rK  c                 r    t          | j        | j        | j                  D ]} ||||          \  }}||fS )Nr4  r   rK  )r   rZ  rX  rY  )r   r4  r   rK  layers        rP   r   zPlamo2Decoder.forward  sY     DK)94>JJ 	 	E&+e#+!' ' '#M88
 h&&rO   r?  r  s   @rP   rP  rP    s        AC 
 
 
z 
3 
 
 
 
 
 
 
$'<' |' ,%	'
 
' ' ' ' ' ' ' 'rO   rP  c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
 xZS )Plamo2Modelr[   r\   r^   r]   c                   t                                                       |j        j        }|| _        |j        | _        |j        | _        t          | j        |j	        | d          | _
        t          ddg|j	                  | _        t          || d          | _        t          |j	        |j                  | _        d S )Nz.embed_tokensr\   r   rK  rW  rD  rq   )rt   ru   rv   rw   rQ   pad_token_idpadding_idxrF   r$   r;   embed_tokensr.   make_empty_intermediate_tensorsrP  rZ  r   r=   norm)r   r^   r]   rQ   r   s       rP   ru   zPlamo2Model.__init__  s    )3!. +2O+++
 
 

 0Wj)6+=0
 0
, $vDVDVDVWWWF.F4GHHH			rO   	input_idsrS   c                 ,    |                      |          S rj   )rc  r   rf  s     rP   embed_input_idszPlamo2Model.embed_input_ids  s      +++rO   Nr4  intermediate_tensorsinputs_embedsc                 @   t                      j        r||}n|                     |          }d }n|J |d         }|d         }|                     |||          \  }}t                      j        st          ||d          S |                     ||          \  }}|S )Nr   rK  r\  )r   rK  )r   is_first_rankri  rZ  is_last_rankr3   re  )r   rf  r4  rj  rk  r   rK  r8  s           rP   r   zPlamo2Model.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H"&++' #. #
 #
x
 ~~* 	&"/XFF    99]H==qrO   NN)rG   rH   rI   r	   rJ   ru   rk   r	  ri  r3   r   r  r  s   @rP   r_  r_    s        AC I I Iz I3 I I I I I I(, ,%, , , , , <@-1 < < 2D8	
 |d* 
       rO   r_  c            
           e Zd ZdgdgdgdZdddeded	d
f fdZdej        d	ej        fdZ		 	 ddej        dej        de
d
z  dej        d
z  fdZeddd	eej        ej        f         fd            Zeddd	eeeef         eeeef         f         fd            Zed	eeef         fd            Zdej        d	ej        d
z  fdZdeeeej        f                  fdZ xZS )Plamo2ForCausalLMr+  r  r   )r+  r  r   r[   r\   r^   r]   rS   Nc                l   t                                                       |j        j        }|j        }|| _        || _        |j        | _        || _        | j        j        | j        _        t          |t          |d                    | _        | j        j        | _        t          | j        | j        j        | d          | _        | j        j        r)| j                            | j        j                  | _        t'          |j        | j        j                  | _        | j        j        | _        d S )NmodelrD  z.lm_headr\   )rt   ru   rv   rw   scheduler_configrQ   r^   r?   r   r_  r0   rs  rF   r#   r;   lm_headtie_word_embeddingstie_weightsrc  r   logits_processorrd  )r   r^   r]   rQ   rt  r   s        rP   ru   zPlamo2ForCausalLM.__init__0  s%   )3&7&'4 0
  ${? #L,I,I
 
 

 +0%OK#&&&
 
 

 ;* 	M<33DJ4KLLDL /t{5!
 !
 J6 	,,,rO   rf  c                 6    | j                             |          S rj   )rs  ri  rh  s     rP   ri  z!Plamo2ForCausalLM.embed_input_idsR  s    z)))444rO   r4  rj  rk  c                 6    |                      ||||          }|S rj   )rs  )r   rf  r4  rj  rk  r   r   s          rP   r   zPlamo2ForCausalLM.forwardU  s)     

y"6
 
 rO   r	   c                 j    t          j        |j        j        |j        j        |j        j                  S rj   )r   r   rv   rg   rx   r   r   )clsr^   s     rP   !get_mamba_state_dtype_from_configz3Plamo2ForCausalLM.get_mamba_state_dtype_from_configb  s4    
 );$*$6$:
 
 	
rO   c           	          |j         }|j        j        }|j        |j        z  }t          j        ||j        d|j        |j        |j        |j	                  S )a1  Calculate shapes for Mamba's convolutional and state caches.
        Args:
            vllm_config: vLLM config
        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
            - temporal_state_shape: Shape for state space model cache
        r   r   )
parallel_configrv   rw   rC   r?   r   r  tensor_parallel_sizerA   rB   )r|  r^   r  rw   rE   s        rP   !get_mamba_state_shape_from_configz3Plamo2ForCausalLM.get_mamba_state_shape_from_configm  sf     &5,6	%5	8VV(;/)>/3 .!.
 
 
 	
rO   c                 (    t          j                    S rj   )r   mamba2_state_copy_func)r|  s    rP   get_mamba_state_copy_funcz+Plamo2ForCausalLM.get_mamba_state_copy_func  s    +BDDDrO   r   c                 <    |                      | j        |          }|S rj   )rx  ru  )r   r   logitss      rP   compute_logitsz Plamo2ForCausalLM.compute_logits  s      &&t|]CCrO   weightsc                    t          |                                           }|D ]\  }dk    r| j        j        rd|vsJ t	          fdt
          j        D                       rEddddddd	}|                                D ]\  }}|v r                    ||           d
v sdv sdv sdv rdv r|	                    dd          }|
                    |j        d         | j        j        d          }|                    dd          \  }}|
                    |j        d         d          }|
                    |j        d         d          }t          j        ||gd          }dv r|	                    dd          }dv r|dz  }n'dv r|dz  }ndv r|dz  }ndv r|dz  }n	dv r|dz  }t!          |           r|         }	t#          |	dt$                    }
 |
|	|           d S )Nzlm_head.weightc              3       K   | ]}|v V  	d S rj   rN   ).0substrnames     rP   	<genexpr>z1Plamo2ForCausalLM.load_weights.<locals>.<genexpr>  s;         $     rO   z.Az.B_norm.weightz.C_norm.weightz.dt_norm.weightz.q_norm.weightz.k_norm.weight)z.A_logz.B_norm_weightz.C_norm_weightz.dt_norm_weightz	.q_weightz	.k_weightz.mixer.in_proj.weightzmixer.in_proj.qweightzmixer.in_proj.scaleszmixer.in_proj.qzeroszmixer.in_proj.weightr   rU   rs   rV   r   z.pre_mixer_normg      ?z.post_mixer_normg?z.pre_mlp_normz.post_mlp_normgWfѷ?zmodel.norm.weightrh   )r   named_parametersrQ   rv  anyr,   ROTARY_EMBEDS_UNUSED_WEIGHTSitemsreplacer   r   r6  rC   r   rk   catr-   getattrr&   )r   r  params_dictloaded_weightreplacementsoldnewgate_weighthidden_states_weightparamrh   r  s              @rP   load_weightszPlamo2ForCausalLM.load_weights  s   4002233#* R	0 R	0D- '''DK,K''{::::    /L       "2"2#4-- L )..00 2 2S$;;<<S11D (4//*d22)T11)T11)T11$1$;$;Aq$A$AM !. 5 5!'*DK,G! ! 5B4G4Gr4G4R4R11)11-2Ea2H"MM';'C'C!'*B( ($ !&	;8L*MSU V V V)T11$1$;$;Aq$A$AM !D(($#t++( D(($!T))/$,,$ 'tT22 %E#E?<QRRMM%////eR	0 R	0rO   ro  )rG   rH   rI   packed_modules_mappingr	   rJ   ru   rk   r	  ri  r3   r   classmethodr
  rg   r}  rL   r  r   r  r  r   r  r  r  s   @rP   rq  rq  '  s,         L'(;  BD  
  
  
z  
3  
  
  
  
  
  
  
D5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d*    
!
 
u{EK'	(
 
 
 [
 
!
 
uS#Xc3m 44	5
 
 
 [
4 E%0BDV0V*W E E E [E| 
	   T0HU33D-E$F T0 T0 T0 T0 T0 T0 T0 T0rO   rq  )f__doc__collections.abcr   	itertoolsr   rk   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   vllm.distributed.parallel_stater   vllm.forward_contextr   r   vllm.model_executor.custom_opr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   )vllm.model_executor.layers.mamba.abstractr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   2vllm.model_executor.layers.mamba.ops.causal_conv1dr   r   .vllm.model_executor.layers.mamba.ops.mamba_ssmr   1vllm.model_executor.layers.mamba.ops.ssd_combinedr    'vllm.model_executor.layers.quantizationr!   +vllm.model_executor.layers.rotary_embeddingr"   3vllm.model_executor.layers.vocab_parallel_embeddingr#   r$   -vllm.model_executor.model_loader.weight_utilsr%   r&   r'   %vllm.model_executor.models.interfacesr(   r)   r*   r+    vllm.model_executor.models.utilsr,   r-   r.   r/   r0   vllm.model_executor.utilsr1   vllm.platformsr2   vllm.sequencer3   vllm.utils.torch_utilsr4   vllm.v1.attention.backendr5   &vllm.v1.attention.backends.mamba2_attnr6   r8   rL   ry   rW   registerrZ   r	  rJ   rX   r  Moduler  r   rA  rP  r_  rq  rN   rO   rP   <module>r     s   # " $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = ; ; ; ; ; ; ; ; I I I I I I I I 8 8 8 8 8 8 D D D D D D D D 2 2 2 2 2 2 < < < < < < 8 8 8 8 8 8            H G G G G G ? ? ? ? ? ?                   R Q Q Q Q Q      G F F F F F @ @ @ @ @ @                
                         7 6 6 6 6 6 + + + + + + - - - - - - < < < < < < 7 7 7 7 7 7 J J J J J J    #   *?\ ?c ?d ? ? ? ? '((x x x x xy( x x )(xvB<BLB B 
	B B B B<L  
	      %	   ! ! ! ! !ry ! ! !De e e e e29 e e eP<' <' <' <' <' <' <' <'~' ' ' ' 'EHO ' ' 'D 4 4 4 4 4%(/ 4 4 4n@0 @0 @0 @0 @0	HO]L*h@0 @0 @0 @0 @0rO   