
    .`i<                     0   d dl Z d dlmZ d dl mZ d dlmZmZmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ddlmZmZmZmZ ddl m!Z!m"Z"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-  ee.          Z/de j0        de j0        de j0        de j0        de j0        de j0        de1ddfdZ2de j0        de j0        de j0        de j0        de j0        de j0        de1ddfdZ3 ede2dge3            G d  d!ej4        e%          Z5dS )"    N)	rearrange)nn)CacheConfigModelConfigget_current_vllm_config)divideget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)ForwardContextget_forward_context)init_logger)sharded_weight_loader)set_weight_attrs)direct_register_custom_op)AttentionMetadata)GDNAttentionMetadata   )FusedRMSNormGated	chunk_kdafused_kda_gatefused_recurrent_kda)ColumnParallelLinearReplicatedLinearRowParallelLinear)	MambaBase)MambaStateDtypeCalculatorMambaStateShapeCalculator)causal_conv1d_fncausal_conv1d_update)QuantizationConfigq_proj_statesk_proj_statesv_proj_statesg1betacore_attn_out
layer_namereturnc                 r    t                      }|j        |         }|                    | |||||           d S )N)r!   r"   r#   r$   r%   r&   )r   no_compile_layers_forward)	r!   r"   r#   r$   r%   r&   r'   forward_contextselfs	            r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/kda.pykda_attentionr/   )   sS     ':&;&;O,Z8DMM####          c                     d S )N )r!   r"   r#   r$   r%   r&   r'   s          r.   kda_attention_faker3   >   s	     Fr0   r/   )op_nameop_funcmutates_args	fake_implc                       e Zd Zedefd            Zdeej        ej        ej        ej        f         fdZ	deee
df         ee
df         ee
df         ee
df         f         fdZ	 	 	 	 	 dd	e
d
e
dedz  dedz  dedz  dededdf fdZdej        dej        dej        ddfdZdej        dej        dej        dej        dej        dej        ddfdZ xZS )KimiDeltaAttentionr(   c                     dS )Ngdn_attentionr2   r-   s    r.   
mamba_typezKimiDeltaAttention.mamba_typeS   s    r0   c                     | j         | j        t          d          t          j        | j         j        | j        j                  S )Nz)model_config and cache_config must be set)model_configcache_config
ValueErrorr   kda_state_dtypedtypemamba_cache_dtyper<   s    r.   get_state_dtypez"KimiDeltaAttention.get_state_dtypeW   sJ     $(9(AHIII(8#T%6%H
 
 	
r0   .c                 Z    t          j        | j        | j        | j        | j                  S )N)conv_kernel_size)r   kda_state_shapetp_size	num_headshead_dim	conv_sizer<   s    r.   get_state_shapez"KimiDeltaAttention.get_state_shape`   s/     )8L$.$-$.
 
 
 	
r0   Nh㈵> 	layer_idxhidden_sizequant_configr@   r?   rms_norm_epsprefixc           	         t                                                       t                      | _        t	                      | _        || _        || _        || _        |t          d          |j
        }	|	d         | _        |	d         | _        || _        || _        | j        | j        z  dk    sJ t          | j        | j                  | _        | j        | j        z  }
|	d         | _        t%          | j        |
d|| d          | _        t%          | j        |
d|| d	          | _        t%          | j        |
d|| d
          | _        t-          | j        | j        d|| d          | _        t%          | j        |
d|| d          | _        t3          j        t7          j        t          |
| j                  t6          j                            | _        t?          | j        dtA          d          i           t%          | j        | j        d|| d          | _!        t%          | j        |
dt6          j        | d          | _"        t%          | j        |
dt6          j        | d          | _#        t%          | j        |
dt6          j        | d          | _$        | j"        j%        j&        '                    d          | j"        j%        _&        | j#        j%        j&        '                    d          | j#        j%        _&        | j$        j%        j&        '                    d          | j$        j%        _&        t3          j        t7          j        dd| j        dt6          j                            | _(        t?          | j(        dtA          d          i           t-          | j        | j        d|| d          | _)        t%          | j        |
d|| d          | _*        tW          | j        |d          | _,        t[          |
| j        d|| d          | _.        t_                      j0        }||j1        v rt          d|           | |j1        |<   d S )Nzmodel_config must be providedrK   rJ   r   short_conv_kernel_sizeFz.q_proj)biasrR   rT   z.k_projz.v_projz	.f_a_projz	.f_b_proj)rC   weight_loaderz.b_projz	.q_conv1d)
input_sizeoutput_sizerW   params_dtyperT   z	.k_conv1dz	.v_conv1dr      z	.g_a_projz	.g_b_projsigmoid)eps
activationz.o_projzDuplicate layer name: )2super__init__r
   rI   r	   tp_rankrQ   r?   r@   rA   linear_attn_configrK   rJ   rP   rT   r   local_num_headsrL   r   q_projk_projv_projr   f_a_projf_b_projr   	Parametertorchemptyfloat32dt_biasr   r   b_projq_conv1dk_conv1dv_conv1dweightdata	unsqueezeA_logg_a_projg_b_projr   o_normr   o_projr   compilation_configstatic_forward_context)r-   rP   rQ   rR   r@   r?   rS   rT   kwargs
kda_configprojection_sizer{   	__class__s               r.   ra   zKimiDeltaAttention.__init__g   s    	;==577&((<===!4
":.#K0"~,1111%dndlCC-$.8#$<=*%%%%
 
 
 +%%%%
 
 
 +%%%%
 
 
 )M%'''
 
 
 -M%'''
 
 
 |K==U]SSS
 
 	9Nq9Q9Q'RSSS*N%%%%
 
 
 -~''''
 
 
 -~''''
 
 
 -~''''
 
 
 %)M$8$=$G$G$J$J!$(M$8$=$G$G$J$J!$(M$8$=$G$G$J$J!\K1d2AU]KKK
 

 	o7LQ7O7O%PQQQ(M%'''
 
 
 -M%'''
 
 
 (M|	
 
 
 (%%%%
 
 
 566I'>>>>f>>???<@1&999r0   hidden_states	positionsoutputc           	      .   |                     d          }|                     |          d         }|                     |          d         }|                     |          d         }|                     |          d                                                                         }|                     |                     |          d                   d         }	t          |	| j
        | j        | j                  }	|                    d          }|	                    d          }	|                     |                     |          d                   d         }
t!          |
d| j                  }t#          j        d|| j        | j        f|j        |j                  }t"          j        j                            ||||	||| j                   |                     ||          }t!          |d          }|                     |          d         |d d <   d S )Nr   )g_biasz... (h d) -> ... h ddr   )rC   devicez1 n h d -> n (h d))sizere   rf   rg   ro   floatr]   ri   rh   r   rv   rK   rn   ru   rx   rw   r   rk   zerosrd   rC   r   opsvllmr/   rT   ry   rz   )r-   r   r   r   
num_tokensqkvr%   r$   g_proj_statesg2r&   s                r.   forwardzKimiDeltaAttention.forward   s    #''**
KK&&q)KK&&q)KK&&q){{=))!,2244<<>>]]4==77:;;A>B
DM$,OOO~~a  \\!__dmmM&B&B1&EFFqI}&<NNN
D0$-@% '
 
 

 		$$K	
 	
 	
 M266!-1EFFKK..q1qqq			r0   r!   r"   r#   r$   r%   r&   c                 4    t                      }|j        }|d S t          |t                    sJ | j                 }t          |t
                    sJ |j        }	|j        }
|j        }|j	        } j
        |j                 }|d |         }|d |         }|d |         }|d |         }|d |         }|\  }}}}|                    dd          }|                    dd          }|                    dd          } j        j                             j        j                            d           j        j                            d                    } j        j                             j        j                            d           j        j                            d                    } j        j                             j        j                            d           j        j                            d                    }|j        dk    r|                    dd          }|                    dd          }|                    dd          }t)          || j        j        d||	||
|	  	                            dd          }t)          || j        j        d||	||
|	  	                            dd          }t)          || j        j        d||	||
|	  	                            dd          }no|d |j	                 }t-          ||| j        j        d|d	          }t-          ||| j        j        d|d	          }t-          ||| j        j        d|d	          }t/           fd
|||f          \  }}}|j        dk    rI||	          }d||<   ||                                         }t3          ||||||dd|
	  	        \  }}|||<   n+t5          ||||||d|
d |j        dz            |	  	        \  }}|dd |f         |dd |f<   d S )Nr   r\   r   silu)r_   conv_stateshas_initial_statecache_indicesquery_start_locmetadataT)r_   conv_state_indicesvalidate_datac                 2    t          | dj                  S )Nzn (h d) -> 1 n h dr   )r   rK   )xr-   s    r.   <lambda>z-KimiDeltaAttention._forward.<locals>.<lambda>  s    i#74=III r0   )	r   r   r   gr%   initial_stateoutput_final_stateuse_qk_l2norm_in_kernel
cu_seqlens)	r   r   r   r   r%   r   r   r   ssm_state_indices)r   attn_metadata
isinstancedictrT   r   r   non_spec_query_start_locnon_spec_state_indices_tensornum_actual_tokenskv_cachevirtual_engine	transposerp   rs   viewr   rq   rr   num_prefillsr   rW   r   map
contiguousr   r   num_decodes)r-   r!   r"   r#   r$   r%   r&   r,   r   r   r   r   r   constant_cachesconv_state_qconv_state_kconv_state_vrecurrent_stateq_conv_weightsk_conv_weightsv_conv_weightsr   r   r   decode_conv_indiceszero_idxr   core_attn_out_non_speclast_recurrent_states   `                            r.   r+   zKimiDeltaAttention._forward  s    .//+:+H F-.....%dk2-)=>>>>>);#0#I (5(S%);-(FG%&8'8&89%&8'8&89%&8'8&89"""#&&&'FUC|\?#--b"55#--b"55#--b"55-22M %%a(($-*>*C*CA*F*F
 
 -22M %%a(($-*>*C*CA*F*F
 
 -22M %%a(($-*>*C*CA*F*F
 
 %)))33Aq99M)33Aq99M)33Aq99M "!("3; 8&
 
 
 i1oo  !"!("3; 8&
 
 
 i1oo  !"!("3; 8&
 
 
 i1oo A #@1-11# %"!#6"  A %"!#6"  A %"!#6"  A IIIIAqRS9
 
1a %))46G5GHH()OH%+,IJUUWWM +#'(,3
 
 
&$ >RO9::
 $-(,34Sm6ORS6S4ST"?
 
 
&$ 0F!!!!0
a++++,,,r0   )NNNrN   rO   )__name__
__module____qualname__propertystrr=   tuplerk   rC   rE   intrM   r    r   r   r   ra   Tensorr   r+   __classcell__)r   s   @r.   r9   r9   R   s$       C    X
	u{EKekA	B
 
 
 

	uS#Xc3hsCx%S/Q	R
 
 
 
 37+/+/"OA OAOA OA )4/	OA
 "D(OA "D(OA OA OA 
OA OA OA OA OA OAb$2|$2 <$2 	$2
 
$2 $2 $2 $2L\
|\
 |\
 |	\

 L\
 l\
 |\
 
\
 \
 \
 \
 \
 \
 \
 \
r0   r9   )6rk   einopsr   r   vllm.configr   r   r   vllm.distributedr   r	   r
   vllm.forward_contextr   r   vllm.loggerr   -vllm.model_executor.model_loader.weight_utilsr   vllm.model_executor.utilsr   vllm.utils.torch_utilsr   vllm.v1.attention.backendr   #vllm.v1.attention.backends.gdn_attnr   fla.ops.kdar   r   r   r   linearr   r   r   mamba.abstractr   mamba.mamba_utilsr   r   mamba.ops.causal_conv1dr   r   quantization.base_configr    r   loggerr   r   r/   r3   Moduler9   r2   r0   r.   <module>r      s#                I I I I I I I I I I         
 E D D D D D D D # # # # # # O O O O O O 6 6 6 6 6 6 < < < < < < 7 7 7 7 7 7 D D D D D D                    
 & % % % % % S S S S S S S S K K K K K K K K 8 8 8 8 8 8	X		<< < 		
 , <  
   *	<	<	 <	 			
 ,	 <	 	 
	 	 	 	  !" 	   h
 h
 h
 h
 h
I h
 h
 h
 h
 h
r0   