
    .`i1                        d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZmZmZmZ d dlmZmZ  ed	d
          Ze G d d
                      Z G d dee         e j                  ZdS )    N)	dataclass)ClassVarTypeVar)
VllmConfig)cdiv)AttentionCGSupportAttentionMetadataBuilderCommonAttentionMetadata)PAD_SLOT_IDcompute_causal_conv1d_metadatamamba_get_block_table_tensorsplit_decodes_and_prefills)AttentionSpec	MambaSpecMBaseMambaAttentionMetadata)boundc                   V   e Zd ZU eed<   eed<   eed<   eed<   eed<   ej        dz  ed<   ej        dz  ed<   ej        dz  ed	<   ej        ed
<   ej        dz  ed<   ej        dz  ed<   ej        dz  ed<   ej        ed<   dZedz  ed<   dZ	ej        dz  ed<   dZ
ej        dz  ed<   dS )r   num_prefillsnum_prefill_tokensnum_decodesnum_decode_tokensnum_reqsNhas_initial_states_pquery_start_loc_pnum_computed_tokens_pstate_indices_tensorblock_idx_last_scheduled_token!block_idx_first_scheduled_token_pblock_idx_last_computed_tokenseq_lens	nums_dict	batch_ptrtoken_chunk_offset_ptr)__name__
__module____qualname__int__annotations__torchTensorr"   dictr#   r$        y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/backends/mamba_attn.pyr   r      s$        MMM  ,----|d**** <$....,&&& %*L4$7777',|d'::::#(<$#6666 l "Itd{!!!%)Iu|d")))26EL4/66666r.   c            	       J    e Zd ZU ee         ed<   dZeed<   ej	        Z
ee         ed<   dZeed<   dedee         d	ed
ej        f fdZdedefdZ	 ddedededefdZdededeej        ej        ej        f         fdZdedefdZdedej        dej        defdZ xZS )!BaseMambaAttentionMetadataBuildermetadata_cls   reorder_batch_threshold_cudagraph_supportTsupports_update_block_tablekv_cache_speclayer_namesvllm_configdevicec                     t                                          ||||           t          |t                    sJ |j        | _        | j        j        j        | _        | j        j	        $t          | j        | j        j	                  | _        | j        j        j        dk    rt          j        | j        t          | j        j        j        | j        j                  ft          j        |          | _        t          j        | j        ft          j        |          | _        t          j        | j        ft          j        |          | _        d S t          j        | j        ft          j        |          | _        d S )Nall)dtyper:   )super__init__
isinstancer   compilation_configr9   scheduler_configmax_num_seqsdecode_cudagraph_max_bsmax_cudagraph_capture_sizemincache_configmamba_cache_moder*   emptyr   model_configmax_model_lenr7   
block_sizeint32r   r   r    )selfr7   r8   r9   r:   	__class__s        r/   r?   z*BaseMambaAttentionMetadataBuilder.__init__D   sx    	[&III-33333"-"@'+'7'H'U$"=I+.,'B, ,D(
 (9UBB(-0(5C*5  k
) 
) 
)D% 38+-/k3 3 3D/
 27-/k2 2 2D... ).-/k) ) )D%%%r.   common_attn_metadatareturnc                 t    |}|j         |j        k    s
J d            d|_        |                     d|          S )z
        This method builds the metadata for full cudagraph capture.
        Currently, only decode is supported for full cudagraphs with Mamba.
        zmMamba only supports decode-only full CUDAGraph capture. Make sure all cudagraph capture sizes <= max_num_seq.r3   r   )r   num_actual_tokensmax_query_lenbuild)rN   rP   ms      r/   build_for_cudagraph_capturez=BaseMambaAttentionMetadataBuilder.build_for_cudagraph_captures   sJ     !zQ0000D 100
 zz!Qr.   Fcommon_prefix_len
fast_buildc                 ,    |                      |          S )z
        Default build implementation for Mamba-like attention backends.
        Subclasses (e.g., Mamba2) can override to add additional metadata.
        )_compute_common_metadata)rN   rX   rP   rY   s       r/   rU   z'BaseMambaAttentionMetadataBuilder.build   s     ,,-ABBBr.   mamba_block_sizec                    |                                 }t          ||          dz
  }t          |dz   |          dz
  }t          |j        |          dz
  }t          j        |d          }t          j        |d          }|||fS )Nr3   r   )rF   )compute_num_computed_tokensr   r!   r*   clamp)rN   rP   r\   num_computed_tokensr    block_idx_first_scheduled_tokenr   s          r/   %_compute_prefix_caching_block_indiceszGBaseMambaAttentionMetadataBuilder._compute_prefix_caching_block_indices   s    
 3NNPP(,-@BR(S(SVW(W% $q(*:;;a? 	(
 %.0@AAAE 	' ).)q)
 )
 )
% */**
 *
 *
&
 *+*
 	
r.   c                    |j         }t          || j                  \  }}}}d}d}d}	d}
d}d}d}d}d\  }}}| j        j        j        dk    rB|                                }	|j        }| j        j	        }| 
                    ||          \  }}}n:t          |j        |j        | j        | j        j        j                  dddf         }|dk    r|	|                                }	|j        | dz
  d         |z
  }|j        | dz
  d         |z
  }|	||z
  |         dk    }t          ||j        j                  \  }}}| j        j        j        dk    r"|	J |	||z
  |         }
|J |||z
  |         }n|| j        k    r| j        j                                        r| j        d|                             |d	           | j        d|         }t.          ||d<   | j        j        j        dk    rf| j        d|                             |d	           | j        d|         }| j        d|                             |d	           | j        d|         }|                     |||||||||||
||j        |||
          S )zD
        Compute metadata common to both Mamba1 and Mamba2.
        )decode_thresholdN)NNNr<   r   r3   )r:   Tnon_blocking)r   r   r   r   r   r   r   r   r   r    r   r   r!   r"   r#   r$   )r   r   r4   r9   rG   rH   r^   block_table_tensorr7   rL   rb   r   r!   query_start_loc_cpuquery_start_locr   r:   rD   rA   cudagraph_modehas_full_cudagraphsr   copy_r   r   r    r2   )rN   rP   r   r   r   r   r   r   r   r`   r   ra   r   r    r   r"   r#   r$   r   r\   query_start_loc_p_cpus                        r/   r[   z:BaseMambaAttentionMetadataBuilder._compute_common_metadata   s    (0 '$t7S   	I\#46H  $ " $ +/',0)(,%)-& 8H4	94(9UBB"6"R"R"T"T $8#J #1<
 ::$&6 	-/.. $@$7$-" ->	$ $
 aad$  !"*&:&V&V&X&X# %8,9J9L9LM#$ "
 %4l]Q5F5H5HI#$ 
 $H|$;h$FG!K !
 /)/?F   9Iy"8 ,=FF*666(;|+h6)% 7BBB4S|+h651 4777'6JJLL 8 %l{l399$4 :    $(#<=O>O=O#P 1< .,=FF3L[LAGG2 H    261T&&&2. 2<K<@FF1 G    150R&&&1-   %1#//!5!5+I.O*G"7)2#9! ! 
 
 	
r.   metadata	blk_tableslot_mappingc                    t          j         |          }t          ||j        | j        | j        j        j                  }| j        j        j        dv r|d d df         }|j        d         }|j        dk    rQ|| j	        k    rF| j
        j                                        r(| j        d |         }|                    |d           |}||_        |S )N)nonealignr   Tre   )copyr   r!   r7   r9   rG   rH   shaper   rD   rA   rj   rk   r   rl   )rN   rn   ro   rp   new_metadatastate_indices_tr   persistent_state_indices_ts           r/   update_block_tablez4BaseMambaAttentionMetadataBuilder.update_block_table1  s     y**6):	
 
 (9=NNN-aaad3O?1% !Q&&D888'6JJLL 9 *.)B9H9)M&&,,_4,PPP8O,;)r.   )F)r%   r&   r'   typer   r)   r4   r(   r   UNIFORM_SINGLE_TOKEN_DECODEr5   r   r6   boolr   liststrr   r*   r:   r?   r
   rW   rU   tupler+   rb   r[   ry   __classcell__)rO   s   @r/   r1   r1   <   s        q'#$S$$$6 !34    )-,,,-$- #Y-  	-
 - - - - - -^ $; 	
       , !	
C 
C
C 6
C 	
C
 

C 
C 
C 
C
5
 
 
u|U\5<7	8	
 
 
 
>
5
 

 
 
 
B < l	
 
       r.   r1   )abcrt   dataclassesr   typingr   r   r*   vllm.configr   vllm.utils.math_utilsr   vllm.v1.attention.backendr   r	   r
    vllm.v1.attention.backends.utilsr   r   r   r   vllm.v1.kv_cache_interfacer   r   r   r   ABCr1   r-   r.   r/   <module>r      s   


  ! ! ! ! ! ! $ $ $ $ $ $ $ $  " " " " " " & & & & & &         
            @ ? ? ? ? ? ? ?GC3444 7 7 7 7 7 7 7 7<S S S S S(@(CSW S S S S Sr.   