
    .`i2              
          d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZmZmZ d d	lmZmZ  ee          Z G d
 de          Ze G d d                      Ze G d d                      Ze G d d                      Ze G d d                      Zdej        dej        dej        de ej        ej        f         fdZ!defdZ" G d de          Z#dS )    )	dataclass)ClassVarN)
VllmConfig)init_logger)current_platform)get_paged_mqa_logits_metadatais_deep_gemm_supported)AttentionBackendAttentionCGSupportAttentionMetadataBuilderCommonAttentionMetadata
MultipleOf)split_decodes_and_prefillssplit_prefill_chunksc                       e Zd Zedefd            Zedeeez           fd            Z	e
dee         fd            Zeded         fd            Ze	 dded	ed
edededeedf         fd            Ze	 ddedeedf         fd            ZdS )DeepseekV32IndexerBackendreturnc                      dS )NDEEPSEEK_V32_INDEXER r       z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/backends/mla/indexer.pyget_namez"DeepseekV32IndexerBackend.get_name   s    %%r   c                  2    t          j                    rdndgS )N   @   )r   is_rocmr   r   r    get_supported_kernel_block_sizesz:DeepseekV32IndexerBackend.get_supported_kernel_block_sizes    s    %-//7R88r   c                 
    g dS )N)    r      r   )clss    r   get_supported_head_sizesz2DeepseekV32IndexerBackend.get_supported_head_sizes$   s    }}r   !DeepseekV32IndexerMetadataBuilderc                      t           S )N)r$   r   r   r   get_builder_clsz)DeepseekV32IndexerBackend.get_builder_cls(   s    00r   auto
num_blocks
block_sizenum_kv_heads	head_sizecache_dtype_str.c                     |dk    sJ | ||fS )Nr   r   )r(   r)   r*   r+   r,   s        r   get_kv_cache_shapez,DeepseekV32IndexerBackend.get_kv_cache_shape,   s"     q    J	22r   Finclude_num_layers_dimensionc                     | rdS dS )N)r   r         )r   r   r1   r   )r/   s    r   get_kv_cache_stride_orderz3DeepseekV32IndexerBackend.get_kv_cache_stride_order7   s     ( 	 <yr   N)r'   F)__name__
__module____qualname__staticmethodstrr   listintr   r   classmethodr#   typer&   tupler.   boolr3   r   r   r   r   r      sl       &c & & & \& 9d33C.D 9 9 9 \9 c    [ 1T"EF 1 1 1 \1   &3 333 3 	3
 3 
sCx3 3 3 \3 -2 &*	sCx   \  r   r   c                       e Zd ZU ej        ed<   ej        ed<   ej        ed<   ej        ed<   ej        ed<   eed<   eed<   eed<   eed	<   d
S )&DeepseekV32IndexerPrefillChunkMetadatablock_tablecu_seqlen_kscu_seqlen_kecu_seq_lenstoken_to_seqtotal_seq_lenstoken_start	token_endnum_reqsN)r5   r6   r7   torchTensor__annotations__r;   r   r   r   rA   rA   @   s         ,,,NNNMMMMMr   rA   c                   &    e Zd ZU ee         ed<   dS )!DeepseekV32IndexerPrefillMetadatachunksN)r5   r6   r7   r:   rA   rM   r   r   r   rO   rO   M   s$         7888888r   rO   c                   j    e Zd ZU ej        ed<   ej        ed<   ej        ed<   eed<   ej        ed<   dS ) DeepSeekV32IndexerDecodeMetadatarB   seq_lensdecode_lensrequires_paddingschedule_metadataN)r5   r6   r7   rK   rL   rM   r?   r   r   r   rR   rR   R   sV         l|#####r   rR   c                       e Zd ZU ej        ed<   eed<   eed<   eed<   eed<   ej        ed<   ej        ed<   eed<   eed	<   eed
<   eed<   eed<   dZedz  ed<   dZ	e
dz  ed<   dS )DeepseekV32IndexerMetadatarS   rJ   max_query_lenmax_seq_lennum_actual_tokensquery_start_locslot_mappinghead_dimnum_decodesnum_decode_tokensnum_prefillsnum_prefill_tokensNdecodeprefill)r5   r6   r7   rK   rL   rM   r;   rc   rR   rd   rO   r   r   r   rX   rX   [   s          lMMM\!!!,MMM 6:F,t3:::8<G.5<<<<<r   rX   start_seq_locseq_len_per_batchdevicer   c                    |                      t          j                  }|                     t          j                  }|                                dk    r|                                dk    sJ |                                |                                dz   k    s
J d            |dd         |dd         z
  }t          |d                                                   }|                                }|dk    rBt          j        dt          j        |          t          j        dt          j        |          fS t          j        |d          |z
  }t          j	        t          j
        |          |          }	||	         }
t          j	        ||          }t          j	        ||          }t          j
        |t          j                  t          j	        |dd         |          z
  dz   }||z
  |z   }|
|z   }|
                                                     |          |                                                     |          fS )	a  
    Args:
      start_seq_loc: 1D long tensor [B+1], cumulative counts of
                     selected tokens per batch.
            Example: [0, 2, 4, 7] ->
                     batch sizes (selected) [2, 2, 3], N=7 tokens total.
      seq_len_per_batch: 1D long tensor [B],
                         full sequence length (KV length) of each batch.
                         Example: [5, 9, 4].

    Returns:
      start_tensor: 1D long tensor [N], start offset in the
                    concatenated KV cache for each token's batch.
      end_location: 1D long tensor [N],
                    **exclusive** end = start + token's local position.
                    (So the attended KV slice is kv[start:end].)

    Assumes each batch contributes its full `seq_len_per_batch[i]`
    keys to the KV cache, andthe selected tokens within a batch
    are the **last** `counts[i]` positions of that sequence.
    dtyper   z"start_seq_loc must have length B+1Nr   rj   rg   dim)torK   longrn   numelr;   itememptycumsumrepeat_interleavearange)re   rf   rg   qLcountsNBkv_starts_per_batchbatch_idstart_tensorL_expandm_expand
pos_within	local_posend_locations                   r   kv_spans_from_batchesr   w   s   0 	uz**A5:..A5577a<<AEEGGqLLL(7799		A%%%'K%%% qrrUQssV^FAbEJJLLA			AAvvKF;;;KF;;;
 	
  ,qa00014 &u|A??H 'x0L &q&11H&vv66H 	Qej)))E,CAcrcFF,S,SSVWW  8#j0I)+L  ((,*:*:*<*<*?*?*G*GGGr   vllm_configc                 $    | j         j        }|dz  S )N(   )model_configmax_model_len)r   r   s     r   get_max_prefill_buffer_sizer      s    ,:M 2r   c            	       t     e Zd ZU ej        Zee         ed<   dZe	ed<    fdZ
d Z	 dde	ded	ed
efdZ xZS )r$   _cudagraph_supportr   reorder_batch_thresholdc                 B    t                      j        |i | | j        j        }t	          | j                  | _        | j        j        r| j        j        j        nd| _        | xj        t          | j        d          z  c_        t          j                            | j                  }|j        }|| _        t          j        |j        ft          j        | j                  | _        t          j        | j        dz   dft          j        | j                  | _        d S )Nr   r   rl   r1   )super__init__r   scheduler_configr   max_prefill_buffer_sizespeculative_confignum_speculative_tokensr   minrK   cudaget_device_propertiesrg   multi_processor_countnum_smsrs   max_num_seqsint32decode_lens_bufferscheduler_metadata_buffer)selfargskwargsr   propssm_count	__class__s         r   r   z*DeepseekV32IndexerMetadataBuilder.__init__   s   $)&)))+<'B4CS'T'T$ 2D/FF 	# 	$$D,G(K(KK$$
00==."'+*,EK#
 #
 #

 */\Aq!T[*
 *
 *
&&&r   c                 ,   |||dz            ||         z
  }t          ||||         | j                  \  }}||                                         }	||                                         }
|||                                         }t	          j        d||z
  t          j                  }t	          j        ||||                                       | j                  }|| j	        k    sJ t	          j
        t	          j        dt          j                  |||                             d          g                              t          j                                      | j                  }t          ||||||||         |	|
||z
  	  	        S )Nr   r   ri   rm   )	rC   rD   rE   rF   rG   rB   rH   rI   rJ   )r   rg   rr   sumrK   rv   r   ru   ro   r   catzerosrt   rA   )r   
reqs_startreqs_endquery_start_loc_cpuseq_lens_cpurB   prefill_query_start_locrC   rD   rH   rI   rG   seq_idxrF   rE   s                  r   build_one_prefill_chunkz9DeepseekV32IndexerMetadataBuilder.build_one_prefill_chunk   s     
X\ 9:!*-. 	  &;#\*X2E%F&
 &
"l **5::<<'16688	%j&9:>>@@,q(Z"7u{KKK.\*X"56
 

"T[// 	 !=====IK555 H!45<<<CC  R__R__ 	 6%%#%)#Jx$78#
*

 

 

 
	
r   Fcommon_prefix_lencommon_attn_metadata
fast_buildr   c                     j         }j        }j        t           j                  \  }}}}	||z   |k    sJ ||	z   |k    sJ d }
|dk    rDt          j        |d           j        |          } fd|D             }t          |          }
d }|dk    rt          j
        j        d |dz             j        d |                     j        d |         }t          j
        j        d |dz                      }|                                |                                k                                    }j        d |         }t#                      r*t%          | j        j         j                   j        d d <   t/          j        d |df         j        d |         || j        	          }t3          j        j         j        j        j        j        j        d
||||	|
|          }|S )N)decode_thresholdr   )request_offsetc           	      \    g | ](\  }}                     ||j        j                  )S r   )r   r   block_table_tensor).0r   r   r   r   r   s      r   
<listcomp>z;DeepseekV32IndexerMetadataBuilder.build.<locals>.<listcomp>'  sT     	 	 	 )J ,,'(5(; 	 	 	r   )rP   r   )out.)rB   rS   rT   rU   rV   r!   )rS   rJ   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rd   rc   )rJ   r[   r   r   r   r   r   r   rO   rK   diffr\   r   maxr   rr   rS   r	   r   kv_cache_specr)   r   r   rR   r   rX   rY   rZ   r]   )r   r   r   r   rJ   
num_tokensr_   ra   r`   rb   prefill_metadatachunk_seq_idsrP   decode_metadatarT   decode_lens_cpurU   rS   attn_metadatar   s   ` `                @r   buildz'DeepseekV32IndexerMetadataBuilder.build  s    (0);
2F&$t7S   	I\#46H \)X5555 #55CCCC!0$1+,,?,*  M
	 	 	 	 	 	 -:	 	 	F  A      ??J$45F{Q5FG+L[L9    1,;,?K#j$89J;?9JK O
 !0 3 3 5 58K8K8M8M MSSUU+4\k\BH%'' 4Qd0;T\5 5.qqq1 ?0CL[LRUDUV-6||D'!1"&"@  O 3)2)2.<,82D0@-:#/%1$"
 
 
& r   r4   )r5   r6   r7   r   UNIFORM_SINGLE_TOKEN_DECODEr   r   rM   r   r;   r   r   r   r?   rX   r   __classcell__)r   s   @r   r$   r$      s         6 !34    $%S$$$
 
 
 
 
4&
 &
 &
X !	V VV 6V 	V
 
$V V V V V V V Vr   r$   )$dataclassesr   typingr   rK   vllm.configr   vllm.loggerr   vllm.platformsr   vllm.utils.deep_gemmr   r	   vllm.v1.attention.backendr
   r   r   r   r    vllm.v1.attention.backends.utilsr   r   r5   loggerr   rA   rO   rR   rX   rL   rg   r>   r   r   r$   r   r   r   <module>r      s   " ! ! ! ! !        " " " " " " # # # # # # + + + + + + V V V V V V V V                    
 
X		" " " " " 0 " " "J 	 	 	 	 	 	 	 	 9 9 9 9 9 9 9 9 $ $ $ $ $ $ $ $ = = = = = = = =6=H<=H49L=HJO,=H
5<%&=H =H =H =H@
Z 
 
 
 
_ _ _ _ _(@ _ _ _ _ _r   