
    .`iz=              
          d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z
 ddlmZ ddlmZ ddlmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ  ee          Z G d de          Ze G d d                      Z G d dee                   Z de!e"e#df                  de!e#         fdZ$de!e"e#df                  de!e#         dej%        dz  dej&        dz  dej'        f
dZ( G d de          Z)dS )z#Attention layer with TreeAttention.    N)	dataclass)ClassVarOptional)_custom_ops)
VllmConfig)init_logger)AttentionBackendAttentionImplAttentionMetadataBuilderAttentionTypeCommonAttentionMetadata
MultipleOf)split_decodes_and_prefills)unified_attention)AttentionSpecc                      e Zd ZU dZeed<   ej        ej        gZ	e
eej                          ed<   edeeez           fd            Zedee         fd            Zedefd            Zeded         fd	            Ze	 ddedededededeedf         fd            Zeded         fd            Zedefd            ZdS )TreeAttentionBackendTaccept_output_buffersupported_dtypesreturnc                  "    t          d          gS )N   )r        x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/backends/tree_attn.py get_supported_kernel_block_sizesz5TreeAttentionBackend.get_supported_kernel_block_sizes#   s    2r   c                 
    g dS )N)    @   `                  r   )clss    r   get_supported_head_sizesz-TreeAttentionBackend.get_supported_head_sizes'   s    4444r   c                      dS )N	TREE_ATTNr   r   r   r   get_namezTreeAttentionBackend.get_name+   s    {r   TreeAttentionImplc                      t           S N)r+   r   r   r   get_impl_clsz!TreeAttentionBackend.get_impl_cls/   s      r   auto
num_blocks
block_sizenum_kv_heads	head_sizecache_dtype_str.c                 @    |dz  dk    rt          d          d| |||fS )Nr   r   z$Block size must be a multiple of 16.   )
ValueError)r0   r1   r2   r3   r4   s        r   get_kv_cache_shapez'TreeAttentionBackend.get_kv_cache_shape3   s3     ?aCDDD:z<CCr   TreeAttentionMetadataBuilderc                      t           S r-   )r9   r   r   r   get_builder_clsz$TreeAttentionBackend.get_builder_cls?   s    ++r   c                      dS )NFr   )argskwargss     r   use_cascade_attentionz*TreeAttentionBackend.use_cascade_attentionC   s    ur   N)r/   )__name__
__module____qualname__r   bool__annotations__torchfloat16bfloat16r   r   listdtypestaticmethodintr   r   classmethodr'   strr*   typer.   tupler8   r;   r?   r   r   r   r   r      s        !%$%%%5:]EN4ShtEK01SSS d33C.D       \  5c 5 5 5 [5 c    \ !$23 ! ! ! \!   &	D 	D	D	D 	D 		D
 	D 
sCx	D 	D 	D \	D ,T"@A , , , \, $    \  r   r   c                   X   e Zd ZU eed<   eed<   ej        ed<   eed<   ej        ed<   ej        ed<   ej        ed<   dZeed	<   dZeed
<   dZ	eed<   dZ
eed<   dZej        dz  ed<   dZed          ed<   dZed          ed<   eded          fd            Zeded          fd            ZdS )TreeAttentionMetadatanum_actual_tokensmax_query_lenquery_start_locmax_seq_lenseq_lensblock_tableslot_mappingr   num_prefill_tokensnum_decode_tokensnum_prefillsnum_decodesNtree_attn_bias_cached_prefill_metadata_cached_decode_metadatar   c                    | j         dk    rd S | j        | j        S | j        | j        d          }t	          j        |          }| j        | j        d          }t          | j        t          |
                                                                          ||d         z
  t          |
                                                                          || j        | j        d          | j        | j        d                    | _        | j        S )Nr   )rR   rS   rT   rU   rV   rW   rX   )r[   r^   rT   r\   rE   diffrV   rQ   rY   rK   maxitemrW   rX   rZ   selfq_start_loc	q_seqlens
kv_seqlenss       r   prefill_metadataz&TreeAttentionMetadata.prefill_metadata]   s   !!4(4 00*4+;+=+=>J{++	]4#3#5#56
(="5immoo224455'+a.8JNN,,113344()9););<*4+A+C+CD)
 )
 )
% ,,r   c                    | j         dk    rd S | j        | j        S | j        d | j        dz            }t	          j        |          }| j        d | j                 }t          | j         t          |	                                
                                          |t          |	                                
                                          || j        d | j                 | j        d | j                  | j                  | _        | j        S )Nr      )rR   rS   rT   rU   rV   rW   rX   r]   )rZ   r_   rT   r\   rE   ra   rV   rQ   rK   rb   rc   rW   rX   r]   rd   s       r   decode_metadataz%TreeAttentionMetadata.decode_metadatav   s   !Q&&4'3 //*+AT-=-A+ABJ{++	]#5T%5#56
'<"4immoo224455'JNN,,113344();4+;);<*+CT-C+CD.	(
 	(
 	(
$ ++r   )r@   rA   rB   rK   rD   rE   TensorrY   rZ   r[   r\   r]   r^   r   r_   propertyri   rl   r   r   r   rQ   rQ   H   sU        \!!!l,sL#K*.NEL4'... CGh'>?FFFAEX&=>EEE-(+B"C - - - X-0 ,*A!B , , , X, , ,r   rQ   c            	       v     e Zd Zdedee         dedej        f fdZ		 dde
ded	ed
efdZdede
d
efdZ xZS )r9   kv_cache_speclayer_namesvllm_configdevicec                 R   t                                          ||||           |j        | _        |j        }d }|x}r|j        }|t          j        |          ndg}t          |          }	t          ||	t          j
        |          | _        | j        j        d         | _        d S )N)r   )rI   rs   r   )super__init__r1   speculative_configspeculative_token_treeastliteral_eval_get_depth_counts_prepare_tree_attn_biasrE   float32r]   shapereorder_batch_threshold)re   rp   rq   rr   rs   spec_configspec_token_treespectree_choicesdepth_counts	__class__s             r   rv   z%TreeAttentionMetadataBuilder.__init__   s     	[&III'2!4&*4 	:"9O1@1LC_---SWRX 	 )665-	
 
 
 (,':'@'C$$$r   Fcommon_prefix_lencommon_attn_metadata
fast_buildr   c                     | j         j        d         }t          ||          \  }}}}|j        }	|j        }
|j        }|j        }|j        }|j        }|j	        }t          |	||||||
||||| j                   S )Nr   )decode_threshold)rR   rY   rZ   r[   r\   rS   rT   rU   rV   rW   rX   r]   )r]   r~   r   rR   rT   rS   rV   rU   block_table_tensorrX   rQ   )re   r   r   r   r   r\   r[   rZ   rY   rR   rf   rS   rh   rU   rW   rX   s                   r   buildz"TreeAttentionMetadataBuilder.build   s      .4Q7&$7G   	I\#46H 1B*:,:)2
*6*=+8$/1/%#''##%.
 
 
 	
r   draft_indexc                     | j         }|dk    rt          j        d          | _         n6dd|j        z   }}| j         ||||f                                         | _         |                     d|d          }|| _         |S )Nr   rk   T)r   )r]   rE   emptyrS   
contiguousr   )re   r   r   orig_tree_attn_biasstartendattn_metadatas          r   build_for_draftingz/TreeAttentionMetadataBuilder.build_for_drafting   s     #1!"'+a..D A 4 BB3E"&"5eCis6J"K"V"V"X"XD 

1&:t
LL 2r   )F)r@   rA   rB   r   rH   rM   r   rE   rs   rv   rK   r   rC   rQ   r   r   __classcell__)r   s   @r   r9   r9      s        D$D #YD  	D
 D D D D D DB !	"
 "
"
 6"
 	"

 
"
 "
 "
 "
H5  
	       r   r9   sorted_tree_choices.r   c                     g }d}| D ]A}t          |          }||k    r|                    d           ||dz
  xx         dz  cc<   |}B|S )Nr   rk   )lenappend)r   r   
prev_depthpathdepths        r   r{   r{      sr    LJ#  D		J"""UQY1$

r   r   rI   rs   c                 P   t          |           dz   }t          j        ||ft          j         ||          }d}t	          |          D ]	}||||f<   
||d d df<   d}t	          t          |                    D ]}t	          ||                   D ]}	| ||	z            }
t          |
          dk    r!g }t	          t          |
          dz
            D ]8}|                    |                     |
d |dz                      dz              9|||	|z   dz   |f<   |||         z  }|S )Nrk   )rs   rI   r   )r   rE   fullinfranger   index)r   r   rI   rs   tree_lentree_attn_maskmask_valir   jcur_tree_choiceancestor_idxcs                r   r|   r|      s    &''!+HZ	8uyju  N H8__ ( ('q!t $N111a4 E3|$$%% ! !|A'' 
	C 
	CA1%!)<O?##q((L3//!344  ##'--ogAg.FGG!K    ;CN1u9q=,677a r   c                   &   e Zd Zdej        dfdededededee         dz  dedz  ded	edz  d
ededz  ddfdZ		 	 	 dde
j        j        de
j        de
j        de
j        de
j        dede
j        dz  de
j        dz  de
j        dz  de
j        fdZdS )r+   N	num_headsr3   scaler2   alibi_slopessliding_windowkv_cache_dtypelogits_soft_cap	attn_typekv_sharing_target_layer_namer   c                 n   || _         || _        t          |          | _        || _        | j         | j        z  | _        || _        |
| _        | t          j	        |t          j
                  }|| _        |d}|| _        |d| _        n|dz
  df| _        |	t          j        k    rt!          d          d S )N)rI   r   )r   rk   zeEncoder self-attention and encoder/decoder cross-attention are not implemented for TreeAttentionImpl.)r   r3   floatr   r2   num_queries_per_kvr   r   rE   tensorr}   r   r   r   r   DECODERNotImplementedError)re   r   r3   r   r2   r   r   r   r   r   r   s              r   rv   zTreeAttentionImpl.__init__!  s     #"5\\
("&.D4E"E,,H)# <EMJJJL("O.!"*D#1A#5q"9D---%%   .-r   layerquerykeyvaluekv_cacher   outputoutput_scaleoutput_block_scalec
           
      J   |
J d            ||	t          d          ||                    d          S |                    d          \  }
}| j        /t	          j        |||
||j        | j        |j        |j	                   |j
        }|j        }|j        j        d         dz
  |j        d         f}|j        x}rt          di d|||         d|
d|d	|||         d
|j        d|j        d|j        d|j        d| j        ddd| j        d| j        d|j        d| j        ddd|j                            |          d|j	                            |           |j        x}rt          di d|d|         d|
d|d	|d|         d
|j        d|j        d|j        d|j        d| j        ddd| j        d|j        d| j        d|j        d| j        ddd|j                            |          d|j	                            |           |S )a  Forward pass with TreeAttention.

        Args:
            query: shape = [num_tokens, num_heads, head_size]
            key: shape = [num_tokens, num_kv_heads, head_size]
            value: shape = [num_tokens, num_kv_heads, head_size]
            kv_cache: shape =
                [2, num_blocks, block_size, num_kv_heads, head_size]
            attn_metadata: Metadata for attention.
        Returns:
            shape = [num_tokens, num_heads * head_size]
        NzOutput tensor must be provided.zDfused output quantization is not yet supported for TreeAttentionImplr   rk   qkvoutcu_seqlens_qmax_seqlen_q	seqused_kmax_seqlen_ksoftmax_scalecausalTr   window_sizerW   softcap	q_descale	k_descale	v_descaleqq_biasr   )r   fill_unbindr   opsreshape_and_cache_flashrX   r   _k_scale_v_scalerR   rZ   rT   r~   ri   r   rS   rV   rU   r   r   r   rW   r   expandrl   r]   )re   r   r   r   r   r   r   r   r   r   	key_cachevalue_cacherR   rZ   descale_shapeprefill_metadecode_metas                    r   forwardzTreeAttentionImpl.forwardI  s   0 !!#D!!!#'9'E%V    <<??" "*!3!3	;,4 '*#	 	 	 *;);&6<Q?!CSYq\R(99< 	   )*;;<<) + ,->>??	
 *99 *77 '// *55 #jj t ".. !// )44 ,, $   .//>>>!"  .//>>># ( (77; 	   ***++) + ---..	
 )88 )66 &.. )44 #jj t ".. $22 !// (33 ,,  $!"  .//>>>#$  .//>>>% ( r   )NNN)r@   rA   rB   r   r   rK   r   rH   rM   rv   rE   nnModulerm   rQ   r   r   r   r   r+   r+      sw        )-#0#837& && & 	&
 & 5kD(& d
& & & !& '*Dj& 
& & & &` '+,026e exe |e \	e
 |e ,e -e t#e lT)e "L4/e 
e e e e e er   r+   )*__doc__ry   dataclassesr   typingr   r   rE   vllmr   r   vllm.configr   vllm.loggerr   vllm.v1.attention.backendr	   r
   r   r   r   r    vllm.v1.attention.backends.utilsr   .vllm.v1.attention.ops.triton_unified_attentionr   vllm.v1.kv_cache_interfacer   r@   loggerr   rQ   r9   rH   rO   rK   r{   rI   rs   rm   r|   r+   r   r   r   <module>r      s   * ) 



 ! ! ! ! ! ! % % % % % % % %  # # # # # # " " " " " " # # # # # #                     M L L L L L 4 4 4 4 4 4	X		& & & & &+ & & &R E, E, E, E, E, E, E, E,PX X X X X#;<Q#R X X Xv
4c3h+@ 
T#Y 
 
 
 
$eCHo.$s)$ ;$ L4	$
 \$ $ $ $NN N N N N N N N N Nr   