
    .`i                     n   d dl mZ d dlmZmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZ d dlmZ d	ed
eeef         fdZded	edej        fdZdedej        fdZdedeeej        f         deeef         d
eeej        f         fdZdeej                 deeef         dedeeef         dej        d
eeej        f         fdZdej        ded
eeej        f         fdZdee         de de dej        dej        dej        de deej                 dej        ded
eeef         fd Z!dS )!    )Sequence)AnycastN)
VllmConfigget_layers_from_vllm_config)AttentionLayerBase)AttentionBackendAttentionMetadataBuilderCommonAttentionMetadata)AttentionSpecKVCacheConfigKVCacheSpec)bind_kv_cachevllm_configreturnc                     i }t          t          t                   t                    }t	          | |          }|                                D ]!\  }}|                    |           x}r|||<   "|S N)r   typer   r   r   itemsget_kv_cache_spec)r   kv_cache_spec
layer_typeattn_layers
layer_nameattn_modulespecs          q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu/attn_utils.pyr   r      sv    ,.Md3i!344J-k:FFK#.#4#4#6#6 - -
K00===4 	-(,M*%    kv_cache_configdevicec                     i }g }d }| j         D ]}|j        }t          t          |                    }t	          t
          t                   t                    }	t          ||	|          }
|
|         	                                }|D ]}|||<    |
                                |j        |||          }|                    |           |                                dk    r,||                                }|                    |           ||fS )N
FLASHINFER)kv_cache_groupslayer_namesnextiterr   r   r   r   r   get_attn_backendget_builder_clsr   appendget_name_get_workspace_bufferset_workspace_buffer)r   r   r    attn_backendsattn_metadata_buildersflashinfer_workspacekv_cache_group_specr$   any_layer_namer   r   attn_backendr   attn_metadata_builders                 r   init_attn_backendr4   "   s6   
 8:M=?04.> Q Q)5d;//00$s)%788
1+z;WW">2CCEE% 	5 	5J(4M*%% > < < > >-	!
 !
 	%%&;<<<  ""l22#+'<'R'R'T'T$$%::;OPPP000r   c                 T   i }| j         D ]7}t          j        |j        t          j        |          }|j        D ]}|||<   8t                      }| j        D ]!}|j        D ]}|	                    |           "|t          |
                                          k    s
J d            |S )N)dtyper    z)Some layers are not correctly initialized)kv_cache_tensorstorchzerossizeint8	shared_bysetr#   r$   addkeys)r   r    kv_cache_raw_tensorskv_cache_tensortensorr   r$   groups           r   _allocate_kv_cacherD   D   s     57*; 6 6_1FSSS)3 	6 	6J/5 ,,	6 %%K 0 ( (+ 	( 	(JOOJ''''	(#27799:::::3 ;::  r   r@   r-   c           
      V   i }| j         D ]}|j        }t          |t                    sJ |j        D ]q}||         }|                                |j        z  dk    sJ |                                |j        z  }||         }	|	                    ||j        |j	        |j
                  	 |	                                t                    t                    k    sJ n@# t          t          f$ r, t          t!          t                                        Y nw xY wt          fdD                       fdt!          t                              D             }
|j        }|                    |          }|                              } |j        |
 ||<   s|S )Nr   c              3   (   K   | ]}|         V  d S r    ).0ikv_cache_shapes     r   	<genexpr>z$_reshape_kv_cache.<locals>.<genexpr>u   s(      "T"T>!#4"T"T"T"T"T"Tr   c                 :    g | ]}                     |          S rG   )index)rH   rI   kv_cache_stride_orders     r   
<listcomp>z%_reshape_kv_cache.<locals>.<listcomp>v   s7        &++A..  r   )r#   r   
isinstancer   r$   numelpage_size_bytesget_kv_cache_shape
block_sizenum_kv_heads	head_sizeget_kv_cache_stride_orderlenAttributeErrorNotImplementedErrortupleranger6   viewpermute)r   r@   r-   	kv_cachesr0   r   r   
raw_tensor
num_blocksr2   	inv_orderr6   rJ   rN   s               @@r   _reshape_kv_cacherc   X   s   
 *,I.>  C  C+9-77777-9 	C 	CJ-j9J##%%(EEJJJJ#))++}/LLJ(4L)<<(*'	 NJ(4(N(N(P(P%011S5H5HHHHHH"$78 J J J(-eC4G4G.H.H(I(I%%%J #"T"T"T"T>S"T"T"TTTN   s#899::  I
 "'E#//J#88J$6J$6	$BIj!!;	C< s   -6C$$:D! D!runner_kv_cachesforward_contextc                 j    t          ||          }t          |||          }t          |||            |S r   )rD   rc   r   )rd   re   r   r-   r    r@   r_   s          r   init_kv_cacherg      s?     .ovFF!/3GWWI)_.>???r   slot_mappingsc                 l    i }t          |j                  D ]\  }}| |         }|j        D ]}|||<   |S r   )	enumerater#   r$   )rh   r   slot_mappings_by_layerrI   kv_cache_groupslot_mappingr   s          r   build_slot_mappings_by_layerrn      s^     79&'FGG > >>$Q'(4 	> 	>J1=":..	>!!r   r.   num_reqs
num_tokensquery_start_loc_gpuquery_start_loc_cpuseq_lensmax_seq_lenblock_tablesc
                 F   t          |                                          }
|d |         }i }|	j        }t          |          D ]\\  }}||         }||         }t	          |||||||
||d
  
        }| |         }|                    d|          }|j        D ]}|||<   ]|S )NT)
query_start_locrr   rs   rt   ro   num_actual_tokensmax_query_lenblock_table_tensorrm   causalr   )common_prefix_lencommon_attn_metadata)intmaxr#   rj   r   buildr$   )r.   ro   rp   rq   rr   rs   rt   ru   rh   r   ry   attn_metadatar#   rI   r   block_tablerm   r}   r3   metadatar   s                        r   build_attn_metadatar      s     +//1122M		"H$&M%5O%o66 1 1="1o$Q'6/ 3#('*% 
  
  
 !7q 9(..!5 / 
 
 (3 	1 	1J(0M*%%	1r   )"collections.abcr   typingr   r   r8   vllm.configr   r   /vllm.model_executor.layers.attention_layer_baser   vllm.v1.attention.backendr	   r
   r   vllm.v1.kv_cache_interfacer   r   r   vllm.v1.worker.utilsr   dictstrr   r    r4   rD   Tensorrc   listrg   rn   r~   r   rG   r   r   <module>r      s   % $ $ $ $ $          ? ? ? ? ? ? ? ? N N N N N N         
         
 / . . . . .: $sK7G2H    1"11 L1 1 1 1D " L       ('"'sEL01' --.' 
#u|
	' ' ' 'T
5<(
#s(^
 #
 --.	

 L
 
#u|

 
 
 
	"<	""	" 
#u|
	" 	" 	" 	") !9:)) ) 	)
 ) l) ) 5<() <) #) 
#s(^) ) ) ) ) )r   