
    .`i1	                         d dl mZmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ erd d	lmZ d d
lmZ  e	e          Z G d de          ZdS )    )ABCabstractmethod)Iterator)TYPE_CHECKINGN)init_logger)AttentionBackend)LoadStoreSpecOffloadingManager)OffloadingHandler)
VllmConfig)KVCacheConfigc                       e Zd ZdZ	 	 	 	 ddZedefd            Zed	ee	e
j        f         d
ee	ee         f         deeee         ee         ef                  fd            ZdS )OffloadingSpecz Spec for an offloading connectorvllm_configr   kv_cache_configKVCacheConfig | Nonec                 4   t                               d           || _        || _        |j        }|J |j        | _        |j        j        | _	        t          | j                            d| j	                            | _        | j        | j	        z  dk    sJ d S )NzsInitializing OffloadingSpec. This API is experimental and subject to change in the future as we iterate the design.
block_sizer   )loggerwarningr   r   kv_transfer_configkv_connector_extra_configextra_configcache_configr   gpu_block_sizeintgetoffloaded_block_size)selfr   r   r   s       k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/kv_offload/spec.py__init__zOffloadingSpec.__init__   s     	H	
 	
 	
 '.(;!---.H)6A$'!!,0CDD%
 %
! (4+>>!CCCCCC    returnc                     dS )z
        Get an OffloadingManager that will be used
        by the scheduler-side offloading connector to track
        offloaded blocks and manage evictions.
        N )r   s    r    get_managerzOffloadingSpec.get_manager-   s	     	r"   	kv_cachesattn_backendsc                     dS )aM  
        Get offloading handlers along with their respective src and dst types.

        Args:
            kv_caches: A dictionary of layer_name -> gpu_kv_cache tensor.
            attn_backends: A dictionary of layer_name -> AttentionBackend.

        Yields:
            Tuples of (src_type, dst_type, offloading_handler).
        Nr%   )r   r'   r(   s      r    get_handlerszOffloadingSpec.get_handlers6   s	      	r"   N)r   r   r   r   )__name__
__module____qualname____doc__r!   r   r
   r&   dictstrtorchTensortyper   r   tupler	   r   r*   r%   r"   r    r   r      s        **D'D:PD D D D* .    ^ U\)* C&6!778 
%]+T--@BSST	U	   ^  r"   r   )abcr   r   collections.abcr   typingr   r1   vllm.loggerr   vllm.v1.attention.backendr   vllm.v1.kv_offload.abstractr	   r
    vllm.v1.kv_offload.worker.workerr   vllm.configr   vllm.v1.kv_cache_interfacer   r+   r   r   r%   r"   r    <module>r>      s	   $ # # # # # # # $ $ $ $ $ $              # # # # # # 6 6 6 6 6 6 H H H H H H H H > > > > > > 9&&&&&&888888	X		1 1 1 1 1S 1 1 1 1 1r"   