
    .`i                         d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlmZ  G d de          ZdS )    )IteratorN)
VllmConfig)current_platform)AttentionBackend)KVCacheConfig)LoadStoreSpecOffloadingManager)ARCOffloadingManager)
CPUBackend)LRUOffloadingManager)CPULoadStoreSpecGPULoadStoreSpec)OffloadingSpec)CpuGpuOffloadingHandlers)OffloadingHandlerc            
            e Zd Zdedef fdZdefdZdee	e
j        f         dee	ee         f         deeee         ee         ef                  fdZ xZS )	CPUOffloadingSpecvllm_configkv_cache_configc                 $   t                                          ||           | j                            d          }|st	          d          |J d |j        D             }t          |          dk    sJ |                                }|t          |j                  z  |j	        j
        z  }|| j        | j        z  z  }|dk    rt          |          |z  nd| _        d | _        d | _        | j                            dd          | _        d S )Ncpu_bytes_to_usez?cpu_bytes_to_use must be specified in kv_connector_extra_configc                 &    h | ]}|j         j        S  )kv_cache_specpage_size_bytes).0kv_cache_groups     j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/kv_offload/cpu.py	<setcomp>z-CPUOffloadingSpec.__init__.<locals>.<setcomp>!   s.     
 
 
 (8
 
 
       r   eviction_policylru)super__init__extra_configget	Exceptionkv_cache_groupslenpopkv_cache_tensorsparallel_config
world_sizeoffloaded_block_sizegpu_block_sizeint
num_blocks_manager	_handlersr"   )	selfr   r   r   
page_sizesr   kv_bytes_per_blockkv_bytes_per_offloaded_block	__class__s	           r   r%   zCPUOffloadingSpec.__init__   sO   o666,001CDD 	Q  
 ***
 
"1"A
 
 

 :!####$..**/2334)45 	
 (:%)<<(
$ ,a//  !!%AAA 	 37 ;?$($5$9$9:KU$S$Sr    returnc                 :   | j         s| j        j        }|d uo|j        }t	          | j        | j                  }| j        dk    rt          ||          | _         n:| j        dk    rt          ||          | _         nt          d| j         d          | j         S )N)
block_sizer2   r#   )backendenable_eventsarczUnknown eviction policy: z. Supported policies: lru, arc)r3   r   kv_events_configenable_kv_cache_eventsr   r/   r2   r"   r   r
   
ValueError)r5   r@   r>   r=   s       r   get_managerzCPUOffloadingSpec.get_manager>   s    } 	#/@ ,X1A1X  !4  G #u,, 4#=! ! ! %.. 4#=! ! ! !40D 4 4 4   }r    	kv_cachesattn_backendsc              #   *  K   | j         sJt          j                    st          d          t	          || j        | j        | j        |          | _         | j         J t          t          | j         j
        fV  t          t          | j         j        fV  d S )Nz=CPU Offloading is currently only supported on CUDA-alike GPUs)rE   r0   cpu_block_sizenum_cpu_blocks
gpu_caches)r4   r   is_cuda_aliker(   r   r0   r/   r2   r   r   gpu_to_cpu_handlercpu_to_gpu_handler)r5   rD   rE   s      r   get_handlerszCPUOffloadingSpec.get_handlersX   s      
 ~ 	#133 S   6+#2#8#$  DN ~))) 0$.2SSSSS 0$.2SSSSSSSr    )__name__
__module____qualname__r   r   r%   r	   rC   dictstrtorchTensortyper   r   tupler   r   rM   __classcell__)r9   s   @r   r   r      s        &TJ &T &T &T &T &T &T &TP.    4TU\)*T C&6!778T 
%]+T--@BSST	U	T T T T T T T Tr    r   )collections.abcr   rS   vllm.configr   vllm.platformsr   vllm.v1.attention.backendr   vllm.v1.kv_cache_interfacer   vllm.v1.kv_offload.abstractr   r	   vllm.v1.kv_offload.arc_managerr
   vllm.v1.kv_offload.backends.cpur   vllm.v1.kv_offload.lru_managerr   vllm.v1.kv_offload.mediumsr   r   vllm.v1.kv_offload.specr   !vllm.v1.kv_offload.worker.cpu_gpur    vllm.v1.kv_offload.worker.workerr   r   r   r    r   <module>re      sN   % $ $ $ $ $  " " " " " " + + + + + + 6 6 6 6 6 6 4 4 4 4 4 4 H H H H H H H H ? ? ? ? ? ? 6 6 6 6 6 6 ? ? ? ? ? ? I I I I I I I I 2 2 2 2 2 2 F F F F F F > > > > > >XT XT XT XT XT XT XT XT XT XTr    