
    .`i&X                        U d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z
 d dlmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lm Z m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. erd dl/m0Z0m1Z1 ddl2m3Z3  ee4          Z5 G d d          Z6 G d d          Z7e6e7z  e-z  e,z  ee8e.f         z  Z9ee:d<    ede9          Z; G d d          Z< edd          Z= edd          Z> G d d eee=e>f                   Z?e@e,ed!         f         dz  ZAee:d"<   e@e,dz  ed!         f         ZBee:d#<    G d$ d%e?eAeBf                   ZC G d& d'eC          ZD G d( d)eC          ZE G d* d+eC          ZF G d, d-e?e,dz  e,f                   ZG G d. d/eG          ZH G d0 d1eG          ZIdS )2    N)ABCabstractmethod)MappingSequence)Lock)TYPE_CHECKINGGeneric	TypeAliasTypeVarcast)override)MsgpackSerdeSingleWriterShmObjectStorageSingleWriterShmRingBuffer)init_logger)	CacheInfoLRUCache)json_count_leavesjson_map_leavesjson_reduce_leaves)	GiB_bytes	MiB_bytes)
format_gib   )MultiModalBatchedFieldMultiModalFeatureSpecMultiModalFieldElemMultiModalKwargsItemMultiModalKwargsItemsNestedTensors)ModelConfig
VllmConfig)ResolvedPromptUpdatec                   <     e Zd ZdZdeded         ddf fdZ xZS )MultiModalProcessorCacheItemz
    The data to store inside `MultiModalProcessorOnlyCache`.

    Args:
        item: The processed tensor data corresponding to a multi-modal item.
        prompt_updates: The prompt updates corresponding to `item`.
    itemprompt_updatesr#   returnNc                 d    t                                                       || _        || _        d S N)super__init__r&   r'   selfr&   r'   	__class__s      i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/multimodal/cache.pyr,   z%MultiModalProcessorCacheItem.__init__3   s0    
 		,    __name__
__module____qualname____doc__r   r   r,   __classcell__r/   s   @r0   r%   r%   *   si         -"- !!78- 
	- - - - - - - - - -r1   r%   c                   <     e Zd ZdZdeded         ddf fdZ xZS )$MultiModalProcessorCacheItemMetadataaI  
    The metadata to store inside `MultiModalProcessorSenderCache`.

    Args:
        item: The processed tensor data corresponding to a multi-modal item.
            Since P1 already stores the tensor data, we only store its size
            metadata in P0 to reduce memory usage. The size metadata is still
            needed to keep the same cache eviction policy as P0.
        prompt_updates: The prompt updates corresponding to `item`.
            This needs to stay on P0 because for some models, they are
            dependent on the processed tensor data (cached on P1).
    r&   r'   r#   r(   Nc                     t                                                       t                              |          | _        || _        d S r*   )r+   r,   MultiModalCacheget_item_size	item_sizer'   r-   s      r0   r,   z-MultiModalProcessorCacheItemMetadata.__init__L   s>    
 	(66t<<,r1   r2   r8   s   @r0   r:   r:   >   si         -"- !!78- 
	- - - - - - - - - -r1   r:   MultiModalCacheValue_V)boundc                       e Zd Zededefd            Zedddededefd            Z	ededefd	            Z
eddd
edee         dedeeef         fd            ZdS )r<   leafr(   c                 z   t          |t                    r|                     |j                  S t          |t                    r|j        S t          |t          t          t          f          r| 	                    |j
                  S t          |t          j                  r|j        S t          j        |          S r*   )
isinstancer%   get_leaf_sizer&   r:   r>   r   r   r   r=   datatorchTensornbytessys	getsizeof)clsrC   s     r0   rF   zMultiModalCache.get_leaf_sizec   s    d899 	0$$TY///d@AA 	">! "$8:MN
 
 	0 $$TY/// dEL)) 	;}T"""r1   FdebugvaluerO   c                    t          t          j        t          | j        |                    }|rFt          |          }t                              dt          |          t          |          |           |S )Nz.Calculated size of %s to be %s GiB (%d leaves))
r   operatoraddr   rF   r   loggerrO   typer   )rM   rP   rO   size
leaf_counts        r0   r=   zMultiModalCache.get_item_sizew   su     "L/#*;UCC
 
  	*511JLL@U4  	   r1   c                      t          |          S )a  
        Get the number of leaf elements in a multi-modal cache value.

        This provides a measure of structural complexity that can be useful
        for debugging cache performance and understanding data patterns.

        Args:
            value: The multi-modal cache value to analyze.

        Returns:
            The number of leaf elements in the nested structure.
        )r   )rM   rP   s     r0   get_item_complexityz#MultiModalCache.get_item_complexity   s     !'''r1   capacity_gb
value_typec                @     t          t          |z   fd          S )Nc                 2                         |           S )NrN   )r=   )xrM   rO   s    r0   <lambda>z/MultiModalCache.get_lru_cache.<locals>.<lambda>   s     1 1!5 1 A A r1   )rL   )r   r   )rM   rZ   r[   rO   s   `  `r0   get_lru_cachezMultiModalCache.get_lru_cache   s4     #AAAAA
 
 
 	
r1   N)r3   r4   r5   classmethodobjectintrF   r?   boolr=   rY   floatrU   r@   r   strr`    r1   r0   r<   r<   b   s       # #C # # # [#& 
 	  # 	
 
   [* ((< ( ( ( ( [(  

 

 



 H


 

 
#r'	

 

 

 [

 

 

r1   r<   _IT)contravariant_O)	covariantc                       e Zd ZdZedededefd            Zde	e         de
e         de
e         fdZedd
            Zd	S )BaseMultiModalCacheav  
    Abstract base class to read/write multi-modal items from cache.

    The idea of multi-modal caching is based on having a client and server
    where the client executes in the frontend process (=P0) and
    the server in the core process (=P1). The data flow is as follows:

    ```
                  is_cached() x N    get_and_update()
    P0: From API -----------------> -----------------> To P1

                 get_and_update()
    P1: From P0 -----------------> To model
    ```

    `is_cached()` can be called any number of times in P0. However,
    `get_and_update()` must be called in P0 and P1 one after another
    so that their cache eviction order remains the same.

    This ensures that the keys in P0 and P1 caches are mirrored,
    allowing us to determine whether a key is cached in P1 by looking
    up the P0 cache, without having to communicate with P1.
    mm_itemmm_hashr(   c                     t           )ag  
        Possibly update a multi-modal item based on whether it is
        in the underlying cache.

        This update is done out-of-place and updates the cache eviction order.

        Args:
            mm_item: The multi-modal item to update.
            mm_hash: The hash of `mm_item`.

        Returns:
            The update multi-modal item.
        NotImplementedError)r.   rn   ro   s      r0   get_and_update_itemz'BaseMultiModalCache.get_and_update_item   s
    & "!r1   mm_items	mm_hashesc                      t          |          t          |          k    sJ  fdt          ||          D             S )a  
        Possibly update a sequence of multi-modal items based on whether they
        are in the underlying cache.

        This update is done out-of-place and updates the cache eviction order.

        Args:
            mm_items: The multi-modal items to update.
            mm_hashes: The hash of each item in `mm_items`.

        Returns:
            A new list of updated multi-modal items.
        c                 B    g | ]\  }}                     ||          S rg   )rs   ).0rn   ro   r.   s      r0   
<listcomp>z6BaseMultiModalCache.get_and_update.<locals>.<listcomp>   s=     
 
 
  $$Wg66
 
 
r1   )lenzip)r.   rt   ru   s   `  r0   get_and_updatez"BaseMultiModalCache.get_and_update   sW    $ 8}}I....
 
 
 
$')$<$<
 
 
 	
r1   Nc                     t           )zClear the underlying cache.rq   r.   s    r0   clear_cachezBaseMultiModalCache.clear_cache   s
     "!r1   r(   N)r3   r4   r5   r6   r   rh   rf   rj   rs   r   listr|   r   rg   r1   r0   rm   rm      s         0 "" " 
	" " " ^"(
2,
 9
 
b	
 
 
 
2 " " " ^" " "r1   rm   r#   MultiModalProcessorCacheInItemMultiModalProcessorCacheOutItemc                       e Zd ZdZededefd            Zdee         dee         fdZ	ddZ
ededdfd	            Zed
ddedefd            ZdS )BaseMultiModalProcessorCachez(The required interface for caches on P0.ro   r(   c                     t           )a$  
        Check whether a multi-modal item is
        in the underlying cache.

        This **DOES NOT** update the cache eviction order.

        Args:
            mm_hash: The hash of the item to check.

        Returns:
            `True` if the item is cached, otherwise `False`.
        rq   r.   ro   s     r0   is_cached_itemz+BaseMultiModalProcessorCache.is_cached_item  s
     "!r1   ru   c                        fd|D             S )aD  
        Check whether a sequence of multi-modal items are
        in the underlying cache.

        This **DOES NOT** update the cache eviction order.

        Args:
            mm_hashes: The hash of each item to check.

        Returns:
            For each item, `True` if the item is cached, otherwise `False`.
        c                 :    g | ]}                     |          S rg   )r   )rx   ro   r.   s     r0   ry   z:BaseMultiModalProcessorCache.is_cached.<locals>.<listcomp>(  s'    FFF##G,,FFFr1   rg   )r.   ru   s   ` r0   	is_cachedz&BaseMultiModalProcessorCache.is_cached  s      GFFFIFFFFr1   Nc                     dS )z&Close the underlying cache, if needed.Nrg   r~   s    r0   closez"BaseMultiModalProcessorCache.close*  s    r1   c                     t           )z
        Update the cache eviction order for a multi-modal item.

        This is used to touch the item in the cache without changing
        its value.

        Args:
            mm_hash: The hash of the multi-modal item.
        rq   r   s     r0   touch_sender_cache_itemz4BaseMultiModalProcessorCache.touch_sender_cache_item.  s
     "!r1   Fdeltar   c                    t           )z
        Get (and reset) the multi-modal cache stats.

        Returns:
            The current multi-modal caching stats.
        rq   r.   r   s     r0   
make_statsz'BaseMultiModalProcessorCache.make_stats;  s
     "!r1   r   )r3   r4   r5   r6   r   rf   rd   r   r   r   r   r   r   r   rg   r1   r0   r   r     s         32"c "d " " " ^"G49 Gd G G G G    
"s 
"t 
" 
" 
" ^
" */ " " "4 "I " " " ^" " "r1   r   c                        e Zd ZdZd fdZededefd            Zed	e	dede
fd
            Zededdfd            Zedd            Zedddedefd            Z xZS )MultiModalProcessorOnlyCachea<  
    The cache which is used on P0 when IPC caching is disabled.

    How to update each item:

    - If the item is in the cache, replace the input with the cached item.
    - If the item is not in the cache, store that item (which includes
      tensor data and metadata) into the cache, and return the input.
    model_configr!   r(   Nc                     t                                                       |                                }t                              |j        t                    | _        d S r*   )r+   r,   get_multimodal_configr<   r`   mm_processor_cache_gbr%   _cacher.   r   	mm_configr/   s      r0   r,   z%MultiModalProcessorOnlyCache.__init__Q  sN     6688	%33+(
 
r1   ro   c                     || j         v S r*   r   r   s     r0   r   z+MultiModalProcessorOnlyCache.is_cached_item[      $+%%r1   rn   c                     | j                             |          x}|j        |j        fS |J d|            t	          | | j         |<   |S N#Expected a cached item for mm_hash=)r   getr&   r'   r%   r.   rn   ro   cached_items       r0   rs   z0MultiModalProcessorOnlyCache.get_and_update_item_  sa      ;??7333K@#[%???""$L'$L$L""";WEGr1   c                 :    | j                             |           d S r*   r   touchr   s     r0   r   z4MultiModalProcessorOnlyCache.touch_sender_cache_itemn      '"""""r1   c                 8    | j                                          d S r*   r   clearr~   s    r0   r   z(MultiModalProcessorOnlyCache.clear_cacher      r1   Fr   r   c                8    | j                             |          S Nr   r   statr   s     r0   r   z'MultiModalProcessorOnlyCache.make_statsv      {e,,,r1   r   r!   r(   Nr   r3   r4   r5   r6   r,   r   rf   rd   r   r   r   rs   r   r   r   r   r7   r8   s   @r0   r   r   F  s:        
 
 
 
 
 
 &c &d & & & X& /  
)	   X #s #t # # # X#    X */ - - -4 -I - - - X- - - - -r1   r   c                        e Zd ZdZd fdZededefd            Zed	e	dede
fd
            Zededdfd            Zedd            Zedddedefd            Z xZS )MultiModalProcessorSenderCachea  
    The cache which is used on P0 when IPC caching is enabled.

    How to update each item:

    - If the item is already in the cache, clear the input to avoid
      unnecessary IPC.

    - If the item is not in the cache, store the metadata of that item so
      that the eviction policy remains the same as the cache on P1,
      and return the input.
      By only storing the metadata, we avoid keeping the data itself in
      memory inside P0.
    r   r!   r(   Nc                     t                                                       |                                }t                              |j        t                    | _        d S r*   )r+   r,   r   r<   r`   r   r:   r   r   s      r0   r,   z'MultiModalProcessorSenderCache.__init__  sN     6688	%33+0
 
r1   ro   c                     || j         v S r*   r   r   s     r0   r   z-MultiModalProcessorSenderCache.is_cached_item  r   r1   rn   c                     | j                             |          x}	d |j        fS |J d|            t          | | j         |<   |S r   )r   r   r'   r:   r   s       r0   rs   z2MultiModalProcessorSenderCache.get_and_update_item  s^      ;??7333K@333""$L'$L$L"""CWMGr1   c                 :    | j                             |           d S r*   r   r   s     r0   r   z6MultiModalProcessorSenderCache.touch_sender_cache_item  r   r1   c                 8    | j                                          d S r*   r   r~   s    r0   r   z*MultiModalProcessorSenderCache.clear_cache  r   r1   Fr   r   c                8    | j                             |          S r   r   r   s     r0   r   z)MultiModalProcessorSenderCache.make_stats  r   r1   r   r   r   r8   s   @r0   r   r   {  s:        
 
 
 
 
 
 &c &d & & & X& /  
)	   X #s #t # # # X#    X */ - - -4 -I - - - X- - - - -r1   r   c                       e Zd ZdZd fdZddd	edefd
Zede	defd            Z
edede	defd            Zede	ddfd            Zedd            Zeddd	edefd            Zedd            ZddZdedede	defdZ xZS )ShmObjectStoreSenderCachea  
    The cache which is used on P0 when IPC caching is enabled.

    How to update each item:

    - If the item is already in the cache, clear the input to avoid
      unnecessary IPC.

    - If the item is not in the cache, store the data in shared memory.
    vllm_configr"   r(   Nc                    t                                                       |j        j        | _        |j                                        }t          t          |j        t          z            t          j        d          }t          |j        t          z  | j        |t                    | _        i | _        d| _        d| _        t)          dd          | _        d S )NTdata_buffer_sizenamecreate)max_object_size	n_readersring_bufferserde_classr   hitstotal)r+   r,   parallel_config
world_sizer   r   r   rc   r   r   envs#VLLM_OBJECT_STORAGE_SHM_BUFFER_NAMEr   mm_shm_cache_max_object_size_mbr   r   
_shm_cache	_p0_cache_hits_totalr   
_last_info)r.   r   r   r   r/   s       r0   r,   z"ShmObjectStoreSenderCache.__init__  s    %5@,BBDD	/ !@9!LMM9
 
 

 7%E	Qo#$	
 
 
 QS
#!444r1   Fr   r   c                f    t          | j        | j                  }|r|| j        z
  }|| _        |}|S )Nr   )r   r   r   r   )r.   r   info
info_deltas       r0   _statzShmObjectStoreSenderCache._stat  s=    dj<<< 	/J"DODr1   ro   c                 6    | j                             |          S r*   )r   r   r   s     r0   r   z(ShmObjectStoreSenderCache.is_cached_item  s    ((111r1   rn   c                     | j                             |          rf| xj        dz  c_        | xj        dz  c_        | j                             |          \  }}| j        |         \  }}|                     |||          |fS |J d|            | xj        dz  c_        	 | j                             ||d                   \  }}t          | j                  dt          | j         j	                  z  k    r| 
                                 |d         |d         j        f| j        |<   |                     |||d         j                  }||d         fS # t          t          f$ r(}t                              d||           |cY d }~S d }~ww xY w)Nr   r   r      z)Failed to cache mm_input with hash %s: %s)r   r   r   r   
get_cachedr   address_as_itemputrz   	key_indexremove_dangling_itemsmodality
ValueErrorMemoryErrorrT   rO   )	r.   rn   ro   addressmonotonic_idr'   r   address_itemes	            r0   rs   z-ShmObjectStoreSenderCache.get_and_update_item  s    ?$$W-- 	YJJ!OJJKK1KK$(O$>$>w$G$G!G\'+~g'>$NH''xHH.XX""$L'$L$L"""q	$(O$7$7$L$L!G\4>""a#do.G*H*H&HHH**,,,&-aj'!*2E&EDN7#//wqz': L  ++K( 	 	 	 LLDgqQQQNNNNNN	s   !B2E F%FFFc                 :    | j                             |           dS )ziTouch the item in shared memory cache to prevent eviction.
        Increments writer_flag on sender side.N)r   r   r   s     r0   r   z1ShmObjectStoreSenderCache.touch_sender_cache_item
  s      	g&&&&&r1   c                     | j                                          | j                                         d| _        d| _        t          dd          | _        d S )Nr   r   )r   r   r   r   r   r   r   r~   s    r0   r   z%ShmObjectStoreSenderCache.clear_cache  sP    
#!444r1   c                .    |                      |          S r   )r   r   s     r0   r   z$ShmObjectStoreSenderCache.make_stats  s    zzz&&&r1   c                 8    | j                                          d S r*   )r   r   r~   s    r0   r   zShmObjectStoreSenderCache.close      r1   c                     | j         j                                        }t          | j                                                  |z
  }|D ]
}| j        |= dS )z;Remove items that are no longer in the shared memory cache.N)r   r   keyssetr   )r.   cached_hashesdangling_hashesro   s       r0   r   z/ShmObjectStoreSenderCache.remove_dangling_items!  s^    16688dn113344}D& 	( 	(Gw''	( 	(r1   r   r   r   c                     t          |d|t                                }t          |d|t                                }t          j        ||g          }|S )Nr   )r   keyrG   fieldr   )r   r   r   
from_elems)r.   r   r   r   	addr_elemid_elemrn   s          r0   r   z)ShmObjectStoreSenderCache.address_as_item(  sn     ((**	
 
 
	 &(**	
 
 
 '19g2FGGr1   )r   r"   r(   Nr   )r3   r4   r5   r6   r,   rd   r   r   r   rf   r   r   r   rs   r   r   r   r   r   rc   r   r   r7   r8   s   @r0   r   r     s       	 	5 5 5 5 5 50 &+   d y     2c 2d 2 2 2 X2  /    
)	      X D 's 't ' ' ' X'
 5 5 5 X5 */ ' ' '4 'I ' ' ' X'       X ( ( ( (*-9<	       r1   r   c                   h    e Zd ZdZded         ded         fdZe	 d
dededz  ddfd	            Z	dS )BaseMultiModalReceiverCachez(The required interface for caches on P1.mm_featuresr   r(   c                     |D ]+}|j         p|j        }|                     ||j                   ,|D ]0}|j         p|j        }|                     |j        |          |_        1|S )a1  
        Update multimodal features with cached encoder outputs.
        Touch all identifier at first before update to avoid
        item in updated list evict during update.

        Uses mm_hash for cache key to share across LoRAs (falls back to
        identifier for backward compatibility).
        )ro   
identifiertouch_receiver_cache_itemrG   rs   )r.   r   feature	cache_keys       r0   get_and_update_featuresz3BaseMultiModalReceiverCache.get_and_update_features@  s     # 	D 	DG=7+=I**9glCCCC" 	M 	MG=7+=I33GL)LLGLLr1   Nro   rn   c                     t           )ap  
        Update the cache eviction order for a multi-modal item.

        This is used to touch the item in the cache without changing
        its value.

        Args:
            mm_hash: The hash of the multi-modal item.
            mm_item: The multi-modal item itself. This is optional and
                may not be needed by some cache implementations.
        rq   r.   ro   rn   s      r0   r   z5BaseMultiModalReceiverCache.touch_receiver_cache_itemU  s
    " "!r1   r*   )
r3   r4   r5   r6   r   r  r   rf   r   r   rg   r1   r0   r   r   ;  s         3212 
%	&   *  04" "" &," 
	" " " ^" " "r1   r   c                        e Zd ZdZd fdZededz  dedefd	            Ze	 ddededz  ddfd
            Z	edd            Z
 xZS )MultiModalReceiverCachea.  
    The cache which is used on P1 when IPC caching is enabled.

    How to update each item:

    - If the item is in the cache, replace the input with the cached item.
    - If the item is not in the cache, store that item (which includes tensor
      data) into the cache, and return the input.
    r   r!   r(   Nc                     t                                                       |                                }t                              |j        t                    | _        d S r*   )r+   r,   r   r<   r`   r   r   r   r   s      r0   r,   z MultiModalReceiverCache.__init__t  sN     6688	%33+ 
 
r1   rn   ro   c                 t    | j                             |          x}|S |J d|            || j         |<   |S r   )r   r   r   s       r0   rs   z+MultiModalReceiverCache.get_and_update_item~  sQ      ;??7333K@""$L'$L$L"""&Gr1   c                 :    | j                             |           d S r*   r   r  s      r0   r   z1MultiModalReceiverCache.touch_receiver_cache_item  s      	'"""""r1   c                 8    | j                                          d S r*   r   r~   s    r0   r   z#MultiModalReceiverCache.clear_cache  r   r1   r   r*   r   )r3   r4   r5   r6   r,   r   r   rf   rs   r   r   r7   r8   s   @r0   r  r  i  s         
 
 
 
 
 
 %,  
	   X  04# ## &,# 
	# # # X#    X    r1   r  c                        e Zd ZdZdddeddf fdZededz  d	edefd
            Z	e	 dd	ededz  ddfd            Z
edd            Z xZS )ShmObjectStoreReceiverCachez
    The cache which is used on P1 Worker Process when IPC caching is enabled.

    How to update each item:

    - If the item has an address, replace the input with the cached item.
    - If not, return the input.
    r   r"   shared_worker_lockr(   Nc                 h   t                                                       |j        j        | _        |j                                        }t          t          |j        t          z            t          j        d          }t          |j        t          z  | j        |t          |          | _        d S )NFr   )r   r   r   r   reader_lock)r+   r,   r   r   r   r   r   rc   r   r   r   r   r   r   r   r   r   )r.   r   r  r   r   r/   s        r0   r,   z$ShmObjectStoreReceiverCache.__init__  s    
 	%5@,BBDD	/ !@9!LMM9
 
 

 7%E	Qo#$*
 
 
r1   rn   ro   c                     |J d|            d|v r[t          t          |d         j                  }t          t          |d         j                  }| j                            ||          S |S )Nz%Expected an address item for mm_hash=r   r   )r   rc   rG   r   r   )r.   rn   ro   r   r   s        r0   rs   z/ShmObjectStoreReceiverCache.get_and_update_item  sw     ""$NG$N$N"""3	 2 788GW^%<%ABBL?&&w===r1   c                     |J d|v r_t          t          |d         j                  }t          t          |d         j                  }| j                            |||           dS dS )zlTouch the item in shared memory cache to prevent eviction.
        Increments reader_count on receiver side.Nr   r   )r   r   )r   rc   rG   r   r   )r.   ro   rn   r   r   s        r0   r   z5ShmObjectStoreReceiverCache.touch_receiver_cache_item  st     """3	 2 788GW^%<%ABBLO!!'7!VVVVV  r1   c                 8    | j                                          d S r*   )r   r   r~   s    r0   r   z'ShmObjectStoreReceiverCache.clear_cache  r   r1   r*   r   )r3   r4   r5   r6   LockTyper,   r   r   rf   rs   r   r   r7   r8   s   @r0   r  r    s        
!
 %
 
	
 
 
 
 
 
. %,  
	   X  04W WW &,W 
	W W W XW       X         r1   r  )JrR   rK   abcr   r   collections.abcr   r   multiprocessing.synchronizer   r  typingr   r	   r
   r   r   rH   typing_extensionsr   	vllm.envsr   8vllm.distributed.device_communicators.shm_object_storager   r   r   vllm.loggerr   vllm.utils.cacher   r   vllm.utils.jsontreer   r   r   vllm.utils.mem_constantsr   r   vllm.utils.mem_utilsr   inputsr   r   r   r   r   r    vllm.configr!   r"   processing.processorr#   r3   rT   r%   r:   rf   r?   __annotations__r@   r<   rh   rj   rm   tupler   r   r   r   r   r   r   r  r  rg   r1   r0   <module>r%     s    



 # # # # # # # # - - - - - - - - 8 8 8 8 8 8 C C C C C C C C C C C C C C  & & & & & &               
 $ # # # # # 0 0 0 0 0 0 0 0 V V V V V V V V V V 9 9 9 9 9 9 9 9 + + + + + +                 ;33333333::::::	X		- - - - - - - -(- - - - - - - -4 !*+  c= !	" i    WT-...F
 F
 F
 F
 F
 F
 F
 F
R WT&&&WTT"""J" J" J" J" J"#wr2v J" J" J"\ 

)? @
@ADH 	   
 .34*@!AA.    
=" =" =" =" ="68WWX=" =" ="@2- 2- 2- 2- 2-#? 2- 2- 2-j7- 7- 7- 7- 7-%A 7- 7- 7-tC C C C C < C C CL+" +" +" +" +",t35IIJ+" +" +"\- - - - -9 - - -`?  ?  ?  ?  ? "= ?  ?  ?  ?  ? r1   