
    -`io-                     H   d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ erd d
lmZ neZ ee          Zed         Zed         Zed         Zed         Zed         Zed         Zee G d d                                  Z dS )    N)field)TYPE_CHECKINGAnyLiteral)FieldSkipValidationfield_validator)	dataclass)config)init_logger)	GiB_bytes)
format_gibget_cpu_memory)ParallelConfig)             @         )autobfloat16fp8fp8_e4m3fp8_e5m2fp8_inc
fp8_ds_mla)r   float32float16)allalignnone)sha256sha256_cborxxhashxxhash_cbor)nativelmcachec                      e Zd ZU dZdZee         ed<   	  eddd          Z	e
ed<   	  ed	d
          Ze
ed<   	 dZeed<   	 dZeed<   	 dZedz  ed<   	 dZedz  ed<   	 dZeed<   	 dZeed<   	  edd
          Ze
ed<   	 dZeed<   	 dZedz  ed<   	 dZedz  ed<   	  edd          Zedz  ed<   	 dZeed<   	 dZeed<   	 dZeed<   	  e dd           Z!edz  ed!<   	  e dd           Z"edz  ed"<   	 dZ#eed#<   	 dZ$edz  ed$<   	 dZ%e
dz  ed%<   	 d&Z&e'ed'<   	 d(e(fd)Z)d* Z* e+dd+,          e,ded(efd-                        Z-d.e.d(dfd/Z/dS )0CacheConfigzConfiguration for the KV cache.N
block_sizeg?r   r   )defaultgtlegpu_memory_utilization   )r-   ge
swap_spacer   cache_dtypeFis_attention_freenum_gpu_blocks_overridesliding_windowTenable_prefix_cachingr$   prefix_caching_hash_algocpu_offload_gbcalculate_kv_scalescpu_kvcache_space_bytesmamba_page_size_padded)r-   r.   mamba_block_sizemamba_cache_dtypemamba_ssm_cache_dtyper#   mamba_cache_mode)r-   initnum_gpu_blocksnum_cpu_blockskv_sharing_fast_prefillkv_cache_memory_byteskv_offloading_sizer(   kv_offloading_backendreturnc                 H    h d}ddl m}m}  || |          } ||          S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        >   r3   rD   rC   r5   r8   r0   r=   r<   rE   r6   r9   r   )get_hash_factorshash_factors)vllm.config.utilsrK   rL   )selfignored_factorsrK   rL   factorss        e/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/config/cache.pycompute_hashzCacheConfig.compute_hash   sP    
 
 
" 	EDDDDDDD""499|G$$$    c                 H    d | j                                         D             S )Nc                 4    i | ]\  }}|t          |          S  )str).0keyvalues      rQ   
<dictcomp>z,CacheConfig.metrics_info.<locals>.<dictcomp>   s$    HHHJCSZZHHHrS   )__dict__items)rN   s    rQ   metrics_infozCacheConfig.metrics_info   s'     IH$-2E2E2G2GHHHHrS   after)modec                 d    |                     d          rt                              d           |S )Nr   zUsing fp8 data type to store kv cache. It reduces the GPU memory footprint and boosts the performance. Meanwhile, it may cause accuracy drop without a proper scaling factor.)
startswithloggerinfo)clsr4   s     rQ   _validate_cache_dtypez!CacheConfig._validate_cache_dtype   s=     !!%(( 	KK"   rS   parallel_configc                 B   t          j        | j        t          z            }t	                      }|j        }||z  }t          |           dt          |           d}|d|z  k    rt          d|z             |d|z  k    rt          	                    d|           d S d S )Nz GiB out of the z6 GiB total CPU memory is allocated for the swap space.gffffff?zToo large swap space. g?z!Possibly too large swap space. %s)
mathceilr3   r   r   tensor_parallel_sizer   
ValueErrorrc   warning)rN   rg   swap_space_bytestotal_cpu_memorynum_gpus_per_nodecpu_memory_usagemsgs          rQ   verify_with_parallel_configz'CacheConfig.verify_with_parallel_config   s      9T_y%@AA)++ ,@+.?? *++ / /*++/ / / 	
 c$44445;<<<&6 666NN>DDDDD 76rS   )0__name__
__module____qualname____doc__r,   r   	BlockSize__annotations__r   r0   floatr3   r4   
CacheDTyper5   boolr6   intr7   r8   r9   PrefixCachingHashAlgor:   r;   r<   r=   r>   r?   
MambaDTyper@   rA   MambaCacheModer   rC   rD   rE   rF   rG   rH   KVOffloadingBackendrW   rR   r^   r	   classmethodrf   r   rs   rV   rS   rQ   r+   r+   '   sW         *),0Jy)000 %*E#!$B$B$BEBBB@ aA...J...6$K$$$ $t###H*.S4Z...J!%NC$J%%%H"&4&&&+6>3>>>G  "E!222NE222 !&%%%K +/S4Z...7)-C$J---B#(5!#<#<#<cDj<<<I %+z*** )/:...B (.n--- "'t%!@!@!@NC$J@@@:!&t%!@!@!@NC$J@@@:$)T))) )-3:,,,6 (,+++J
 2:.999G %c  %  %  %  %DI I I
 _]111
 z    [ 21E'E 
E E E E E ErS   r+   )!ri   dataclassesr   typingr   r   r   pydanticr   r   r	   pydantic.dataclassesr
   rM   r   vllm.loggerr   vllm.utils.mem_constantsr   vllm.utils.mem_utilsr   r   vllm.config.parallelr   rt   rc   rx   r{   r   r   r~   r   r+   rV   rS   rQ   <module>r      s          . . . . . . . . . . ; ; ; ; ; ; ; ; ; ; * * * * * * $ $ $ $ $ $ # # # # # # . . . . . . ; ; ; ; ; ; ; ; 3333333N	X		./	
 12
/0 PQ 12  
JE JE JE JE JE JE JE  JE JE JErS   