
    .`i>                     0   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZmZmZ  ee          Ze	rd d
lmZ d dlmZ ndZddZ e G d d                      Z! G d de          Z"dS )    N)	dataclass)TYPE_CHECKING)envs)init_logger)is_quantized_kv_cache)AttentionBackendEnum   )CpuArchEnumPlatformPlatformEnum)
VllmConfig)AttentionSelectorConfigc                     t          t          d          r!t          t          j        |                     S t	          j                    dk    rt          j                    S t          d          )Nsched_getaffinityDarwinUnsupported OS)hasattroslenr   platformsystem	cpu_countNotImplementedError)pids    f/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/platforms/cpu.pyget_max_threadsr   !   s\    r&'' 42',,---			h	&	&|~~!"2333    c                   v    e Zd ZU dZeed<   dZeed<   dZeed<   ede	defd            Z
edefd	            Zd
S )LogicalCPUInfoidphysical_core	numa_nodevaluereturnc                 L    	 t          |          }n# t          $ r d}Y nw xY w|S )Nr    )int	Exception)clsr$   	int_values      r   _intzLogicalCPUInfo._int0   s>    	E

II 	 	 	III	s    !!obj_dictc                 D   |                      d          }|                      d          }|                      d          }|^|\|Zt          t                              |          t                              |          t                              |                    S | S )Ncpucorenoder!   r"   r#   )getr   r+   )r,   r!   r"   r#   s       r   json_decoderzLogicalCPUInfo.json_decoder8   s    \\%   V,,LL((	
m3y7H!!&&r**,11-@@(--i88    Or   N)__name__
__module____qualname__r!   r'   __annotations__r"   r#   classmethodstrr+   staticmethoddictr3    r   r   r   r   *   s         BLLLM3Is     [ t    \  r   r   c                   @   e Zd ZU ej        ZdZeed<   dZ	eed<   dZ
eed<   dZeed<   dZed	eej                 fd
            Zed!ded	efd            Zeddddd	efd            Zed!ded	efd            Zedej        d	dfd            Zed             Zeded	dfd            Zed	eee         ee         f         fd            Zed	efd            Z ed	efd            Z!ed	efd            Z"ed	efd            Z#ed	efd            Z$ed	efd             Z%dS )"CpuPlatformr.   device_namedevice_typeCPUdispatch_keygloodist_backendCPU_VISIBLE_MEMORY_NODESr%   c                 l   |                                  t          j        k    rt          j        t          j        gS |                                  t          j        k    rt          j        	                    d          rht          j        dgd                                          dk    r#t          j        t          j        t          j        gS t          j        t          j        gS |                                  t          j        k    rt          j        gS t          j        t          j        t          j        gS )Ndarwinz#sysctl -n hw.optional.arm.FEAT_BF16T)shell   1)get_cpu_architecturer
   POWERPCtorchbfloat16float32ARMsysr   
startswith
subprocesscheck_outputstripfloat16RISCV)selfs    r   supported_dtypeszCpuPlatform.supported_dtypesP   s    $$&&+*===NEM22&&((KO;;@W@WA
 A
; ':;4  %'' 
 u}EEM5=11&&((K,===0 M?"u}==r   r   	device_idc                     dS )Nr.   r<   )r)   rY   s     r   get_device_namezCpuPlatform.get_device_name{       ur   selected_backendr   attn_selector_configr   c                     |r+|t           j        k    rt                              d|           |j        rt          d          |j        rt          d          t           j                                        S )NzCannot use %s backend on CPU.zMLA is not supported on CPU.z)Sparse Attention is not supported on CPU.)r   CPU_ATTNloggerinfouse_mlar   
use_sparseget_path)r)   r]   r^   s      r   get_attn_backend_clsz CpuPlatform.get_attn_backend_cls   s}      	K 04H4Q Q QKK79IJJJ' 	F%&DEEE* 	S%&QRRR#,55777r   c                    ddl m} ddlm} t          j        }d}|t          j                            |          rd t          j	        |          D             ng }t          |          pd}t          j                    j        |z  }d}	t          ||	z            }t                              d ||                     n||z  }|S )	Nr   )	GiB_bytes)
format_gibz/sys/devices/system/nodec                 <    g | ]}|                     d           |S )r0   )rQ   ).0ds     r   
<listcomp>z7CpuPlatform.get_device_total_memory.<locals>.<listcomp>   s)    IIIqALL4H4HIIIIr   r	   g      ?z:VLLM_CPU_KVCACHE_SPACE not set. Using %s GiB for KV cache.)vllm.utils.mem_constantsrh   vllm.utils.mem_utilsri   r   VLLM_CPU_KVCACHE_SPACEr   pathexistslistdirr   psutilvirtual_memorytotalr'   ra   warning_once)
r)   rY   rh   ri   kv_cache_spacenode_dirnodesnum_numa_nodesfree_cpu_memoryDEFAULT_CPU_MEM_UTILIZATIONs
             r   get_device_total_memoryz#CpuPlatform.get_device_total_memory   s    6666663333334-! 7>>(++IIBJx00IIII 
 !ZZ_1N$355;~MO*-' 3N!NOONL
>**   
 i'Nr   deviceNc                 D    t           j                            |           dS )z:
        Set the device for the current platform.
        N)rL   r.   
set_device)r)   r   s     r   r   zCpuPlatform.set_device   s     
 		V$$$$$r   c                 (    t          j                    S )N)rL   no_gradr)   s    r   inference_modezCpuPlatform.inference_mode   s    }r   vllm_configc           	      	   |j         }|d|_        |j        }|j        d|_        |j        dz  dk    rt                              d           |j        }d|_        |j        s|j	        r#t          |j                  rt          d          |j                            d          r!t                              d	           d
|_        t                                          |_        |j        }|j        dk    r9|j        2|j        dk    r't                              d|j                   d|_        |j        d
k    rd|_        |j        r!t                              d           d|_        ddlm} g |j        _        |j        }|j        j        |j        k    r[t8          j                            dd          dk    rd}nd}|j        |_        ||_         |j!        "                    ddddd           |j#        |j$        |_        |j%        j&        dk    sJ dt8          j        d<   tO          tQ                                t8          j        d<   tR          j*        dk    r.tO          tW          j,                              t8          j        d<   nt          -                    d           dt8          j        d<   dt8          j        d<   t9          j.        d d!          }	d"|	v rKdt8          j        d#<   dt8          j        d$<   d%t8          j        d&<   d%t8          j        d'<   d%t8          j        d(<   t_          j0                    d)k    r-tc          j2                    tf          j4        tf          j5        fv rd*|	v sd+|	v st8          j6        7                    tV          j8                  }
t8          j6        7                    |
          }t8          j6        9                    |d,          t8          j6        9                    |
d-          g}g }|D ]G}|:                    tw          j;        t8          j6        9                    |d.                               H|r#|d         }|	r|	d/z  }	|	|z  }	|	t8          j        d <   tO          |j        j<                  t8          j        d0<   |]|j=        rXt          -                    d1           d|j        _        t}          |j         j?        |j        j@                  |j        _A        d S d S d S )2NT       r   z^CPU backend prefers block_size is multiples of 32, otherwise the performance is not optimized.FzXChunked-prefill and prefix-cache on the CPU backend is not compatible with FP8 KV cache.fp8zCCPU backend doesn't support KV cache quantization fallback to auto.autor	   mpzH%s is not supported on CPU, fallback to mp distributed executor backend.z#vllm.v1.worker.cpu_worker.CPUWorkerz5Dual-Batch Overlap is not supported on CPU, disabled.)CompilationModeVLLM_CPU_CI_ENV0eagerinductor)dcesize_assertsnan_assertsepilogue_fusionr.   spawnVLLM_WORKER_MULTIPROC_METHODNUMEXPR_MAX_THREADSnobindOMP_NUM_THREADSz+Disabling binding processes to CPU cores...1TORCHINDUCTOR_COMPILE_THREADS"VLLM_DISABLE_SHARED_EXPERTS_STREAM
LD_PRELOAD zlibiomp5.soKMP_BLOCKTIME
KMP_TPAUSEz	dist,distKMP_FORKJOIN_BARRIER_PATTERNKMP_PLAIN_BARRIER_PATTERNKMP_REDUCTION_BARRIER_PATTERNLinuxlibomplibgompz
torch.libslibzlibgomp*.so*:LOCAL_WORLD_SIZEz`MLA is enabled on a non-GPU platform; forcing chunked prefill and prefix caching to be disabled.)Bmodel_configdisable_cascade_attncache_config
block_sizera   warningscheduler_configasync_schedulingenable_chunked_prefillenable_prefix_cachingr   cache_dtypeRuntimeErrorrQ   r>   r~   cpu_kvcache_space_bytesparallel_config
world_sizedistributed_executor_backend
worker_cls
enable_dbovllm.configr   compilation_configcudagraph_capture_sizesmodeVLLM_COMPILEr   environr2   DYNAMO_TRACE_ONCEbackendinductor_compile_configupdatelora_configNONEdevice_configr@   r9   r   r   VLLM_CPU_OMP_THREADS_BINDrL   get_num_threadsrb   getenvr   r   r   rJ   r
   rO   rK   rq   dirname__file__joinextendglobtensor_parallel_sizerc   maxmax_model_lenDEFAULT_MAX_NUM_BATCHED_TOKENSmax_num_batched_tokens)r)   r   r   r   r   r   r   r   r   ld_preload_str	torch_pkg	site_roottorch_libs_pathspytorch_libgomp_so_candidates
torch_libspytorch_libgomp_sos                   r   check_and_update_configz#CpuPlatform.check_and_update_config   s6   "/#04L-"/"*&)L#"R'1,,NN>  
 '7,1)3	1	 $L$<==	 ?  
 #..u55 	.NNU   (.L$/:/R/R/T/T,%5&**<H<DDNN4  <   <@O8%//)NO&% 	/NNRSSS).O& 	0/////AC&>(;)./2NNN z~~/55<<!$&5&G#)0&6==$)#('+	    ".&5&:#(4==== 6=
12 -00A0A,B,B
())X55,/0E0G0G,H,HBJ()) KKEFFF 7:
23 <?
78 <44N** +.BJ''*BJ|$9DBJ566ABJ23:EBJ67 O((-//!456 6//93N3N 77I	22I Y55Y..  -/).  
-44Ibgll:~FFGG    - :%B1%E"! *"c)N"44+9
<( *-'<*
 *

%& #(<#KK=   CHK(?BE(6,KC CK(??? $###r   c                 ,   t          j                    dk    sJ t          j        ddd          }t	          j        dd|          }t          j        |t          j	                  d         }d	 |D             }t          t          d
          rt          j        d          nt          d          fd|D             }t                      |D ]}                    |j                   t#                    }t$          j        }|t          j        v rzt          j        |         dk    rdd t          j        |                             d          D             }fdt#          t-          t          |                              D             }||fS )Nr   zlscpu -J -e=CPU,CORE,NODET)rH   textz"node":\s*-\s*(,|\n)z"node": 0\1)object_hookcpusc                 >    g | ]}d |j         |j        |j        fv|S )r    r1   )rk   xs     r   rm   z>CpuPlatform.get_allowed_cpu_core_node_list.<locals>.<listcomp>s  s:     
 
 
!$=== ===r   r   r   r   c                 &    g | ]}|j         v |S r<   )r!   )rk   r   allowed_cpu_id_lists     r   rm   z>CpuPlatform.get_allowed_cpu_core_node_list.<locals>.<listcomp>~  s&    WWW!14CV;V;VA;V;V;Vr   r   c                 ,    g | ]}t          |          S r<   )r'   )rk   ss     r   rm   z>CpuPlatform.get_allowed_cpu_core_node_list.<locals>.<listcomp>  s    LLLSVVLLLr   ,c                     g | ]}|v |	S r<   r<   )rk   r   allowed_numa_nodess     r   rm   z>CpuPlatform.get_allowed_cpu_core_node_list.<locals>.<listcomp>  s+     ' ' 'qDV?V?V?V?V?Vr   )r   r   rR   rS   resubjsonloadsr   r3   r   r   r   r   setaddr#   sortedr>   device_control_env_varr   splitlist)	r)   lscpu_outputlogical_cpu_listr   allowed_numa_nodes_listenv_keyvisible_nodesr   r   s	          @@r   get_allowed_cpu_core_node_listz*CpuPlatform.get_allowed_cpu_core_node_liste  s     G++++ ".'t$
 
 
 v5~|TT15n&A2
 2
 2

2

 
%
 
 
 2*++ 	8"$"6q"9"9%&6777WWWW'7WWW !UU! 	0 	0A""1;////"();"<"<4bj  RZ%8B%>%>LLRZ-@-F-Fs-K-KLLLM' ' ' '!$s='9'9":":;;' ' '# '(888r   c                     dS )NFr<   r   s    r   is_pin_memory_availablez#CpuPlatform.is_pin_memory_available  r\   r   c                     dS )Nz4vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPUr<   r   s    r   get_punica_wrapperzCpuPlatform.get_punica_wrapper  s    EEr   c                     dS )zW
        Get device specific communicator class for distributed communication.
        zFvllm.distributed.device_communicators.cpu_communicator.CpuCommunicatorr<   r   s    r   get_device_communicator_clsz'CpuPlatform.get_device_communicator_cls  s    
 XWr   c                     dS NTr<   r   s    r   supports_structured_outputz&CpuPlatform.supports_structured_output      tr   c                     dS r  r<   r   s    r   opaque_attention_opzCpuPlatform.opaque_attention_op  r  r   c                     dS r  r<   r   s    r   support_hybrid_kv_cachez#CpuPlatform.support_hybrid_kv_cache  r  r   r   )&r4   r5   r6   r   rA   _enumr?   r9   r7   r@   rB   rD   r   propertyr   rL   dtyperX   r8   r'   r[   rf   r~   r   r   r   r   r   tupler   r   boolr   r   r   r  r  r  r<   r   r   r>   r>   H   s        EKKL#L#7(>$u{"3 (> (> (> X(>T   C    [ 808 88 
	8 8 8 [8   C    [2 % % % % % [%   [ p* p p p p [pd '9uT#Y^@T5T/U '9 '9 '9 ['9R     [ F3 F F F [F XC X X X [X 4    [ D    [     [  r   r>   r	  )#r   r   r   r   rR   rP   dataclassesr   typingr   rt   regexr   rL   vllmr   vllm.loggerr   vllm.v1.attention.backendr   #vllm.v1.attention.backends.registryr   	interfacer
   r   r   r4   ra   r   r   vllm.v1.attention.selectorr   r   r   r>   r<   r   r   <module>r     s     				      



 ! ! ! ! ! !                         # # # # # # ; ; ; ; ; ; D D D D D D : : : : : : : : : :	X		 &&&&&&BBBBBBBJ4 4 4 4        :` ` ` ` `( ` ` ` ` `r   