
    .`i                         d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ  e
e          Z G d de          ZdS )    N)Any)
VllmConfig)get_world_group)init_logger)current_platform)TorchProfilerWrapper)set_random_seed)Worker#init_worker_distributed_environment)XPUModelRunnerc                   ~     e Zd ZdZ	 ddededededef
 fdZd	 Z	 e
j                    d
efd            Zd Z xZS )	XPUWorkerzA XPU worker class.Fvllm_config
local_rankrankdistributed_init_methodis_driver_workerc                 :   t                                          |||||           | j        }|j        dk    sJ t	          j                    sJ d | _        |j        }|j        dk    r2|j         d| j	         }t          ||| j        ddg          | _        d S d S )Nxputorchz-rank-CPUXPU)worker_namer   
activities)super__init__device_configdevice_typer   is_xpuprofilerprofiler_configinstance_idr   r   r   )
selfr   r   r   r   r   r   r!   r   	__class__s
            m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/xpu_worker.pyr   zXPUWorker.__init__   s     	T+BDT	
 	
 	
 *(E1111&((((( %)%5#w..(4GGDIGGK0'?!5>	  DMMM /.    c                     t          j                    rt          j                                        S t          j                                        \  }}t          j                                        }d}|||z   z
  }||fS )Ni   )r   is_data_center_gpur   r   mem_get_infomemory_allocated)r#   _total_gpu_memoryused_memorynon_torch_allocationsfree_gpu_memorys         r%   xpu_get_mem_infozXPUWorker.xpu_get_mem_info6   sw    .00 		59))+++"')"8"8":":A  )4466K$5!.+@U2UVO"$444r&   returnc           	         t           j                                         t           j                                         t           j                                        \  }}t           j                                        }d|dz  dd|dz  dd|dz  dd}t                              |           | j        	                                 | 
                                \  }}| j        |k    sJ d| j         d| d	            t           j                                        d
         }t           j                                         t           j                                        d         }| 
                                \  }}	|	|z
  }
|
|z
  }|dk    r||z  }|| j        j        z  |z
  }d|dz  dd|dz  dd|dz  dd|dz  dd	}t                              |           t          |          S )a  Profiles the peak memory usage of the model to determine how many
        KV blocks may be allocated without OOMs.
        The engine will first conduct a profiling of the existing memory usage.
        Then, it calculates the maximum possible number of GPU and CPU blocks
        that can be allocated with the remaining free memory.
        .. tip::
            You may limit the usage of GPU memory
            by adjusting the `gpu_memory_utilization` parameter.
        z/Before memory profiling run, total GPU memory: i   z.2fz MB, model load takes z MB, free gpu memory is z MB.z/Error in memory profiling. Initial free memory z, current free memory ze. This happens when the GPU memory was not properly cleaned up before initializing the vLLM instance.zallocated_bytes.all.peakzallocated_bytes.all.currentr   z1After memory profiling run, peak memory usage is z MB,torch mem is z MB, non-torch mem is )r   r   empty_cachereset_peak_memory_statsr)   r*   loggerinfomodel_runnerprofile_runr0   init_gpu_memorymemory_statscache_configgpu_memory_utilizationint)r#   r/   r,   current_allocated_bytesmsgr+   peak_memorytorch_allocated_bytesfree_mem	total_memtotal_allocated_bytesr.   available_kv_cache_memorys                r%   determine_available_memoryz$XPUWorker.determine_available_memoryB   su    			))+++,1I,B,B,D,D))"')"<"<">">F!1G!;FF F 7' ALF F #2G";EF F F 	 	C 	%%'''!2244 #o555M#'#7M MM M M 655 i,,../IJ	 %	 6 6 8 89V W"3355) )H 4 58M M 1$$00Kt0GG+U 	"
F$/'$9CF F1G;FF F !6 ?JF F #2G";E	F F F 	 	C,---r&   c                    | j         j        }t          |t          j                  r|j        dk    rt          j                    rt          j        d| j                   | _        t          j        | j                   t          j	        | j
        j                   t          j                                         t          j                            | j                  j        | _        nt#          d| j         j                   t%          j        dd          }t%          j        dt)          | j        j                            }|t$          j        d<   |t$          j        d<   t)          | j                  t$          j        d<   t1          | j        | j        | j        | j        t
          j                   t          j                            t          j        d                                          tA                      j!        	           tE          | j
        j#                   tI          | j        | j                  | _%        d S )
Nr   zxpu:zNot support device type: CCL_ATL_TRANSPORTofiLOCAL_WORLD_SIZE
LOCAL_RANK   )group)&r   device
isinstancer   typer   r   r   
set_devicecheck_if_supports_dtypemodel_configdtyper   r3   get_device_propertiestotal_memoryr9   RuntimeErrorosgetenvstrparallel_config
world_sizeenvironr   r   r   r   dist_backenddistributed
all_reducezerosr   device_groupr	   seedr   r7   )r#   rN   ENV_CCL_ATL_TRANSPORTENV_LOCAL_WORLD_SIZEs       r%   init_devicezXPUWorker.init_device   s   #*vu|,,	Xu$$ ')) %  ,'?do'?'?@@DK'4444T5F5LMMMI!!####(9#B#B$ $    V4;M;TVVWWW "	*=u E E!yD$8$C D D 
  
 +@
&')=
%&#&t#7#7
< +I(O)	
 	
 	
 	$$KNN  (9(9(F 	% 	
 	
 	

 	)./// +dk
 
r&   )F)__name__
__module____qualname____doc__r   r=   rZ   boolr   r0   r   inference_moderF   rf   __classcell__)r$   s   @r%   r   r      s         "'   	
 "%      <
5 
5 
5 U>.C >. >. >. >.@,
 ,
 ,
 ,
 ,
 ,
 ,
r&   r   )rX   typingr   r   torch.distributedvllm.configr   vllm.distributedr   vllm.loggerr   vllm.platformsr   vllm.profiler.wrapperr   vllm.utils.torch_utilsr	   vllm.v1.worker.gpu_workerr
   r   vllm.v1.worker.xpu_model_runnerr   rg   r5   r    r&   r%   <module>ry      s   
			            " " " " " " , , , , , , # # # # # # + + + + + + 6 6 6 6 6 6 2 2 2 2 2 2 Q Q Q Q Q Q Q Q : : : : : :	X		Z
 Z
 Z
 Z
 Z
 Z
 Z
 Z
 Z
 Z
r&   