
    .`i7                     <   d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ erd dlmZmZ d dlmZm Z  ne!Ze!Ze!Ze!Z  ee"          Z# ed          Z$ G d d          Z% G d d          Z&dS )    N)Callable)TYPE_CHECKINGAnyTypeVar)
VllmConfigset_current_vllm_config)init_logger)LoRARequest)MULTIMODAL_REGISTRY)resolve_obj_by_qualname)update_environment_variables)KVCacheSpec)
run_method)GrammarOutputSchedulerOutput)AsyncModelRunnerOutputModelRunnerOutput_Rc                      e Zd ZdZ	 d%dedededededd	fd
Zde	ee
f         fdZd&dZd&dZd&dZdededd	fdZd&dZdej        fdZdeej        gef         defdZdefdZd&dZdedeez  d	z  fdZdedeez  fdZdefdZde defdZ!dedefd Z"dedefd!Z#de$e         fd"Z%e&defd#            Z'd&d$Z(d	S )'
WorkerBasezWorker interface that allows vLLM to cleanly separate implementations for
    different hardware. Also abstracts control plane communication, e.g., to
    communicate request metadata to other workers.
    Fvllm_config
local_rankrankdistributed_init_methodis_driver_workerreturnNc                    || _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        |j        | _        ddlm} || _        || j        _        || _        || _        || _        || _        d| _        d| _        dS )a  
        Initialize common worker components.

        Args:
            vllm_config: Complete vLLM configuration
            local_rank: Local device index
            rank: Global rank in distributed setup
            distributed_init_method: Distributed initialization method
            is_driver_worker: Whether this worker handles driver
                responsibilities
        r   )current_platformN)r   model_configcache_configlora_configload_configparallel_configscheduler_configdevice_configspeculative_configobservability_configkv_transfer_configcompilation_configvllm.platformsr   r   r   r   r   devicemodel_runner)selfr   r   r   r   r   r   s          n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/worker_base.py__init__zWorkerBase.__init__(   s    & ''4'4&2&2*: + <(6"-"@$/$D!"-"@"-"@333333 0$(!$	'>$ 0 ,0.2    c                     t           )z/Get specifications for KV cache implementation.NotImplementedErrorr-   s    r.   get_kv_cache_speczWorkerBase.get_kv_cache_specV       !!r0   c                     t           )z7Prepare model for execution through compilation/warmup.r2   r4   s    r.   compile_or_warm_up_modelz#WorkerBase.compile_or_warm_up_modelZ   r6   r0   c                     dS )z9Basic health check (override for device-specific checks).N r4   s    r.   check_healthzWorkerBase.check_health^       r0   c                     t           )zjInitialize device state, such as loading the model or other on-device
        memory allocations.
        r2   r4   s    r.   init_devicezWorkerBase.init_deviceb   
     "!r0   num_gpu_blocksnum_cpu_blocksc                     t           )z6Initialize the KV cache with the given size in blocks.r2   )r-   r@   rA   s      r.   initialize_cachezWorkerBase.initialize_cacheh   r6   r0   c                 h    t          | j        dd           }t          |          r |             d S d S )Nreset_mm_cache)getattrr,   callable)r-   reset_fns     r.   rE   zWorkerBase.reset_mm_cachel   s@    4,.>EEH 	HJJJJJ	 	r0   c                     t           Nr2   r4   s    r.   	get_modelzWorkerBase.get_modelq       !!r0   fnc                 <     ||                                            S )z1Apply a function on the model inside this worker.)rK   )r-   rM   s     r.   apply_modelzWorkerBase.apply_modelt   s    r$..""###r0   c                 H    ddl m}  ||                                           S )z;Return a transformers-style hierarchical view of the model.r   )format_model_inspection)vllm.model_inspectionrQ   rK   )r-   rQ   s     r.   get_model_inspectionzWorkerBase.get_model_inspectionx   s/    AAAAAA&&t~~'7'7888r0   c                     t           )zLoad model onto target device.r2   r4   s    r.   
load_modelzWorkerBase.load_model~   r6   r0   scheduler_outputc                     t           )zIf this method returns None, sample_tokens should be called immediately after
        to obtain the ModelRunnerOutput.

        Note that this design may be changed in future if/when structured outputs
        parallelism is re-architected.
        r2   r-   rV   s     r.   execute_modelzWorkerBase.execute_model   s
     "!r0   grammar_outputc                     t           )zFShould be called immediately after execute_model iff it returned None.r2   )r-   rZ   s     r.   sample_tokenszWorkerBase.sample_tokens   r?   r0   c                     t           )zaReturn the size of a single cache block, in bytes. Used in
        speculative decoding.
        r2   r4   s    r.   get_cache_block_size_bytesz%WorkerBase.get_cache_block_size_bytes   r?   r0   lora_requestc                     t           rJ   r2   )r-   r_   s     r.   add_lorazWorkerBase.add_lora   rL   r0   lora_idc                     t           rJ   r2   r-   rb   s     r.   remove_lorazWorkerBase.remove_lora   rL   r0   c                     t           rJ   r2   rd   s     r.   pin_lorazWorkerBase.pin_lora   rL   r0   c                     t           rJ   r2   r4   s    r.   
list_loraszWorkerBase.list_loras   rL   r0   c                 4    | j                                         S )z-Get vocabulary size from model configuration.)r   get_vocab_sizer4   s    r.   
vocab_sizezWorkerBase.vocab_size   s      //111r0   c                     dS )z&Clean up resources held by the worker.Nr:   r4   s    r.   shutdownzWorkerBase.shutdown   r<   r0   )Fr   N))__name__
__module____qualname____doc__r   intstrboolr/   dictr   r5   r8   r;   r>   rC   rE   nnModulerK   r   r   rO   rS   rU   r   r   r   rY   r   r\   r^   r
   ra   re   rg   setri   propertyrl   rn   r:   r0   r.   r   r   "   s         "',3 ,3,3 ,3 	,3
 "%,3 ,3 
,3 ,3 ,3 ,3\"4[(8#9 " " " "" " " "   " " " ""s "C "D " " " "   
"29 " " " "$h	{B7 $B $ $ $ $9c 9 9 9 9" " " "	" /	"	3	3d	:	" 	" 	" 	""+"	3	3" " " ""C " " " ""[ "T " " " ""3 "4 " " " "" " " " " ""CH " " " " 2C 2 2 2 X2     r0   r   c                   &   e Zd ZdZ	 	 ddededz  ddfdZddZd	eeef         ddfd
Zde	ee
e
f                  ddfdZde	ee
ef                  ddfdZde	e         ddfdZd Zde
ez  fdZde
fdZdeddfdZdedeez  dz  fdZddZdS )WorkerWrapperBaseao  
    This class represents one process in an executor/engine. It is responsible
    for lazily initializing the worker and handling the worker's lifecycle.
    We first instantiate the WorkerWrapper, which remembers the worker module
    and class name. Then, when we call `update_environment_variables`, and the
    real initialization happens in `init_worker`.
    r   Nrpc_rankglobal_rankr   c                 <    || _         || j         n|| _        |  |  dS )a#  
        Initialize the worker wrapper with the given vllm_config and rpc_rank.
        Note: rpc_rank is the rank of the worker in the executor. In most cases,
        it is also the rank of the worker in the distributed group. However,
        when multiple executors work together, they can be different.
        e.g. in the case of SPMD-style offline inference with TP=2,
        users can launch 2 engines/executors, each with only 1 worker.
        All workers have rpc_rank=0, but they have different ranks in the TP
        group.
        N)r~   r   )r-   r~   r   s      r.   r/   zWorkerWrapperBase.__init__   s2     !,7,?4==[ 	$$$r0   c                 J    | j         | j                                          d S d S rJ   )workerrn   r4   s    r.   rn   zWorkerWrapperBase.shutdown   s,    ;"K  """"" #"r0   rank_mappingc                 @    | j         |v r|| j                  | _         dS dS )z
        Adjust the rpc_rank based on the given mapping.
        It is only used during the initialization of the executor,
        to adjust the rpc_rank of workers after we create all workers.
        N)r~   )r-   r   s     r.   adjust_rankzWorkerWrapperBase.adjust_rank   s+     =L(((7DMMM )(r0   	envs_listc                     || j                  }d}||v r|t          j        v rt          j        |= t          |           d S )NCUDA_VISIBLE_DEVICES)r~   osenvironr   )r-   r   envskeys       r.   r   z.WorkerWrapperBase.update_environment_variables   sI     '$$;;3"*,, 
3$T*****r0   
all_kwargsc           	      T   || j                  }|                    d          }|
J d            || _        |                                 ddlm}  |             |j        }t          |j        t                    rt          |j                  }nt          d          |j        rt          |j                  }g }||j        vrt          |          D ]n}	|	                    d          rt!          ||	          rJ d| d	|	 d
| d            t#          t%          ||	                    r|                    |	           o|j        |fz   |_        t(                              d|||           |                    dd          }
|
Ld}|j        j        }|r|j        dk    rt          |          t(                              |           d| _        nt9          j        ||
          | _        t=          | j                  5   |di || _        ddd           dS # 1 swxY w Y   dS )z
        Here we inject some common logic before initializing the worker.
        Arguments are passed to the worker class constructor.
        r   Nz0vllm_config is required to initialize the workerr   )load_general_pluginszpassing worker_cls is no longer supported. Please pass keep the class in a separate module and pass the qualified name of the class as a string.__zWorker class z already has an attribute z2, which conflicts with the worker extension class .z8Injected %s into %s for extended collective_rpc calls %sshared_worker_lockzoMissing `shared_worker_lock` argument from executor. This argument is needed for mm_processor_cache_type='shm'.shmr:   ) r~   getr   %enable_trace_function_call_for_threadvllm.pluginsr   r#   
isinstance
worker_clsru   r   
ValueErrorworker_extension_cls	__bases__dir
startswithhasattrrG   rF   appendloggerinfopopr   multimodal_configmm_processor_cache_typewarning_oncemm_receiver_cacher   !worker_receiver_cache_from_configr   r   )r-   r   kwargsr   r   r#   worker_classr   extended_callsattrr   msg	mm_configs                r.   init_workerzWorkerWrapperBase.init_worker   s   
 DM*)/M)B)B&&> '&& '99;;;555555%5o0#66 		-D*. .LL H   / 	#:4$ $   N#<+AAA 455 	4 	4Dt,, ! &|T::  D D D D D,@D D D :
  (<d C CDD 4&--d333)5)?(C *& N( "	   $ZZ(<dCC%M 
 $0BI )Y>%GG oo%##C(((%)D"" $E&  " %T%566 	1 	1&,0000DK	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1s   HH!$H!kv_cache_configsc                     || j                  }| j        J t          | j                  5  | j                            |           d d d            d S # 1 swxY w Y   d S rJ   )r   r   r   r   initialize_from_config)r-   r   kv_cache_configs      r.   r   z(WorkerWrapperBase.initialize_from_config<  s    *4+;<+++$T%566 	@ 	@K..???	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@s   AAAc                     | j         J t          | j                   5  | j                                         d d d            d S # 1 swxY w Y   d S rJ   )r   r   r   r>   r4   s    r.   r>   zWorkerWrapperBase.init_deviceB  s    +++$T%566 	& 	&K##%%%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   AA	A	methodc                     	 t          | |||          S # t          $ r'}d|d}t                              |           |d }~ww xY w)NzError executing method z5. This might cause deadlock in distributed execution.)r   	Exceptionr   	exception)r-   r   argsr   er   s         r.   execute_methodz WorkerWrapperBase.execute_methodH  sv    	
 dFD&999 
	 
	 
	F& F F F  S!!!G
	s    
A"A  Ar   c                 ,    t          | j        |          S rJ   )rF   r   )r-   r   s     r.   __getattr__zWorkerWrapperBase.__getattr__[  s    t{D)))r0   rV   c                 n    | j         }|d S |j        D ]!}|                    |j                  |_        "d S rJ   )r   scheduled_new_reqsget_and_update_featuresmm_features)r-   rV   mm_cachereq_datas       r.   _apply_mm_cachez!WorkerWrapperBase._apply_mm_cache^  sR    )F(; 	 	H#+#C#C$$ $H  	 	r0   c                 `    |                      |           | j                            |          S rJ   )r   r   rY   rX   s     r.   rY   zWorkerWrapperBase.execute_modelh  s0     	-...{(()9:::r0   c                 r    | j         }||                                 | j                                         d S rJ   )r   clear_cacher   rE   )r-   r   s     r.   rE   z WorkerWrapperBase.reset_mm_cacheo  s=     2())+++""$$$$$r0   )r   Nro   )rp   rq   rr   rs   rt   r/   rn   rw   r   listru   r   r   r   r   r>   bytesr   r   r   r   r   r   rY   rE   r:   r0   r.   r}   r}      s         "&% %% 4Z% 
	% % % %,# # # #8S#X 84 8 8 8 8
+S#X'
+ 

+ 
+ 
+ 
+S1d4S>&: S1t S1 S1 S1 S1j@tCy @T @ @ @ @& & &S5[    &* * * * * D    ; /;	3	3d	:; ; ; ;% % % % % %r0   r}   )'r   collections.abcr   typingr   r   r   torchtorch.nnrx   vllm.configr   r   vllm.loggerr	   vllm.lora.requestr
   vllm.multimodalr   vllm.utils.import_utilsr   vllm.utils.system_utilsr   vllm.v1.kv_cache_interfacer   vllm.v1.serial_utilsr   vllm.v1.core.sched.outputr   r   vllm.v1.outputsr   r   objectrp   r   r   r   r}   r:   r0   r.   <module>r      s   
			 $ $ $ $ $ $ . . . . . . . . . .        ; ; ; ; ; ; ; ; # # # # # # ) ) ) ) ) ) / / / / / / ; ; ; ; ; ; @ @ @ @ @ @ 2 2 2 2 2 2 + + + + + + HHHHHHHHIIIIIIIIIOM#	X		WT]]J J J J J J J JZE% E% E% E% E% E% E% E% E% E%r0   