
    .`i3                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) erd dl*m+Z+  ee,          Z- ed          Z.eg df         Z/ G d de          Z0d dl1m2Z3 d dl1m4Z5 e5Z4e3Z2dS )    N)ABCabstractmethod)Callable)Future)cached_property)TYPE_CHECKINGLiteralTypeVaroverload)
VllmConfig)KVOutputAggregator)KVConnectorHandshakeMetadata)init_logger)LoRARequest)SupportedTask)resolve_obj_by_qualname)GrammarOutputSchedulerOutput)ReconfigureDistributedRequest)KVCacheConfigKVCacheSpec)DraftTokenIdsModelRunnerOutput)
WorkerBase)KVConnectorBase_Rc                      e Zd ZU dZdZeed<   dZeed<   ede	de
d          fd            Zde	ddfd	ZedBd
            Zdee         ddfdZdefdZdee         fdZdeeeef                  fdZe	 	 	 	 dCdeeegef         z  dedz  dededz  de d         dee         fd            Z!e	 	 	 	 dDdeeegef         z  dedz  dededz  de d         de"ee                  fd            Z!e	 dCdefd            Z!deeee#f                  fdZ$e	 dEde%de d         de&dz  fd            Z'e	 dFde%de d         de"e&dz           fd            Z'	 dEde%dede&dz  e"e&dz           z  fdZ'e	 dEd e(dz  de d         de&fd!            Z)e	 dFd e(dz  de d         de"e&         fd"            Z)	 dEd e(dz  dede&e"e&         z  fd#Z)dBd$Z*de+dz  fd%Z,e-defd&            Z.dFd'efd(Z/	 	 dGd)ed*edz  d+edz  ddfd,Z0edBd-            Z1dBd.Z2dHd1Z3e4dee5d2f         fd3            Z6d4e7defd5Z8d6edefd7Z9d6edefd8Z:de;e         fd9Z<dBd:Z=dId<efd=Z>dJd>ee         dz  fd?Z?d@e@ddfdAZAdS )KExecutorzAbstract base class for vLLM executors."

    An executor is responsible for executing the model on one device,
    or it can be a distributed executor that can execute the model on multiple devices.
    Fuses_raysupports_ppvllm_configreturnc                    | j         }|j        }t          |t                    r+t	          |t
                    st          d| d          |}n|dk    r	ddlm} |}n|dk    r	ddl	m
} |}n||dk    r	dd	lm} |}nm|d
k    rt          }n_t          |t                    r8t          |          }t	          |t
                    st          d| d          nt!          d|           |S )NzAdistributed_executor_backend must be a subclass of Executor. Got .rayr   )RayDistributedExecutormp)MultiprocExecutoruniUniProcExecutorexternal_launcherz&Unknown distributed executor backend: )parallel_configdistributed_executor_backend
isinstancetype
issubclassr   	TypeErrorvllm.v1.executor.ray_executorr&   #vllm.v1.executor.multiproc_executorr(   !vllm.v1.executor.uniproc_executorr+   ExecutorWithExternalLauncherstrr   
ValueError)r!   r-   r.   executor_classr&   r(   r+   s          m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/executor/abstract.py	get_classzExecutor.get_class-   s{    &5'6'S$2D99 !	:HEE E%AE E E   :NN)U22LLLLLL3NN)T11MMMMMM.NN)U22IIIIII,NN)-@@@ :NN4c:: 
	45QRRNnh77 7%37 7 7   W9UWW       Nc                 V   || _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _	        | 
                                 d| _        t                      | _        d | _        d S )NF)r!   model_configcache_configlora_configload_configr-   scheduler_configdevice_configspeculative_configobservability_config_init_executoris_sleepingsetsleeping_tagskv_output_aggregator)selfr!   s     r:   __init__zExecutor.__init__W   s     ''4'4&2&2*: + <(6"-"@$/$D! '*uu?C!!!r<   c                     t           NNotImplementedErrorrK   s    r:   rF   zExecutor._init_executorj   s    !!r<   kv_cache_configsc                 `    |                      d|f           |                      d           dS )zp
        Initialize the KV caches and begin the model execution loop of the
        underlying workers.
        initialize_from_configargscompile_or_warm_up_modelNcollective_rpc)rK   rR   s     r:   rT   zExecutor.initialize_from_confign   s=    
 	4<L;NOOO677777r<   callbackc                     dS )zk
        Register a function to be called if the executor enters a permanent
        failed state.
        N )rK   rZ   s     r:   register_failure_callbackz"Executor.register_failure_callbackv   s	    
 	r<   c                 ,    |                      d          S )Ndetermine_available_memoryrX   rQ   s    r:   r_   z#Executor.determine_available_memory}   s    ""#?@@@r<   c                 ,    |                      d          S )Nget_kv_cache_specrX   rQ   s    r:   get_kv_cache_specszExecutor.get_kv_cache_specs   s    ""#6777r<   r\   methodtimeoutrV   kwargs	non_blockc                     dS )a9  
        Execute an RPC call on all workers.

        Args:
            method: Name of the worker method to execute, or a callable that
                is serialized and sent to all workers to execute.

                If the method is a callable, it should accept an additional
                `self` argument, in addition to the arguments passed in `args`
                and `kwargs`. The `self` argument will be the worker object.
            timeout: Maximum time in seconds to wait for execution. Raises a
                [`TimeoutError`][] on timeout. `None` means wait indefinitely.
            args: Positional arguments to pass to the worker method.
            kwargs: Keyword arguments to pass to the worker method.
            non_block: If `True`, returns a list of Futures instead of waiting
                for the results.

        Returns:
            A list containing the results from each worker.

        Note:
            It is recommended to use this API to only pass control messages,
            and set up data-plane communication to pass data.
        Nr\   rK   rc   rd   rV   re   rf   s         r:   rY   zExecutor.collective_rpc   s
    B 	r<   Tc                     d S rN   r\   rh   s         r:   rY   zExecutor.collective_rpc   s	     	r<   c                     t           rN   rO   rh   s         r:   rY   zExecutor.collective_rpc   
     "!r<   c                 ,    |                      d          S )N#get_kv_connector_handshake_metadatarX   rQ   s    r:   rm   z,Executor.get_kv_connector_handshake_metadata   s     ""#HIIIr<   scheduler_outputc                     d S rN   r\   rK   rn   rf   s      r:   execute_modelzExecutor.execute_model   	     	r<   c                     d S rN   r\   rp   s      r:   rq   zExecutor.execute_model   rr   r<   c                 D    |                      d|f|          }|d         S )Nrq   rV   rf   r   rX   )rK   rn   rf   outputs       r:   rq   zExecutor.execute_model   s5     $$#3"5 % 
 
 ayr<   grammar_outputc                     d S rN   r\   rK   rw   rf   s      r:   sample_tokenszExecutor.sample_tokens   rr   r<   c                     d S rN   r\   ry   s      r:   rz   zExecutor.sample_tokens   rr   r<   c                 D    |                      d|f|          }|d         S )Nrz   ru   r   rX   )rK   rw   rf   rv   s       r:   rz   zExecutor.sample_tokens   s4     $$>"3y % 
 
 ayr<   c                 0    |                      d           d S )Nexecute_dummy_batchrX   rQ   s    r:   r~   zExecutor.execute_dummy_batch   s    122222r<   c                 <    |                      d          }|d         S )Ntake_draft_token_idsr   rX   rK   rv   s     r:   r   zExecutor.take_draft_token_ids   s     &*&9&9:P&Q&Qayr<   c                     dS )N   r\   rQ   s    r:   max_concurrent_batcheszExecutor.max_concurrent_batches   s    qr<   is_startc                 6    |                      d|f           d S )NprofilerU   rX   )rK   r   s     r:   r   zExecutor.profile   s#    IXK88888r<   pathpatternmax_sizec                 T    |                      dt          |||                     d S )Nsave_sharded_state)r   r   r   re   )rY   dict)rK   r   r   r   s       r:   r   zExecutor.save_sharded_state   s@     	 T7XFFF 	 	
 	
 	
 	
 	
r<   c                     t           )zPChecks if the executor is healthy. If not, it should raise an
        exception.rO   rQ   s    r:   check_healthzExecutor.check_health   rk   r<   c                 0    |                      d           dS )zShutdown the executor.shutdownNrX   rQ   s    r:   r   zExecutor.shutdown  s    J'''''r<   	connectorr   c                 N    t          j        || j        j                  | _        dS )zInit KVOutputAggregatorN)r   from_connectorr-   
world_sizerJ   )rK   r   s     r:   init_kv_output_aggregatorz"Executor.init_kv_output_aggregator  s(    $6$Et+6%
 %
!!!r<   .c                 <    |                      d          }|d         S )Nget_supported_tasksr   rX   r   s     r:   supported_taskszExecutor.supported_tasks  s"     $$%:;;ayr<   lora_requestc                 v    |j         dk    s
J d            t          |                     d|f                    S )Nr   lora_id must be greater than 0.add_lorarU   )lora_int_idallrY   )rK   r   s     r:   r   zExecutor.add_lora  sB    '!+++-N+++4&&z&HHIIIr<   lora_idc                 l    |dk    s
J d            t          |                     d|f                    S )Nr   r   remove_lorarU   r   rY   rK   r   s     r:   r   zExecutor.remove_lora  s9    {{{={{{4&&}G:&FFGGGr<   c                 l    |dk    s
J d            t          |                     d|f                    S )Nr   r   pin_lorarU   r   r   s     r:   r   zExecutor.pin_lora  s9    {{{={{{4&&z
&CCDDDr<   c                 r    |                      d          }|D ]}||d         k    s
J d            |d         S )N
list_lorasr   z'All workers should have the same LORAs.rX   )rK   setsss      r:   r   zExecutor.list_loras  sI    #22<@@ 	K 	KAQ<<<!J<<<<Awr<   c                 0    |                      d           dS )z+Reset the multi-modal cache in each worker.reset_mm_cacheNrX   rQ   s    r:   r   zExecutor.reset_mm_cache%  s    ,-----r<   r   levelc                 >   | j         rt                              d           d S t          j                    }|                     dt          |                     t          j                    }ddh| _        d| _         t                              d||z
             d S )	NzExecutor is already sleeping.sleep)r   r   weightskv_cacheTz$It took %.6f seconds to fall asleep.)	rG   loggerwarningtimeperf_counterrY   r   rI   info)rK   r   time_before_sleeptime_after_sleeps       r:   r   zExecutor.sleep)  s     	NN:;;;F -//GDu,=,=,=>>>,..'424DGX4X	
 	
 	
 	
 	
r<   tagsc                 0   | j         st                              d           d S |r2|D ]/}|| j        vr$t                              d|| j                    d S 0t	          j                    }|                     dt          |                     t	          j                    }t                              d||z
  ||n| j                   |r |D ]}| j        	                    |           n| j        
                                 | j        s	d| _         d S d S )NzExecutor is not sleeping.z!Tag %s is not in sleeping tags %swake_up)r   r   z(It took %.6f seconds to wake up tags %s.F)rG   r   r   rI   r   r   rY   r   r   removeclear)rK   r   tagtime_before_wakeuptime_after_wakeups        r:   r   zExecutor.wake_up6  s\    	NN6777F 	  d000NN;S$BT   FF	 1
 ".00Idooo>>> -//6 22$DD$*<	
 	
 	

  	' / /"))#..../ $$&&&! 	%$D	% 	%r<   reconfig_requestc                     t           rN   rO   )rK   r   s     r:   reinitialize_distributedz!Executor.reinitialize_distributedQ  s
     "!r<   )r"   N)Nr\   NF)Nr\   NT)F)T)NN)r   r   r"   N)r   rN   )B__name__
__module____qualname____doc__r   bool__annotations__r    staticmethodr   r0   r;   rL   r   rF   listr   rT   FailureCallbackr]   intr_   r   r7   r   rb   r   r   r   r   floattupler	   rY   r   r   rm   r   r   rq   r   rz   r~   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   rH   r   r   r   r   r   r   r\   r<   r:   r   r   #   s          HdK'z 'd:.> ' ' ' \'RDD 
D D D D& " " " ^"8tM7J 8t 8 8 8 8/    ADI A A A A8Dc;.>)?$@ 8 8 8 8  !%"$)   h
|R/00    	 
 t  5>  
b      X D  !%"#' h
|R/00  	
 t 4= 
R	   X LQ" "EI" " " ^"
J	d3445	6J J J J
 MR  /<CEN	T	!   X
 LP  /<CDM	!D(	)   X DI  /<@	T	!F+<t+C$D	D    PU +d2?Fu~	   X
 OS +d2?Ft}	!	"   X GL +d2?C	V$56	6   3 3 3 3md&:         X9 9 9 9 9 9 ##		
 	
	
 t	
 *		

 
	
 	
 	
 	
 " " " ^"
( ( ( (
 
 
 
 }c'9!:    _
J[ JT J J J JH3 H4 H H H HE E E E E ECH    . . . .
 
3 
 
 
 
% %DI, % % % %6" ="	" " " " " "r<   r   )r6   r*   )6r   abcr   r   collections.abcr   concurrent.futuresr   	functoolsr   typingr   r	   r
   r   vllm.configr   /vllm.distributed.kv_transfer.kv_connector.utilsr   1vllm.distributed.kv_transfer.kv_connector.v1.baser   vllm.loggerr   vllm.lora.requestr   
vllm.tasksr   vllm.utils.import_utilsr   vllm.v1.core.sched.outputr   r   vllm.v1.enginer   vllm.v1.kv_cache_interfacer   r   vllm.v1.outputsr   r   vllm.v1.worker.worker_baser   .vllm.distributed.kv_transfer.kv_connector.baser   r   r   r   r   r   r5   r6   _ExecutorWithExternalLauncherr+   _UniProcExecutorr\   r<   r:   <module>r      sA    # # # # # # # # $ $ $ $ $ $ % % % % % % % % % % % % < < < < < < < < < < < < " " " " " " N N N N N N      $ # # # # # ) ) ) ) ) ) $ $ $ $ $ $ ; ; ; ; ; ; D D D D D D D D 8 8 8 8 8 8 A A A A A A A A < < < < < < < < 1 1 1 1 1 1 ONNNNNN	X		WT]]2t8$q" q" q" q" q"s q" q" q"h	          
 #<   r<   