
    .`iJ,                         d dl mZ d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZ  e	e          Z G d d          Z G d de          ZdS )    )contextmanager)AnyLiteralN)
VllmConfig)init_logger)	LoRAModel)LoRAModelManagerLRUCacheLoRAModelManagercreate_lora_manager)
PEFTHelper)LoRARequest)get_adapter_absolute_pathc            
          e Zd ZU dZeZee         ed<   efde	de
j        deeef         dee         fdZed             Zed	efd
            Z	 dde
j        j        de	dz  d	efdZded	efdZdeded	efdZded	efdZdee         dedz  d	dfdZd	efdZdee         d	dfdZ ded	efdZ!ded	efdZ"d Z#d	ee         fdZ$dS ) WorkerLoRAManagerzWorkerLoRAManager that manages LoRA models on the worker side.

    Every request, the requested LoRAs will be loaded (unless they are already
    loaded), and every other LoRA will be unloaded._manager_clsvllm_configdeviceembedding_moduleslora_model_clsc                 .   || _         || _        d| _        |j        j        | _        |j        j        | _        |j                                        | _        |j	        | _	        |j        j
                                        }|j        | _        || _        |  d S NF)_lora_model_clsr   _cached_dummy_lorascheduler_configmax_num_seqsmax_num_batched_tokensmodel_configget_vocab_size
vocab_sizelora_config	hf_configget_text_configmax_position_embeddingsr   )selfr   r   r   r   text_configs         l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/worker_manager.py__init__zWorkerLoRAManager.__init__    s      .!2EJ'8E(? 	# &2AACC&2 ".8HHJJ'2'J$///    c              #   .   K   d| _         dV  d| _         dS )z_Use this context manager to reuse the dummy lora model
        to avoid creating it repeatedly.NF)r   r$   s    r&   dummy_lora_cachez"WorkerLoRAManager.dummy_lora_cache9   s'       #'"'r(   returnc                     dS )NT r*   s    r&   
is_enabledzWorkerLoRAManager.is_enabledA   s    tr(   Nmodelc           
          t          || j        | j        | j        | j        | j        | j        |          }|| _        |j        S )N)r   r   r   r    r   lora_manager_clsr   )	r   r   r   r   r    r   r   _adapter_managerr0   r$   r0   r   lora_managers       r&   r   z%WorkerLoRAManager.create_lora_managerE   sS    
 +*#'#>(;!.#	
 	
 	
 !-!!r(   lora_requestc                    	 | j         j        }| j         j        }g }|D ]R}||v r|                    ||                    n|                    |           |dk    r|                    |           St          |          }t          |j                  }t          j	        || j
        |j                  }|                    | j                   | j         j        }	t          |	dd           }
| j                            ||||j        d| j        j        | j        |j        |
	  	        }nA# t*          $ r%}t-          d|j         d|j                   |d }~wt0          $ r}|d }~ww xY w|S )Nexpertshf_to_vllm_mappercpu)peft_helperlora_model_idr   dtypemodel_vocab_sizetensorizer_config_dictweights_mapperzLoading lora z failed: No adapter found for )r3   supported_lora_modulespacked_modules_mappingextendappendsetr   	lora_pathr   from_local_dirr#   r?   validate_legalr    r0   getattrr   from_local_checkpointlora_int_id
lora_dtyper   FileNotFoundError
ValueError	lora_name	Exception)r$   r6   rA   rB   expected_lora_lstmoduleexpected_lora_modulesrF   r;   r0   r9   loraes                r&   _load_adapterzWorkerLoRAManager._load_adapterW   s   5	%)%:%Q"%)%:%Q"+-0 5 5333%,,-CF-KLLLL%,,V444Y&&%,,V444$'(9$:$:!1,2HIII$3,3 K &&t'7888 )/E '/BD I I'==%'*6&1!%'3'J0 > 
 
DD ! 		 		 		 6 6 6 6)36 6    	 	 	G	 s$   D%D( (
E&2 EE&E!!E&rankc                 L   |j         |                                 v rdS t          | j        t                    r | j                            |j                   }n4| j                            |j         || j                  }| j        || _        | j        	                    |          S r   )
rK   list_adapters
isinstancer   r   cloner3   create_dummy_lorar   add_adapter)r$   r6   rW   
dummy_loras       r&   add_dummy_loraz WorkerLoRAManager.add_dummy_lora   s    #t'9'9';';;;5d-y99 	5066|7OPPJJ.@@($0F J &.*4'$00<<<r(   
adapter_idc                 6    | j                             |          S N)r3   pin_adapterr$   r`   s     r&   rc   zWorkerLoRAManager.pin_adapter   s    $00<<<r(   requestsmappingc                 l    |                      |           || j                            |           d S d S rb   )_apply_adaptersr3   set_adapter_mapping)r$   re   rf   s      r&   set_active_adaptersz%WorkerLoRAManager.set_active_adapters   sA    X&&&!55g>>>>> r(   c                 2    | j         j        o| j         j        S rb   )r3   supports_mmsupports_tower_connector_lorar*   s    r&   rm   z/WorkerLoRAManager.supports_tower_connector_lora   s    !- D%C	
r(   adapter_requestsc                 x   |                                  }d |D             }t          |          | j        j        k    r-t	          dt          |           d| j        j         d          t          |          }||z
  D ]}|                     |           ||z
  D ]}|                     ||                    d S )Nc                 "    i | ]}||j         |S r.   )r`   ).0adapter_requests     r&   
<dictcomp>z5WorkerLoRAManager._apply_adapters.<locals>.<dictcomp>   s3     
 
 

&
 
 
r(   zNumber of requested models (z1) is greater than the number of GPU model slots ().)rY   lenr3   adapter_slotsRuntimeErrorrE   remove_adapterr]   )r$   rn   existing_adapters
models_maprequested_idsr`   s         r&   rh   z!WorkerLoRAManager._apply_adapters   s     ..00
 
#3
 
 


 z??T2@@@<s: < <)7< < <  
 J+m; 	, 	,J
++++'*;; 	5 	5JZ
34444	5 	5r(   rr   c                     |j         |                                 v rdS |                     |          }| j                            |          }| j                            |j                   |S r   )r`   rY   rV   r3   r]   activate_adapterid)r$   rr   loaded_adapterloadeds       r&   r]   zWorkerLoRAManager.add_adapter   si    %););)=)===5++O<<&22>BB..~/@AAAr(   c                 6    | j                             |          S rb   )r3   rx   rd   s     r&   rx   z WorkerLoRAManager.remove_adapter   s    $33J???r(   c                 8    | j                                          d S rb   )r3   remove_all_adaptersr*   s    r&   r   z%WorkerLoRAManager.remove_all_adapters   s    1133333r(   c                 N    t          | j                                                  S rb   )rE   r3   rY   r*   s    r&   rY   zWorkerLoRAManager.list_adapters   s    4(6688999r(   rb   )%__name__
__module____qualname____doc__r	   r   type__annotations__r   r   torchr   dictstrr'   r   r+   propertyboolr/   nnModuler   r   r   rV   intr_   rc   rE   rj   rm   rh   r]   rx   r   rY   r.   r(   r&   r   r      sg        7 7
 ,<L$'(;;; +40 00 0  S>	0
 Y0 0 0 02 ( ( ^( D    X *." "x"  $&" 
	" " " "$8+ 8) 8 8 8 8t=; =c =d = = = ==c =d = = = =?CH ?sTz ?d ? ? ? ?

t 
 
 
 
5C 5T 5 5 5 5&3 4    @ @ @ @ @ @4 4 4:s3x : : : : : :r(   r   c                       e Zd ZU dZeZee         ed<   	 ddej	        j
        dedz  defdZdee         ddfd	Zd
edefdZdS )LRUCacheWorkerLoRAManagera   WorkerLoRAManager that manages LoRA models on the worker side.

    Uses an LRU Cache. Every request, the requested LoRAs will be loaded
    (unless they are already loaded) and least recently used LoRAs will
    be unloaded if the cache is above capacity.r   Nr0   r   r,   c           
          t          || j        | j        | j        | j        | j        | j        |          }|| _        |j        S )N)r2   r   r   r    r   r   r   )	r   r   r   r   r    r   r   r3   r0   r4   s       r&   r   z-LRUCacheWorkerLoRAManager.create_lora_manager   sS    
 +!.*(;#'#>#	
 	
 	
 !-!!r(   lora_requestsc                 
   d |D             }t          |          | j        j        k    r-t          dt          |           d| j        j         d          |                                D ]}|                     |           d S )Nc                 "    i | ]}||j         |S r.   )rK   )rq   r6   s     r&   rs   z=LRUCacheWorkerLoRAManager._apply_adapters.<locals>.<dictcomp>   s3     
 
 

$l
 
 
r(   zNumber of requested LoRAs (z0) is greater than the number of GPU LoRA slots (rt   )ru   r3   
lora_slotsrw   valuesr]   )r$   r   	loras_maprT   s       r&   rh   z)LRUCacheWorkerLoRAManager._apply_adapters   s    
 
 -
 
 
	
 y>>D1<<<9c)nn 9 9)49 9 9  
 $$&& 	# 	#DT""""	# 	#r(   r6   c                    |j         |                                 vs|j        r|                     |          }| j                            |j                   t          | j                  dz   | j        j        k    r5t          | j        t                    sJ | j                                         | j                            |          }n!| j                            |j                   d u}| j                            |j                    |S )N   )rK   rY   load_inplacerV   r3   rx   r~   ru   capacityrZ   r
   remove_oldest_adapterr]   get_adapterr}   )r$   r6   rT   r   s       r&   r]   z%LRUCacheWorkerLoRAManager.add_adapter   s	    $D,>,>,@,@@@( A %%l33D !00999 4())A-0E0NNN!$"79QRRRRR%;;===*66t<<FF
 %11,2JKKSWW  	..|/GHHHr(   rb   )r   r   r   r   r
   r   r   r   r   r   r   r   r   r   rE   r   rh   r   r]   r.   r(   r&   r   r      s         3 3 4LL$/0KKK
 *." "x"  $&" 
	" " " "$#S-= #$ # # # #" " " " " " " "r(   r   )
contextlibr   typingr   r   r   vllm.configr   vllm.loggerr   vllm.lora.lora_modelr   vllm.lora.model_managerr	   r
   r   vllm.lora.peft_helperr   vllm.lora.requestr   vllm.lora.utilsr   r   loggerr   r   r.   r(   r&   <module>r      sY   & % % % % %          " " " " " " # # # # # # * * * * * *         
 - , , , , , ) ) ) ) ) ) 5 5 5 5 5 5	X		v: v: v: v: v: v: v: v:rL L L L L 1 L L L L Lr(   