
    .`i1                     H   d dl Z d dlmZ d dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6  ee7          Z8 ed          Z9dZ: G d de2e;e9f                   Z< G d d          Z= G d de<e                   Z> G d de=          Z?e=fd ej@        d!e;d"e;d#e;d$ed%e
d&ejA        d'eBe=         d(e=fd)ZCdS )*    N)Callable)TypeVar)nn)
VllmConfig)
LoRAConfigModelConfig)init_logger)BaseLayerWithLoRAFusedMoE3DWithLoRALoRAMappingLoRAMappingType)	LoRAModel)LoRALayerWeightsPackedLoRALayerWeights)PunicaWrapperBaseget_punica_wrapper)
from_layerfrom_layer_logits_processorget_supported_lora_modulesis_moe_modelprocess_packed_modules_mappingreplace_submodule)FusedMoE)SupportsLoRAsupports_multimodal)is_pooling_model)MultiModelKeys)PPMissingLayer)MULTIMODAL_REGISTRY)LRUCache)is_pin_memory_available)MultiModalBudgetTlanguage_modelc                   T     e Zd Zdedeegef         f fdZdededz  f fdZ xZ	S )AdapterLRUCachecapacitydeactivate_fnc                 X    t                                          |           || _        d S N)super__init__r(   )selfr'   r(   	__class__s      k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/model_manager.pyr,   zAdapterLRUCache.__init__1   s)    """*    keyvalueNc                     t                               d|           |                     |           t                                          ||          S )NzRemoving adapter int id: %d)loggerdebugr(   r+   
_on_remove)r-   r1   r2   r.   s      r/   r6   zAdapterLRUCache._on_remove5   sF    2C8883ww!!#u---r0   )
__name__
__module____qualname__intr   objectr,   r#   r6   __classcell__r.   s   @r/   r&   r&   0   s        + +XseVm5L + + + + + +.c .!d( . . . . . . . . . .r0   r&   c                      e Zd ZdZ	 d5dedededededej        d	e	dz  fd
Z
ded	e	ddfdZd	e	deddfdZdefdZedefd            Zedefd            Zedefd            ZdedefdZdefdZdefdZdedefdZdeddfdZd Zd ZdeddfdZed ee dz           dee dz           fd!            Z!	 d5ded"ed#e"eef         dz  defd$Z#defd%Z$dede%dz  fd&Z&d'eddfd(Z'd)eddfd*Z(d)ede)defd+Z*d)edede dz  fd,Z+d-edefd.Z,d/edefd0Z-deddfd1Z.d-edefd2Z/de"eef         fd3Z0d-ededz  fd4Z1dS )6LoRAModelManagerz7A manager that manages multiple LoRA-fine-tuned models.Nmodelmax_num_seqsmax_num_batched_tokens
vocab_sizelora_configdevicevllm_configc                    || _         t          | j                   | _        | j        sJ d| j         j        j         d            i | _        i | _        d| _        || _        || _	        || _
        | j        | j        k    sJ t          j        |dz            dz  | _        dg| j        z  | _        || _        t%          | j                   | _        t)          | j                   | _        i | _        i | _        d| _        t1          | j                   }|o| j         j        | _        |o| j         j        | _        |                     ||           |                                  | | j         _        dS )a  Create a LoRAModelManager and adapter for a given model.

        Args:
            model: the model to be adapted.
            max_num_seqs: the maximum number of sequences model can run in a
                single batch.
            max_num_batched_tokens: the maximum number of tokens model can run
                in a single batch.
            vocab_size: the vocab size of the model.
            lora_config: the LoRA configuration.
        z#No supported LoRA modules found in .LoRA   N) r@   r   supported_lora_modulesr.   r7   _registered_adapters_active_adaptersadapter_typerD   rE   rA   r'   
lora_slotsmathceilrB   lora_index_to_idrC   r   packed_modules_mappingr   packed_modulesmodules_last_mappingr   is_3d_moe_weight_is_3d_moe_modelis_non_gated_moe_is_non_gated_moe_init_punica_wrapper_create_lora_moduleslora_manager)	r-   r@   rA   rB   rC   rD   rE   rF   is_moes	            r/   r,   zLoRAModelManager.__init__>   sr   * $)
&@&L&L#* 	
 	
R$*2F2ORRR	
 	
* ;=!13"&(}////&*i0F0J&K&Ka&O#37&4?2J$&DTZ&P&P# 0 < <465715dj)) & F4:+F!'!GDJ,G!!"8+FFF!!###"&
r0   returnc                    t          | j                  ot          | j        d          | _        i | _        | j        r|                     ||           d S t          || j        | j        | j	                  }|| j        t          <   d S )Nget_mm_mappingmax_batchesrE   rD   )r   r@   hasattrsupports_mmpunica_wrapper_mapping_maybe_init_mmr   rA   rE   rD   DEFAULT_LANGUAGE_WRAPPER_KEY)r-   rB   rF   llm_punica_wrappers       r/   r[   z%LoRAModelManager._init_punica_wrappers   s    
  
++ 6 
$455	 	 EG# 	-CDDDDD!3& -{ ,	" " " # '(DEEEr0   c                    |j         }t          }d| _        | j                                        | _        t          | j        j                  dk    sJ t          || j	        | j
        | j                  }| j        j        d         }|| j        |<   | j        j        r@|                    |          j        | _        | j        ot%          | j        d          | _        | j        sd S t&                              d           t+          ||          }t-          | j                                                                                  }| j                            |                                          }	t          |	| j	        |z  | j
        | j                  }
| j        j        D ]}|
| j        |<   | j        j        rt%          | j        d          rZ| j                            |	          }t          || j	        |z  | j
        | j                  }| j        j        D ]}|| j        |<   d S t&                              d           d S d S )	NF   rb   r   get_num_mm_encoder_tokenszLoRA for the tower and connector of multimodal models is experimental and may contain bugs. Please report any related issues on GitHub if you encounter them.get_num_mm_connector_tokenszConnector LoRA support disabled: model does not implement get_num_mm_connector_tokens(). This method is required to determine the connector's token budget for LoRA operations.)model_configr   supports_tower_connector_lorar@   ra   
mm_mappinglenr$   r   rA   rE   rD   rf   enable_tower_connector_loracreate_processorinfomm_processor_infore   rd   r4   warningr"   maxget_allowed_mm_limitsvaluesrl   get_encoder_budgettower_model	connectorrm   warning_once)r-   rF   rB   rn   mm_registryri   	lm_prefix	mm_budgetlimit_per_promptnum_encoder_tokenstower_punica_wrapperprefixconnector_tokensconnector_punica_wrappers                 r/   rg   zLoRAModelManager._maybe_init_mm   s   
 %0$<)-2**.**C*C*E*E 4?122a7777 0");(	
 
 
 O215	1C#I.7 	%0%A%A,%O%O%TD"151A 2g
7G GD. 1 	F,	
 	
 	
 %[+>>	 #"88::AACC!
 !
 "ZAA((**
 

  2),<<;(	 
  
  
 o1 	G 	GF2FD'// ?$ 	tz#@AA #':#I#I&$ $  ,>$ $ 14D D; $ 0	, , ,( #o7 S SF:RD/77S S ##R    	 	r0   c                 *    t          | j                  S r*   )rq   rL   r-   s    r/   __len__zLoRAModelManager.__len__   s    4,---r0   c                     | j         j        S r*   )rD   max_cpu_lorasr   s    r/   r'   zLoRAModelManager.capacity   s    --r0   c                     | j         j        S r*   )rD   	max_lorasr   s    r/   rO   zLoRAModelManager.lora_slots   s    ))r0   c                     | j         S r*   )rO   r   s    r/   adapter_slotszLoRAModelManager.adapter_slots   s
    r0   lora_idc                 
   || j         v rdS t          d t          | j                  D             d          }|t	          d          |\  }}d| j         |<   | j        |         }t                              d|j        |           |j        | j        |<   | j	        
                                D ]T\  }}|                     ||          }|s|                    |           3|                    ||j        |j                   UdS )z;Move LoRA into a GPU buffer to be used in the forward pass.Fc              3   (   K   | ]\  }}|||fV  d S r*    ).0ir   s      r/   	<genexpr>z4LoRAModelManager.activate_adapter.<locals>.<genexpr>   s8        Aw? G"??? r0   NzNo free lora slotsz+Activating LoRA. int id: %d, slot index: %dT)rM   next	enumeraterR   
ValueErrorrL   r4   r5   idrU   items_get_lora_layer_weights
reset_loraset_loralora_alora_b)	r-   r   first_free_slotindex_
lora_modelmodule_namemodulemodule_loras	            r/   activate_adapterz!LoRAModelManager.activate_adapter   sC   
 d+++5 "+D,A"B"B  
 
 
 "1222"q)-g&.w7
9:=%	
 	
 	
 (2}e$#'<#5#5#7#7 
	 
	K66z;OOK !!%(((OO""    tr0   c                 r    	 | j                             |          }d | j         |<   d S # t          $ r Y d S w xY wr*   )rR   r   r   )r-   r   r   s      r/   _deactivate_adapterz$LoRAModelManager._deactivate_adapter  sR    	)//88E+/D!%((( 	 	 	DD	s   $( 
66lorac                 N    |                      |           || j        |j        <   d S r*   )_create_merged_loras_inplacerL   r   )r-   r   s     r/   _add_adapterzLoRAModelManager._add_adapter  s+    ))$///-1!$'***r0   c                      t          d          )%Pin a LoRAModel in the manager cache.zVPinning is not supported in LoRAModelManager. Use LRUCacheLoRAModelManager for pinning)NotImplementedErrorr-   r   s     r/   pin_adapterzLoRAModelManager.pin_adapter  s    !7
 
 	
r0   mappingc                    | j         r| j        s!| j         r| j        j        d         nt          }nz|j        t          j        k    r| j        j        r| j        j        d         }nF|j        t          j	        k    r| j        j
        r| j        j
        d         }n| j        j        d         }|                     |          }|J |                    || j        | j        dz   | j                   d S )Nr   rk   )re   ro   rp   r$   rh   typer   TOWERr{   	CONNECTORr|   _get_punica_wrapperupdate_metadatarR   rO   rC   )r-   r   target_prefixpunica_wrappers       r/   _set_adapter_mappingz%LoRAModelManager._set_adapter_mapping   s      	>T%G 	> #2.q111 M
 \_222t7R2 O7:MM\_6664?;T6 O5a8MM O:1=M11-@@)))&&!OaO		
 	
 	
 	
 	
r0   c                     | j                                          dg| j        z  | _        | j                                         dS )z'Remove all LoRAModels from the manager.N)rL   clearrO   rR   rM   r   s    r/   remove_all_adaptersz$LoRAModelManager.remove_all_adapters9  sC    !'')))!% 8##%%%%%r0   c                    dt           dt           fd}| j                            d          D ]\  }}t          |t                    r|                     |          s2|                     |          }|,t                              d| j        j	        j
        |           u| j        r3|                    d          rt                              d|d	
           |                    d          d         }| j                            |g           }t          |t"                    r| j        rdgnddg}t'          | j        |t)          || j        | j        || j        j                            }d|v rkd} ||          }	|	r|	 d| }| j                            |          }
t'          | j        |t3          |
|| j        | j        | j        j                            }| j        rt          |t6                    s|                     ||           |                     |           |                    |           d S )Nr   r_   c                 8    |                      d          d         S )NrH   r   )
rpartition)r   s    r/   _parent_modulez=LoRAModelManager._create_lora_modules.<locals>._parent_module@  s    
 ))#..q11r0   F)remove_duplicatez]Regarding %s, vLLM currently only supports adding LoRA to language model, %s will be ignored.z
mixer.gatezHLoRA is not supported for non-gated MoE gate module. %s will be ignored.local)scoperH   w13w1w3lm_headlogits_processor)strr@   named_modules
isinstancer   _match_target_modulesr   r4   rv   r.   r7   rZ   endswith
debug_oncesplitrS   getr   rX   r   r   rO   rD   configget_submoduler   re   r
   register_module_register_packed_modulesset_mapping)r-   r   r   r   r   partspacked_moduled_lst
new_modulelogits_processor_module_nameparent_modulelogits_processor_modules              r/   r\   z%LoRAModelManager._create_lora_modules?  s   	2 	2 	2 	2 	2 	2 $(:#;#;U#;#S#S X	3 X	3K&.11 --k:: !55kBBN%;J(1	    % +*>*>|*L*L !!+!	 "    %%c**2.E!%!<!@!@!K!K&(++ X 150E%WeWWDRV<"*
O$&J% 
 
J K''/A, .{ ; ;  (II+GII 1 +/**B*B0+ +' /J0//(
) 
 

"  
:?P(Q(Q   j999))+666"">2222qX	3 X	3r0   r   r   r
   c                 ~    t          |t                    sJ d| dt          |                       || j        |<   d S )NzModule z+ must be a BaseLayerWithLoRA instance, got )r   r
   r   rU   )r-   r   r   s      r/   r   z LoRAModelManager.register_module  s_    &"344 	
 	
"k " "<<" "	
 	
4 %+[!!!r0   lorasc                     t          |           dz  dk    s
J d            g }t          dt          |           d          D ]7}|                    | ||dz                       |                    d           8|S )zPad LoRA weight pairs to triplets for non-gated MoE.

        For non-gated MoE, each expert has 2 entries (w1, w2) that need to be
        padded to triplets (w1, w2, None) to match pack_moe expectations.
           r   z1Expected pairs of LoRA weights for non-gated MoE.N)rq   rangeextendappend)r   paddedr   s      r/   _pad_lora_pairs_to_tripletsz,LoRAModelManager._pad_lora_pairs_to_triplets  s     5zzA~"""$W"""02q#e**a(( 	  	 AMM%AE	*+++MM$r0   rankembedding_modulesc           
      $   t          ||i           }| j                                        D ]c\  }}|                     |          r*t	          |t
                    r|                     |          F|                    d          }|| j        vr|J |d         |v rt          |j
        d          r|j
        j        n|j
        j        j        d         }t          |j
        d          r|j
        j        n|j
        j        j        d         }	t          j        |||	||j        d         j        d          }
|
|j        |<   |j        j        d	k    rt          j        ||j        |j        ||j        d         j        d         z  |j        d         j        d          }
|
|j        |<   t          j        ||j        |j        ||j        d         j        d         z  |j        d         j        d          }
|
|j        |d
z   <   t          j        ||j        d         j        d         |j        d         j        d         ||j        d         j        d          }
|
|j        |<   :|                    d          }| j        |d                  }g }t=          |          D ]u\  }}t          j        |dz   |z   |j        |         j        d         |j        |         j        d         ||j        |         j        d          }
|                    |
           v|j        j        dk    rL| j         r(tC          |          dk    r| "                    |          }tG          j$        ||| j                   }
ntG          j%        |          }
|
|j        |<   e|S )z-Create zero-initialized LoRAModel for warmup.NrH   r   org_vocab_sizerk   embedding_dimr   cpur   .base_layerFusedMoEWithLoRArY   )&r   r@   r   r   r   r
   r   r   rT   rd   
base_layerr   weightshaper   r   create_dummy_lora_weightslora_a_stackeddtyper   r.   r7   w2_input_sizew2_output_sizew2_lora_a_stackedw13_input_sizew13_output_sizew13_lora_a_stackedlora_b_stackedrS   r   r   rZ   rq   r   r   pack_moepack)r-   r   r   r   r@   r   r   r   	input_dim
output_dimr   replacementssublorasr   rs                  r/   create_dummy_loraz"LoRAModelManager.create_dummy_lora  s    '4,,#':#;#;#=#= W	0 W	0K..{;;!&*;<< ++K88@%%c**E$"555(4449 111 #6#46FGG?)88#.5;A>  #6#4oFF?)77#.5;A> 
 ,E#!"-a06 D 04EK,,%.2FFF ,E#,-v7:@CC039 D 04EK,+E#-. 3A6<Q?@1!4: D @DEKm ;<<+E#-a06r:-a06r:-a06 D 04EK,,#))#..#:59E:<%l33 	* 	*DAq+E#c)A--a06r:-a06r:-a06 D OOD))))#,0BBB - N#h--!2C2C#'#C#CH#M#M1: +@V  DD 26x@@D+/K((r0   c                 D    t          fd| j        D                       S )Nc              3   v   K   | ]3}t          j        d                     |                    p|k    V  4dS )z.*\.{target_module}$)target_moduleN)rematchformat)r   r  r   s     r/   r   z9LoRAModelManager._match_target_modules.<locals>.<genexpr>  sj       
 

 	 H'..].KK[  , +	
 
 
 
 
 
r0   )anyrK   )r-   r   s    `r/   r   z&LoRAModelManager._match_target_modules  s@     
 
 
 

 "&!<
 
 
 
 
 	
r0   c                     | j         s| j        t                   S t          | j                                        t
          d          D ]&}|                    |          r| j        |         c S 'dS )zW
        Determine whether this module supports LoRA and which wrapper to use.
        T)r1   reverseN)re   rf   rh   sortedkeysrq   
startswith)r-   r   r   s      r/   r   z$LoRAModelManager._get_punica_wrapper#  s    
  	M./KLL
 T8==??SRVWWW 	; 	;F%%f-- ;26::::; tr0   module_full_namec                    |                     d          }|d         }| j                            |g           }t          |          dk    rd S d                    |d d                   fd|D             | j        |<   d S )NrH   r   rk   c                 (    g | ]}rd z   |z   n|S )rH   r   )r   r  r   s     r/   
<listcomp>z=LoRAModelManager._register_packed_modules.<locals>.<listcomp>=  s9     1
 1
 1
23-FSL1A1
 1
 1
r0   )r   rS   r   rq   joinrT   )r-   r  r   r   r  r   s        @r/   r   z)LoRAModelManager._register_packed_modules4  s     &&s++Bi266{BGG |!!F%*%%1
 1
 1
 1
7C1
 1
 1
,---r0   r   c                 h   | j                                         D ]\  }}g }t                      }d}|D ]F}|                     ||          }|                    |           |rd}|                    |           G|sdt          t          |                    D ]}	||	         rd ||	<   | j        rB|	                    |          s-|
                    dd          }
|	                    |          r|
}|                    d          rT| j        r(t          |          dk    r|                     |          }t          j        ||| j                  |j        |<   nt          j        |          |j        |<   |D ]}|j                            |d            |j                                        D ]}|                                 | j                                        D ]1\  }}t-          |t.                    r|                     |||           2t3          t5          |j                                                            }|j        J t-          |j        t8                    r"t3          t5          |j                            }n|j        j        }t=          |          dk    ot?                      }|r|j                                        D ]}t-          |j        t8                    rt          t          |j                            D ]^}|j        |         |j        |                                          |j        |<   |j!        |                                          |j!        |<   _|j                                         |_        |j!                                         |_!        d S d S )	NFTmodel. z.expertsr   r   r   )"rT   r   setr   r   addr   rq   r   check_lora_namereplacer   rZ   r   r   r  r   r  popry   optimizerU   r   r   _stack_moe_lora_weightsr   iterr   listrE   r   r!   
pin_memoryr   )r-   r   r   new_module_namesreplacement_lorasreplaced_modulehas_replacementr  r   r   replaced_module_namer   
first_loralora_devicer&  r   s                   r/   r   z-LoRAModelManager._create_merged_loras_inplaceA  s   -1-@-F-F-H-H %	3 %	3)K)?A(+O#O% + +33JBB!((... +&*O#''***" 301122 , ,$Q' '+!!$$$ 7Z-G-G-T-T 7'2':':8R'H'H$--k:: 7"6K##J// ) c2C.D.Dq.H.H(,(H(H)) )% 1G0O%%)%;1 1 1
 -- 1G0K%1 1
 - * 3 3 $$VT22223 $++-- 	 	DMMOOOO#'<#5#5#7#7 	N 	NK&"455 N,,ZMMM'+D1A1H1H1J1J,K,K'L'L
 ,,,j'.. 	3tJ$56677KK$+2K %%.L3J3L3L
 
	;"(//11 	; 	;dk400 ;!&s4;'7'7!8!8 M M;u-5$-1[-?-J-J-L-LE*-1[-?-J-J-L-LE**	M #'+"8"8":":DK"&+"8"8":":DKK
	; 
	;	; 	;r0   c                    |                      ||          }|rOt          j        |j                  r7|                      ||dz             }|}|J |J | j        rj|j        d         j        d         }|j                            |d|j        j        d                   |_        |j                            |d|j        j        d                   |_        |j                            |j        j        d         d|          |_        |j                            |j        j        d         d|          |_        |j        	                    ddd          
                                |_        |j        	                    ddd          
                                |_        |j        |j        g|_        |j        |j        g|_        d S |j        j        d         |j        z  }|j                            |d          }|j                            |d          }	|j        d d ddf                             |d          }
|j        dd ddf                             |d          }|j                            |d          }|j                            |d          }g }g }t          |          D ]}|                    ||                    |                    ||                    |                    |	|                    |                    |
|                    |                    ||                    |                    ||                    ||_        ||_        d S d S d S )Nr   r   rk   r   r   )dim.)r   torch	is_tensorr   rX   r   r   reshaper   permute
contiguousr   chunkr   r   )r-   r   r   r   r   gate_up_proj_loradown_proj_loranum_expertsgate_proj_a	up_proj_agate_proj_b	up_proj_bdown_proj_adown_proj_br   r   r   s                    r/   r#  z(LoRAModelManager._stack_moe_lora_weights  s    22:{KK  L	,5?;+=>> L	, !% < <K-7! ! )N %000!---$ A,$7:@C ,=+C+K+K%6%=%CB%G, ,!( )7(=(E(E^%:%@%D) )%
 ,=+C+K+K%,215r;, ,!( )7(=(E(E")/2B) )%
 ,=+C+K+Kq!, ,*,, "( )7(=(E(Eq!) )*,, %
 &,")&"
 &,")&""" *06q9[=MM/6<<[a<PP-4::;A:NN	/6sssCx@FFR G   .4QTT3Y?EER F  	 -399+19MM,399+29NN{++ 0 0AMM+a.111MM+a.111MM)A,///MM+a.111MM+a.111MM)A,////%+"%+"""YL	, L	, L	, L	,r0   c                     |}| j         r\|                    |          sG|                    dd          }|                    |          r|}t                              d           |                    |          S )Nr  r  z\For the pool model, successfully loaded the LoRA weights after removing the prefix 'model.'.)r   r  r   r4   	info_onceget_lora)r-   r   r   org_module_names       r/   r   z(LoRAModelManager._get_lora_layer_weights  s     &  		)C)CK)P)P 		 &--h;;K))+66 "-  :   ""?333r0   
adapter_idc                 |    || j         vrdS |                     |           | j                             |d            dS NFT)rM   r   r!  r-   rC  s     r/   deactivate_adapterz#LoRAModelManager.deactivate_adapter  sG    T2225  ,,,!!*d333tr0   adapterc                     t                               d|j        |j                   |j        | j        v rdS t	          | j                  | j        k    rt          d          |                     |           dS )N%Adding lora. Model id: %d, int id: %dFzNo free adapter slots.T)r4   r5   r   rL   rq   r'   RuntimeErrorr   )r-   rH  s     r/   add_adapterzLoRAModelManager.add_adapter  sr    <gj'*UUU:2225t())T]::7888'"""tr0   c                 X    | j         |k    r|                     |           || _         d S d S r*   )rV   r   )r-   r   s     r/   set_adapter_mappingz$LoRAModelManager.set_adapter_mapping  s:    ((%%g...!(D )(r0   c                 |    |                      |           || j        vrdS | j                            |d            dS rE  )rG  rL   r!  rF  s     r/   remove_adapterzLoRAModelManager.remove_adapter  sG    
+++T6665!%%j$777tr0   c                 *    t          | j                  S r*   )dictrL   r   s    r/   list_adapterszLoRAModelManager.list_adapters  s    D-...r0   c                 6    | j                             |          S r*   )rL   r   rF  s     r/   get_adapterzLoRAModelManager.get_adapter  s    (,,Z888r0   r*   )2r7   r8   r9   __doc__r   r:   r   r0  rE   r   r,   r[   rg   r   propertyr'   rO   r   boolr   r   r   r   r   r   r   r   r\   r   r   staticmethodr%  r   r   rR  r  r   r   r   r   r   r   r#  r   rG  rL  rN  rP  rS  rU  r   r0   r/   r?   r?   ;   s       AA *.3' 3'3' 3' !$	3'
 3'  3' 3'  $&3' 3' 3' 3'j&)8B	   2KK !$K 
	K K K KZ. . . . . .# . . . X. *C * * * X* s    X$$ 
$ $ $ $L3    2 2 2 2 2
3 
4 
 
 
 

K 
D 
 
 
 
2& & &`3 `3 `3D+3 +8K + + + + $t+,	%	&   \& 48	` `` `  S>D0	`
 
` ` ` `D
 
 
 
 
s 7H47O    "
 
 
 
 
 
G;y G;T G; G; G; G;RS,#S,-?S,NQS, S, S, S,j4#4254	D	 4 4 4 4 S T    9     ); )4 ) ) ) )
     /tCN3 / / / /9c 9i$.> 9 9 9 9 9 9r0   r?   c                   :     e Zd Zdedeegef         f fdZ xZS )LoRALRUCacher'   deactivate_lora_fnc                 L    t                                          ||           d S r*   )r+   r,   )r-   r'   r\  r.   s      r/   r,   zLoRALRUCache.__init__  s$    #566666r0   )r7   r8   r9   r:   r   rX  r,   r<   r=   s   @r/   r[  r[    sV        7 7(C5$;:O 7 7 7 7 7 7 7 7 7 7r0   r[  c                        e Zd ZdZ	 ddej        dededededej	        d	e
dz  f fd
Zdeeef         fdZdedefdZdedef fdZdefdZdedefdZdefdZdefdZ xZS )LRUCacheLoRAModelManagerz;A model manager that manages multiple LoRAs with LRU cache.Nr@   rA   rB   rC   rD   rE   rF   c           	          t                                          |||||||           t          | j        | j                  | _        t          | j        | j                  | _        d S r*   )	r+   r,   r[  r'   rG  rL   rO   r   rM   )	r-   r@   rA   rB   rC   rD   rE   rF   r.   s	           r/   r,   z!LRUCacheLoRAModelManager.__init__  sx     	"	
 	
 	
 3?M423
 3
! /;OT5/
 /
r0   r_   c                 4    t          | j        j                  S )zList all registered LoRAModels.)rR  rL   cacher   s    r/   rS  z&LRUCacheLoRAModelManager.list_adapters4  s    D-3444r0   r   c                     t                               d|j        |j                   |j        | j        vr|                     |           d}n!| j                            |j                   d}|S )zAdd a LoRAModel to the manager.rJ  TF)r4   r5   r   rL   r   touch)r-   r   	was_addeds      r/   rL  z$LRUCacheLoRAModelManager.add_adapter8  sn    <dgtwOOO7$333d###II %++DG444Ir0   r   c                     || j         vr6t          | j                   | j        k    r| j                                          t	                                          |          }| j                             |           |S r*   )rM   rq   rO   remove_oldestr+   r   rd  )r-   r   resultr.   s      r/   r   z)LRUCacheLoRAModelManager.activate_adapterD  ss    
 4000D)**do==!//111))'22##G,,,r0   c                 l    t          | j                  dk    r| j                                         dS dS )Nr   TF)rq   rL   rg  r   s    r/   remove_oldest_adapterz.LRUCacheLoRAModelManager.remove_oldest_adapterR  s7    t())A--%335554ur0   c                 Z    |                      |           |                     |           dS )r   T)_pin_lora_in_cpu_cache_pin_lora_in_gpu_cacher   s     r/   r   z$LRUCacheLoRAModelManager.pin_adapterX  s/    ##G,,,##G,,,tr0   c                     	 | j                             |           d S # t          $ r}t          d| d          |d }~ww xY w)NzPinning failed. LoRA z is not registered.)rL   pinr   )r-   r   errs      r/   rl  z/LRUCacheLoRAModelManager._pin_lora_in_cpu_cache^  sf    	%))'22222 	 	 	DDDD 	s    
A<Ac                 v    || j         vr|                     |           | j                             |           d S r*   )rM   r   ro  r   s     r/   rm  z/LRUCacheLoRAModelManager._pin_lora_in_gpu_cachef  sA    $///!!'***!!'*****r0   r*   )r7   r8   r9   rV  r   Moduler:   r   r0  rE   r   r,   rR  r   rS  rX  rL  r   rj  r   rl  rm  r<   r=   s   @r/   r_  r_    s       EE *.
 
y
 
 !$	

 
  
 
  $&
 
 
 
 
 
45tCN3 5 5 5 5
	 
d 
 
 
 
 
     t    3 4    c    +c + + + + + + + +r0   r_  r@   rA   rB   rC   rD   rF   rE   lora_manager_clsr_   c                     t          | t                    s t          dt          |            d           |d| ||||||d|}	|	S )z(Create a LoRA adapter for a given model.zModel z is not supported for LoRA.)r@   rA   rB   rC   rD   rF   rE   r   )r   r   r   r   )
r@   rA   rB   rC   rD   rF   rE   rs  kwargsr]   s
             r/   create_lora_managerrv  n  sz     e\** LJ$u++JJJKKK## 	!5	 	 	 	L r0   )DrP   collections.abcr   typingr   regexr  r0  r   vllm.configr   vllm.config.lorar   r   vllm.loggerr	   vllm.lora.layersr
   r   r   r   vllm.lora.lora_modelr   vllm.lora.lora_weightsr   r   vllm.lora.punica_wrapperr   r   vllm.lora.utilsr   r   r   r   r   r   $vllm.model_executor.layers.fused_moer   vllm.model_executor.modelsr   r   %vllm.model_executor.models.interfacesr   )vllm.model_executor.models.module_mappingr    vllm.model_executor.models.utilsr   vllm.multimodalr   vllm.utils.cacher    vllm.utils.platform_utilsr!   vllm.v1.worker.utilsr"   r7   r4   r#   rh   r:   r&   r?   r[  r_  rr  rE   r   rv  r   r0   r/   <module>r     s    $ $ $ $ $ $                  " " " " " " 4 4 4 4 4 4 4 4 # # # # # #            + * * * * * K K K K K K K K J J J J J J J J                : 9 9 9 9 9 H H H H H H H H B B B B B B D D D D D D ; ; ; ; ; ; / / / / / / % % % % % % = = = = = = 1 1 1 1 1 1	X		GCLL/ . . . . .hsAv& . . .T9 T9 T9 T9 T9 T9 T9 T9n7 7 7 7 7?9- 7 7 7
T+ T+ T+ T+ T+/ T+ T+ T+~ 0@ 9   	
   L +,      r0   