
    .`iA)                        U d dl Z d dlmZmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# d d
l$m%Z% d dl&m'Z' d dl(m)Z)m*Z* erd dl+m,Z, d dl-m.Z. d dl/m0Z0  ee1          Z2d a3d Z4e#eeeee!e eeeeeee"eehZ5e6e7e                  e8d<   de	j9        de:fdZ;	 d+de	j9        de<dede=dedz  de	j9        fdZ>	 d+ddddde<dededz  defdZ?de	j9        de@d e	j9        de	j9        fd!ZA	 d+d"e@d#ed$         deBe@e:f         fd%ZCd"e@de:fd&ZDde	j9        de=e@         fd'ZEd(e@de@fd)ZFde	j9        deGe@e=e@         f         fd*ZHdS ),    N)TYPE_CHECKINGOptional)HfHubHTTPErrorHFValidationError)nn)PretrainedConfig)envs)
LoRAConfig)init_logger)BaseLayerWithLoRAColumnParallelLinearWithLoRA#ColumnParallelLinearWithShardedLoRAFusedMoE3DWithLoRAFusedMoEWithLoRALogitsProcessorWithLoRA/MergedColumnParallelLinearVariableSliceWithLoRA"MergedColumnParallelLinearWithLoRA)MergedColumnParallelLinearWithShardedLoRAMergedQKVParallelLinearWithLoRA&MergedQKVParallelLinearWithShardedLoRAQKVParallelLinearWithLoRA QKVParallelLinearWithShardedLoRAReplicatedLinearWithLoRARowParallelLinearWithLoRA RowParallelLinearWithShardedLoRAVocabParallelEmbeddingWithLoRA)FusedMoE)
LinearBase)get_moe_expert_mappingget_packed_modules_mapping)LogitsProcessor)ParallelLMHead)WeightsMapperc                  $    t           dz  a t           S )N   )_GLOBAL_LORA_ID     c/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/utils.pyget_lora_idr*   2   s    qOr(   _all_lora_classesmodelreturnc                     t          d |                                 D                       rt                              d           dS dS )z@Checks if the model contains FusedMoE layers and warns the user.c              3   @   K   | ]}t          |t                    V  d S N)
isinstancer   ).0modules     r)   	<genexpr>zis_moe_model.<locals>.<genexpr>N   s,      
F
FF:fh''
F
F
F
F
F
Fr(   z8MoE model detected. Using fused MoE LoRA implementation.TF)anymoduleslogger	info_once)r,   s    r)   is_moe_modelr9   L   sH    

F
Femmoo
F
F
FFF STTTt5r(   layer	max_loraslora_configpacked_modules_listmodel_configc                     t           D ]A}|                    | |||          r& ||           }|                    |||           |c S B| S )N)source_layerr<   r=   r>   )r+   can_replace_layercreate_lora_weights)r:   r;   r<   r=   r>   lora_clsinstance_layers          r)   
from_layerrE   T   s{     & 
" 
"%%# 3%	 & 
 
 	" &Xe__N..y+|TTT!!!!	" Lr(   r!   lm_headr"   c                     t          | |j        |j        j        |j        j        |                                          }|                    |||           |S r0   )r   embedding_dimweightdtypedeviceget_sharded_to_full_mappingrB   )r:   rF   r;   r<   r>   rets         r)   from_layer_logits_processorrN   i   sZ     "++-- C I{LAAAJr(   module_name
new_modulec                     |                      d                    |                    d          dd                             }|                    d          d         }t          |||           |S )z1Replace a submodule in a model with a new module..N)get_submodulejoinsplitsetattr)r,   rO   rP   parenttarget_names        r)   replace_submodulerZ   {   sj       +*;*;C*@*@"*E!F!FGGF##C((,KFK,,,r(   nameweights_mapperr#   c                 l   |                      d          r5|                     dd          } |r|                    |           n| } d| z   } n|r|                    |           n| } |                      d          rdnd}|                     d          }|d         dk    rC|d         d	k    s|d         d
k    r+d                    ||d                   }||d         d	k    fS |d         dk    s|d         dk    r+d                    ||d                   }||d         dk    fS t          |  d          )a  Parse the name of lora weights.

    args:
        name: the name of the fine-tuned LoRA, e.g.
            base_model.model.dense1.weight
        weights_mapper: maps the name of weight, e.g.
            `model.` -> `language_model.model.`,
    return:
        tuple(module_name, is_lora_a):
            module_name: the name of the module, e.g. model.dense1,
            is_lora_a whether the tensor is lora_a or lora_b.
    zbase_model.model.    r   rR   rS   rI   lora_Alora_Blora_embedding_Alora_embedding_Bz is unsupported LoRA weight)
startswithreplace	_map_namerV   rU   
ValueError)r[   r\   start_indexpartsnew_names        r)   parse_fine_tuned_lora_namerl      s^   & *++ J||/441?I~''---T"T)1?I~''---T
 ':;;B!!KJJsOOERyH%)x"7"759;P;P88E+b.122rh...Ry&&&%)7I*I*I88E+b.122r&8888
999
:
::r(   c                 0    d}|                      |          S )N)z.embed_tokens.base_layer.weightz.lm_head.base_layer.weight)endswith)r[   embedding_suffixess     r)   is_base_embeddding_weightsrp      s     ==+,,,r(   c                    t                      }|                                 D ]\  }}t          |dd          }||D ]}|                    |           t	          |t
          f          r.|                    |                    d          d                    t	          |t          f          r.|                    |                    d          d                    t          |          S )z2
    In vLLM, all linear layers support LoRA.
    embedding_modulesNrR   rS   )	setnamed_modulesgetattraddr1   r   rV   r   list)r,   supported_lora_modulesr[   r3   rr   s        r)   get_supported_lora_modulesry      s    
 (+uu++-- < <f $F,?FF() 1 1&**40000 fzm,, 	<"&&tzz#r':;;;fxk** 	<"&&tzz#r':;;;&'''r(   	lora_pathc                     t           j                                       r S                      d          rt           j                                       S t           j                                       rt           j                                       S t          j        rddl	m
}m ddlm}  fd}||f}d}n fd}t          t          f}d}	  |            }n'# |$ r t                               |            cY S w xY w|S )	a'  
    Resolves the given lora_path to an absolute local path.

    If the lora_path is identified as a Hugging Face model identifier,
    it will download the model and return the local snapshot path.
    Otherwise, it treats the lora_path as a local file path and
    converts it to an absolute path.

    Parameters:
    lora_path (str): The path to the lora model, which can be an absolute path,
                     a relative path, or a Hugging Face model identifier.

    Returns:
    str: The resolved absolute local path to the lora model.
    ~r   )InvalidParametersnapshot_download)	HTTPErrorc                                  S )N)model_idr'   )rz   r~   s   r)   <lambda>z+get_adapter_absolute_path.<locals>.<lambda>   s    //CCC r(   z&Error downloading the ModelScope modelc                  .    t          j                   S )N)repo_id)huggingface_hubr~   )rz   s   r)   r   z+get_adapter_absolute_path.<locals>.<lambda>   s    o?	RRR r(   z'Error downloading the HuggingFace model)ospathisabsre   
expanduserexistsabspathr	   VLLM_USE_MODELSCOPE modelscope.hub.snapshot_downloadr}   r~   requestsr   r   r   r7   	exception)rz   r}   r   download_fndownload_exceptions	error_loglocal_snapshot_pathr~   s   `      @r)   get_adapter_absolute_pathr      sR   $ 
w}}Y  C   -w!!),,, 
w~~i   *wy)))  >XXXXXXXX&&&&&&CCCCC(*:;<		 SRRR-/@A=	)kmm    	###	 s   
C !DDc                     t          |           rGt          |           x}r't          |           }| j        sd |D             |d<   |S t	          d          t          |           S )Nc                 J    g | ] \  }}}}d |v|                     d          !S )z..rR   )rstrip)r2   _weight_names      r)   
<listcomp>z2process_packed_modules_mapping.<locals>.<listcomp>  sC     5 5 5,;1;..  &&s++...r(   expertszGTo support LoRA for MoE model, 'get_expert_mapping' must be implemented)r9   r   r    is_3d_moe_weightAttributeError)r,   moe_packed_mappingpacked_modules_mappings      r)   process_packed_modules_mappingr   	  s    E 1!7!>!>> 	 &@%F%F") 	
5 50B5 5 5&y1 *) ;  
 *%000r(   r0   )Ir   typingr   r   r   huggingface_hub.utilsr   r   torchr   transformersr   vllmr	   vllm.config.lorar
   vllm.loggerr   vllm.lora.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   $vllm.model_executor.layers.fused_moer   !vllm.model_executor.layers.linearr   vllm.model_executor.utilsr   r    +vllm.model_executor.layers.logits_processorr!   3vllm.model_executor.layers.vocab_parallel_embeddingr"    vllm.model_executor.models.utilsr#   __name__r7   r&   r*   r+   rs   type__annotations__Moduleboolr9   intrw   rE   rN   strrZ   tuplerl   rp   ry   r   dictr   r'   r(   r)   <module>r      sa   
				 * * * * * * * *     C C C C C C C C       ) ) ) ) ) )       ' ' ' ' ' ' # # # # # #                                     & : 9 9 9 9 9 8 8 8 8 8 8 X X X X X X X X ?KKKKKKRRRRRR>>>>>>	X		   # &#'$-3*$!3 3t-./   (	 d     -1 9  	
 #T) Y   4 -1   	
 #T)    $9#&46IY    <@); );
);'8);
39); ); ); );X-S -T - - - -(bi (DI ( ( ( (04 4 4 4 4 4n1") 1S$s)^8L 1 1 1 1 1 1r(   