
    .`i)                        d Z ddlZddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ  ee          Zdddddededee	j                  dz  dedz  de	j         f
dZ!de	j         dedej"        ddfdZ#edej	        j         dej"        fd            Z$ e%e&e'ee	j                  ef         f                     Z(	 dede'ee	j                  ef         fdZ)dede'ee	j                  ef         fdZ*dedee	j                  fdZ+dedefdZ,e G d d                      Z-d edee	j                  fd!Z.dS )"z+Utilities for selecting and loading models.    N)contextmanager)	dataclassfield)nn)assert_never)	AttentionMLAAttention)ModelConfig
VllmConfigset_current_vllm_config)init_logger)QuantizationConfigQuantizeMethodBase)SupportsQuant)is_pin_memory_available )prefixmodel_classmodel_configvllm_configr   r   r   returnc                   || j         }|t          |          \  }}| j        t          | j        |           t	          j        |j                  }d |j                                        D             }d|v r;d|v r7t          | d|          5   || |          cddd           S # 1 swxY w Y   d}t          j        |t          d	
           t                              d|           i }d|v r||d<   d|v r
|j        |d<   d|v r
| j        |d<   d|v r
| j        |d<   d|v r
| j        |d<   d|v r
| j        |d<   t          | d|          5   |di |cddd           S # 1 swxY w Y   dS )z1Initialize a model with the given configurations.Nc                     g | ]	}|j         
S  )name).0params     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/model_loader/utils.py
<listcomp>z$initialize_model.<locals>.<listcomp>,   s    III%*III    r   r   T)check_compiler   )r   r   a/  vLLM model class should accept `vllm_config` and `prefix` as input arguments. Possibly you have an old-style model class registered from out of tree and it is used for new vLLM version. Check https://docs.vllm.ai/en/latest/design/arch_overview.html for the design and update the model class accordingly.   )
stacklevelz:Trying to guess the arguments for old-style model class %sconfigcache_configquant_configlora_configscheduler_configr   )r   get_model_architecturer&   configure_quant_configinspect	signature__init__
parametersvaluesr   warningswarnDeprecationWarningloggerwarning	hf_configr%   r'   r(   )	r   r   r   r   _
signatures
all_paramsmsgkwargss	            r   initialize_modelr;      s    "//==Q+{7EEE";#788JII**?*F*F*H*HIIIJ
""x:'='=$[VTTT 	G 	G;;vFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G	A  M#)a8888
NND  
 F:!x:'1x##!,!9~##!,!9~
"" + 7}Z''%0%A!"	 D	P	P	P % %{$$V$$% % % % % % % % % % % % % % % % % %s$   B**B.1B.E&&E*-E*modeltarget_devicec                 @   t          | dd          rt                              d|            d S ddlm}  || |           |                                 D ]h\  }}t          |dd           }t          |t                    r=t          ||          5  |	                    |           d d d            n# 1 swxY w Y   i|                                 D ]K\  }}t          |t          t          f          r*t          |d          r|	                    |j                   Ld S )N,process_weights_after_loading_already_calledFz9process_weights_after_loading already called for model %sr   )7maybe_save_metadata_and_attributes_for_weight_reloadingquant_methodprocess_weights_after_loading)getattrr3   
debug_once4vllm.model_executor.model_loader.online_quantizationr@   named_modules
isinstancer   device_loading_contextrB   r   r	   hasattrdtype)r<   r   r=   r@   r6   modulerA   s          r   rB   rB   Q   s    uDeLL  	G	
 	
 	
 	      <;E<PPP((** 	C 	C	6v~t<<l$677 	C (>> C C::6BBBC C C C C C C C C C C C C C C
 ((** E E	6fy,788 	EW3>
 >
 	E
 001CDDDE Es   B22B6	9B6	rK   c              #   Z  K   |j         dk    r| V  d S i }|                                 D ]>\  }}|j        j         dk    r)|j        ||<   |j                            |          |_        ?	 | V  t                      }|                                 D ]\  }}||v r||         }|j         dk    r~t          j        |j                                        |j        	                                |j        j
        |j        j        d|          }|                    |j                   ||_        |j                            |          |_        d S # t                      }|                                 D ]\  }}||v r||         }|j         dk    r~t          j        |j                                        |j        	                                |j        j
        |j        j        d|          }|                    |j                   ||_        |j                            |          |_        w xY w)Ncpu)sizestriderJ   layoutdevice
pin_memory)typenamed_parametersrQ   datator   torchempty_stridedrN   rO   rJ   rP   copy_)rK   r=   original_device_statesr   prR   original_devicecpu_datas           r   rH   rH   y   s0     U""68 **,, . .a8=E!!+,8"4(VYY}--AF8 -..
..00 	8 	8GD!---0Ft0L"'500$2V[[]] v}}fl v}$#-     H NN16***%AFFVYY77AF!	8 	8 -..
..00 	8 	8GD!---0Ft0L"'500$2V[[]] v}}fl v}$#-     H NN16***%AFFVYY77AF!	8s   *E CH*c                    ddl m}m} t          | j        dg           }| j                            ||           \  }}||                                 k    r3| j        dk    sJ | j        dk    rt          
                    d|           | j        }|dk    rng|d	k    r&t                              d
            ||          }n;|dk    r&t                              d            ||          }nt          |           ||fS )Nr   )as_embedding_modelas_seq_cls_modelarchitecturesr   vllmautoz%s has no vLLM implementation, falling back to Transformers implementation. Some features may not be supported and performance may not be optimal.noneembedzConverting to embedding model.classifyz,Converting to sequence classification model.)#vllm.model_executor.models.adaptersr_   r`   rC   r5   registryresolve_model_cls_get_transformers_backend_cls
model_implr3   warning_onceconvert_typerD   r   )r   r_   r`   ra   	model_clsarchrn   s          r   _get_model_architecturerq      sF   XXXXXXXXL2ORHHM"+==! >  OIt
 |99;;;;&&0000"f,,2 	    ,Lv		 	 :;;;&&y11				#	#HIII$$Y//		\"""d?r    c                    t          | j        | j        | j        | j        | j        t          t          | j        dg                     f          }|t          v rt          |         S t          |           }|t          |<   |S )Nra   )hashr<   rn   runner_typetrust_remote_coderl   tuplerC   r5   _MODEL_ARCH_BY_HASHrq   )r   key
model_archs      r   r)   r)      s    
%$*#',0/2FFGG	
	 	C !!!"3''(66J)r    c                 ,    t          |           d         S )Nr   r)   rb   s    r   get_model_clsr|          !,//22r    c                 ,    t          |           d         S )N   r{   rb   s    r   get_architecture_class_namer      r}   r    c                       e Zd ZU dZeeee         f         ed<    ee          Z	eee
eef         f         ed<   d Zdede
eee         f         dz  fd	ZdS )
ParamMappingz
    A class to handle parameter mapping for model weight loading.
    It creates a bidirectional mapping between packed parameters and their
    constituent parts.
    packed_mapping)default_factoryinverse_packed_mappingc                     | j                                         D ]F\  }}t          |          dk    r|d         |k    r%t          |          D ]\  }}||f| j        |<   Gd S )Nr   r   )r   itemslen	enumerater   )selfpacked_name
sub_paramsindex
param_names        r   __post_init__zParamMapping.__post_init__   s    '+':'@'@'B'B 	 	#K:!##
1(D(D%.z%:%:  !z;+J77		 	r    module_namer   Nc                 z    | j                                         D ] \  }}|                    |          r||fc S !d S )N)r   r   endswith)r   r   rx   values       r   get_sub_moduleszParamMapping.get_sub_modules   sT    -3355 	" 	"JC##C(( "Ez!!!"tr    )__name__
__module____qualname____doc__dictstrlist__annotations__r   r   rv   intr   r   r   r    r   r   r      s           d3i((((9>t9T9T9TDeCHo!56TTT	 	 	3 5d3i3H43O      r    r   r&   c                     t          |t                    sDt          |dd          }t          |dd          }||                     |           ||| _        dS dS dS )as  
    Pass packed_modules_mapping by reference to quant_config so that
    quant_config can properly match fused modules

    Note that model attributes are passed by reference to quant_config,
    enabling them to be updated by model_class.__new__ (ex. chatglm, qwen)

    Once the `SupportsQuant` mixin has been added to all models, this
    function can be removed
    hf_to_vllm_mapperNpacked_modules_mapping)
issubclassr   rC   apply_vllm_mapperr   )r&   r   r   r   s       r   r*   r*     s     k=11 A#K1DdKK .FMM (**+<===%2@L///A A &%r    )/r   r+   r0   
contextlibr   dataclassesr   r   rW   r   typing_extensionsr   vllm.attention.layerr   r	   vllm.configr
   r   r   vllm.loggerr   3vllm.model_executor.layers.quantization.base_configr   r   %vllm.model_executor.models.interfacesr   vllm.utils.platform_utilsr   r   r3   r   rS   Moduler;   rQ   rB   rH   r   r   rv   rw   rq   r)   r|   r   r   r*   r   r    r   <module>r      sL   2 1   % % % % % % ( ( ( ( ( ( ( (        * * * * * * 8 8 8 8 8 8 8 8 H H H H H H H H H H # # # # # #        @ ? ? ? ? ? = = = = = =	X		 *.'+3% 3% 3%3% 3% bi4'	3%
 $3% Y3% 3% 3% 3%l%E9%E$/%E@E%E	%E %E %E %EP %858? %85< %8 %8 %8 %8R =d3d29os&: ;;<>>  6 +  %RYQT@T:U        F tBIPS?S9T    &3 3RY 3 3 3 33k 3c 3 3 3 3        8A$A37	?A A A A A Ar    