
    -`iA                       U d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z; d dl<m=Z=m>Z> d dl?m@Z@mAZA d dlBmCZC d dlDmEZE d dlFmGZG e	r4d dlHmIZI d dlJmKc mLc mMZN d dlOmKc mPZQ d dlRmSZS d dlTmUZU d dlJmVZV d dlWmXZX n4e
ZI eEd eY            d          ZN eEd eY            d          ZQe
ZSe
ZUe
ZVe
ZX e&eZ          Z[ede!f         Z\ed         Z]ede]f         Z^ed          Z_ed!         Z`ed"         Zaebece
f         eeIgeIf         z  Zded#         Zeed$         Zfg g d%g d&Zgebe!ehe]         f         eid'<   ed(         Zje# e ed)*          +           G d, d-                                  Zkd.ecd/ecehec         z  dz  fd0Zlg d1Zmehenecene!e]f         f                  eid2<   d3 Zoddd4d5ecd6e!dz  d7e]dz  d8enecene!e]f         f         dz  fd9Zpejq        ejq        ejr        ejr        ejs        d:Ztd;ecfd<Zud=d=d=d=d=d>Zvd?ecd@ejw        fdAZxd?ecd@ejw        fdBZyd?ecdCejw        dDezfdEZ{ddFdGecdHeId@ecejw        z  dDezdIecdz  d8ejw        fdJZ|dHeId@ejw        d6ecd8ejw        fdKZ}	 	 dUdLeIdMedNebdz  dOe~dz  dPezdQe~dz  dRe~dz  dSe
dz  d8e~fdTZdS )V    N)Callable)InitVarfield)cached_property)TYPE_CHECKINGAnyLiteralcastget_args)
ConfigDictFieldfield_validatormodel_validator)	dataclass)ModelArchitectureConfig)MMCacheTypeMMEncoderTPModeMultiModalConfig)PoolerConfig)
RunnerType)configgetattr_iter)init_loggercurrent_platform)ConfigFormat
get_configget_hf_image_processor_configget_hf_text_configget_pooling_config)get_sentence_transformer_tokenizer_configis_encoder_decoderis_rope_parameters_nestedtry_get_dense_modulestry_get_generation_configtry_get_tokenizer_config
uses_mropeuses_xdrope_dim)is_ggufis_remote_ggufmaybe_patch_hf_config_from_ggufsplit_remote_gguf)MODEL_ARCH_CONFIG_CONVERTORSModelArchConfigConvertorBase)ObjectStorageModelis_runai_obj_uri)maybe_model_redirect)
LazyLoader)AttentionBackendEnumPretrainedConfig)
LoadConfig)ParallelConfig)QuantizationMethods)LogitsProcessormodel_executorz'vllm.model_executor.layers.quantizationzvllm.model_executor.modelsauto)noneembedclassifyrewardmm_encoder_only)r;   hfslowmistraldeepseek_v32)r;   halffloat16bfloat16floatfloat32)
raw_logitsraw_logprobsprocessed_logitsprocessed_logprobs)r;   vllmtransformers
terratorch)	attentionlinear_attentionmamba)r=   r>   r?   )generatepoolingdraft_RUNNER_CONVERTS)decoderencoderencoder_onlyencoder_decoderattention_freehybridT)arbitrary_types_allowed)r   c                      e Zd ZU dZdZeed<   	 dZeed<   	 dZe	ed<   	 dZ
eed<   	  ed	
          Zeed<   	 dZeez  ed<   	 dZeed<   	 dZeej        z  ed<   	 dZeed<   	  ed          Zeed<   	  ed          Zeed<   	 d	Zed	z  ed<   	 dZeed<   	 d	Zee         d	z  ed<   	 d	Zed	z  ed<   	 d	Z ed	z  ed<   	 d	Z!ed	z  ed<   	  ed	d          Z"eed<   	 d	Z#ed	z  ed<   	 d	Z$e%ez  d	z  ed<   	 dZ&eed <   	 dZ'eed!<   	 dZ(eed"<   	 d#Z)eed$<   	 d%Z*e+ed&<   	 dZ,eed'<   	 dZ-eed(<   	 dZ.eed)<   	 dZ/eed*<   	 d	Z0eee         z  d	z  ed+<   	 dZ1ee2z  ed,<   	 d	Z3eez  d	z  ed-<   	  ee4.          Z5e6ed/<   	 d	Z7ed	z  ed0<   	 dZ8eed1<   	  ee4.          Z9e4ee:f         ed2<   	 dZ;eed3<   	 dZ<ee=z  ed4<   	 d	Z>ed	z  ed5<   	 d	Z?eee@eA         z           d	z  ed6<   	 d	ZBed	z  ed7<   	 d	ZCeDd	z  ed8<   	 d	ZEeFd	z  ed9<   	 d	ZGeHe4eee4eef         z  f         d	z           ed:<   d	ZIeHed	z           ed;<   d	ZJeHe4ee4ee:f         f         d	z           ed<<   d	ZKeHe4ee:f         d	z           ed=<   d	ZLeHeMd	z           ed><   d	ZNeHeOd	z           ed?<   d	ZPeHed	z           ed@<   d	ZQeHed	z           edA<   d	ZReHeSd	z           edB<   d	ZTeHeUez  d	z           edC<   d	ZVeHed	z           edD<   d	ZWeHed	z           edE<   d	ZXeHeMd	z           edF<   dGefdHZYdIee4ee:f         z  dJe4ee:f         dGd	fdKZZdLedMe4ee:f         dGd	fdNZ[d:e4eee4eef         z  f         d	z  d;ed	z  d<e4ee4ee:f         f         d	z  d=e4ee:f         d	z  d>eMd	z  d?eOd	z  d@ed	z  dAed	z  dBeSd	z  dCeUez  d	z  dDed	z  dEed	z  dFeMd	z  dGd	fdOZ\dGe]fdPZ^ e_dddQR          e`dSe:dTeadGe:fdU                        Zb e_ddVR          dedGefdW            Zc e_ddXR          e`dSe:dGe:fdY                        Zd eedVR          dd[            ZfdGefd\ZgdGefd]Zheid^             ZjeidGee         fd_            ZkeidGefd`            ZldededGd	fdaZmdb Zndcee         dGeofddZpdcee         de	dGeofdeZqdcee         dfeodGerfdgZsdcee         dfeodedGerfdhZtddiZuddjZvddkZwddlZxdm ZydnezdGd	fdoZ{dpe|dGd	fdqZ}dGed	z  fdrZ~dGefdsZdGefdtZdGefduZeidGefdv            ZedGefdw            ZdGefdxZdGefdyZdpe|dGefdzZdpe|dGefd{ZdGefd|ZdGefd}Zdpe|dGeeef         fd~Zdpe|dGefdZ	 ddpe|dedGefdZdGed	z  fdZdGeFfdZdGe4ee:f         fdZdGe4ee:f         fdZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            Zeid             ZeidGefd            ZeidGefd            ZeidGefd            Zeid             ZeidGefd            ZeidGej        fd            Zeid             ZdefdZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            ZeidGefd            Zd	S )ModelConfigzConfiguration for the model.zQwen/Qwen3-0.6Bmodel model_weightsr;   runnerconvertN)default	tokenizertokenizer_modeFtrust_remote_codedtyper   seed)init	hf_confighf_text_confighf_config_pathallowed_local_media_pathallowed_media_domainsrevisioncode_revisiontokenizer_revision)rf   gemax_model_lenspec_target_max_model_lenquantizationallow_deprecated_quantizationenforce_eagerenable_return_routed_experts   max_logprobsrK   logprobs_modedisable_sliding_windowdisable_cascade_attnskip_tokenizer_initenable_prompt_embedsserved_model_nameconfig_formathf_tokendefault_factoryhf_overrideslogits_processor_patterngeneration_configoverride_generation_configenable_sleep_mode
model_imploverride_attention_dtypelogits_processorsio_processor_pluginpooler_configmultimodal_configlimit_mm_per_promptenable_mm_embedsmedia_io_kwargsmm_processor_kwargsmm_processor_cache_gbmm_processor_cache_typemm_shm_cache_max_object_size_mbr@   mm_encoder_tp_modemm_encoder_attn_backendinterleave_mm_stringsskip_mm_profilingvideo_pruning_ratereturnc                 H    h d}ddl m}m}  || |          } ||          S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        >!   rk   rd   re   r   rg   r   r   r{   r   r   ro   rh   r   r   r   r   r   r   rt   r   r   r   r   r   rq   r   r   r   rp   r   r   rx   r   r   )get_hash_factorshash_factors)vllm.config.utilsr   r   )selfignored_factorsr   r   factorss        e/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/config/model.pycompute_hashzModelConfig.compute_hash<  sQ    "
 "
 "
H 	EDDDDDDD""499|G$$$    targetupdatesc                    |                                 D ]\  }}t          |t                    rzt          |t                    r|                    |          }nt	          ||d          }|<t          |t                    st          |d          r|                     ||           t          |t                    r|||<   t          |||           dS )z9Recursively updates a config or dict with nested updates.N__dict__)items
isinstancedictgetgetattrhasattr_update_nestedsetattr)r   r   r   keyvaluenested_targets         r   r   zModelConfig._update_nestedq  s     "--// 	, 	,JC%&& fd++ ?$*JJsOOMM$+FC$>$>M !,}d33 -}j99 - ''u=== &$'' ,#sU++++)	, 	,r   r   	overridesc                     ddl m} |                                D ]P\  }}t          ||d          }|'t	          ||          r|                     ||           ?t          |||           QdS )zCApply dict overrides, handling both nested configs and dict values.r   r4   N)rO   r5   r   r   r   r   r   )r   r   r   r5   r   r   attrs          r   _apply_dict_overridesz!ModelConfig._apply_dict_overrides  s     	211111#//++ 	, 	,JC63--DJt5E$F$F##D%0000 U++++	, 	,r   c                    t          | j        | j                  | _        t          | j                  | _        | j        | j        | _        | j        | j        | _        t          | j                  | _        t          | j        t                    rt          | j                  | _        t          | j                  ri }| j        }i }nEi }i }| j                                        D ]%\  }}t          |t                    r|||<    |||<   &d }|                     | j        | j                   ddlm} | j        % |j                    st'          j        dd           | j        r |j                    st/          d          t1          | j        p| j        | j        | j        | j        | j        ||          }t9          | j        |          }|| _        |r|                     ||           t?          | j                  | _         tC          | j         dd           | _"        | #                                | _$        tK          | j        | j&        | j        	          | _'        | (                                | _)        | j*        d
k    r#tV          ,                    d           d}d| _*        | j-        }| j.        }|/                    ||           }|0                    ||           }| 1                    || j2                  | _3        | 4                    || j3        | j*                  | _5        | j3        dk    r'|s%tl          d         }| j5        |vrt/          d          | j3        dk    rF|sDtl          d         }| j5        |vr.dd7                    |          z   dz   }t/          d| d          |8                    ||           \  }}|| _9        || _:        tV          ;                    d|           | j3        dk    r| j<        t{                      | _<        t}          | j        | j                  }|E|                                D ]0\  }} tC          | j<        |          t          | j<        ||            1| j9        j@        }!| j<        jA        |!| j<        _A        | j9        jB        }"| j<        jC        |"| j<        _C        t          | j        | j        | jE        | j3        dk    | j                  | _E        | jF        | _G        | H                    | jF                  | _F        | jI        r!d| _J        tV          ;                    d           | j9        jK        ry|	dk    r(| j9        jL        stV          ,                    d           d}	t          |||||||||	|
|||          }#d |#                                D             }#t          d i |#| _N        t          | j                  r| jP        rt/          d          | jQ        rd | j         _R        d| _S        | T                                 | U                                 | V                                 | W                                 d S )!Nr   r   z;override-attention-dtype is set but not using ROCm platform   )
stacklevelz0Sleep mode is not supported on current platform.)hf_overrides_kwhf_overrides_fnattention_chunk_size)r   rr   r@   zn`--convert mm_encoder_only` is deprecated and will be removed in v0.15. Please use --mm-encoder-only` instead.Tr<   rT   z0This model does not support `--runner generate`.rU   <|>zHThis model does not support `--runner pooling`. You can pass `--convert z" to adapt it into a pooling model.zResolved architecture: %s)is_pooling_modelrr   z=Encoder-decoder model detected, disabling mm processor cache.datazhThis model does not support `--mm-encoder-tp-mode data`. Falling back to `--mm-encoder-tp-mode weights`.weights)limit_per_promptr   r   r   r   r   r   r@   r   r   r   r   r   c                     i | ]
\  }}|||S N ).0kvs      r   
<dictcomp>z-ModelConfig.__post_init__.<locals>.<dictcomp>]  s&          AQ]1]]]r   zLoading a multimodal GGUF model needs to use original tokenizer. Please specify the unquantized hf model's repo name or path using the --tokenizer argument.Fr   )Xget_served_model_namera   r   r1   rg   rt   rr   r   ro   strcallabler   r   r   $maybe_pull_model_tokenizer_for_runaivllm.platformsr   r   is_rocmwarningswarnr   is_sleep_mode_available
ValueErrorr   ri   rs   r   r+   rm   r   r   rn   r   r   _get_encoder_configencoder_configr   r   hf_image_processor_configget_model_arch_configmodel_arch_configre   loggerwarning_oncearchitecturesregistryis_text_generation_modelr   _get_runner_typerd   runner_type_get_convert_typeconvert_typerW   joininspect_model_cls_model_info_architectureinfor   r   r    r   default_seq_pooling_typeseq_pooling_typedefault_tok_pooling_typetok_pooling_type_get_and_verify_dtyperj   rw   original_max_model_lenget_and_verify_max_lenr"   r   supports_multimodal#supports_multimodal_encoder_tp_datar   r   r)   is_multimodal_modelr   sliding_windowconfig_updated#_try_verify_and_update_model_config_verify_quantization_verify_cuda_graph_verify_bnb_config)$r   r   r   r   r   r   r   r   r@   r   r   r   r   r   r   r   dict_overridesr   r   r   rm   r   r   is_generative_modelr   generate_convertspooling_convertsconvert_option
model_infoarchbase_configr   r   r   r   mm_config_kwargss$                                       r   __post_init__zModelConfig.__post_init__  s   $ "7J."
 "
 *$*55
>!!ZDN"*&*mD#-dn==d)3// 	L"6t7J"K"KDD%&& 	# O"/O-/NN !ON"/5577 1 1
UeT** 1*/N3''+0OC(("O11$*dnMMM333333(4=U=M=U=W=W4MM   
 ! 	Q*R*:*R*T*T 	QOPPP-4:"M++
 
 
	 4J
 
	
 # 	B&&y.AAA0@@$+!7%
 %
! #6688)FJ*
 *
 *
& "&!;!;!=!=<,,,9  
 #O!DL*=&??tTT#44]DII00LL 224+T\
 
 z))2E) 0 < (999 !STTTy((1A(/	: (888!$sxx0@'A'A!AC!G //=/ / /   $55mTJJ
D%!/666 y((!)%1^^",TZGGK&'--// : :DAqt1155= 2Aq999'+'7'P$!2:6N"3'+'7'P$!2:6N"3"7JNJ!-:]#
 #
 #

 '+&8#!889KLL" 	Y)*D&KKWXXX / 	J"f,,(L - ##F   &/"#!4!1 /$7&;(?0O /#5(?&;"3#5         !1!7!7!9!9      &6%I%I8H%I%ID" 4>"" 	t'? 	D   & 	6 26D. $00222!!###!!!!!!!!r   c                     t          j        | j        j        t                    } || j        | j                  }|                                S r   )r-   r   rm   
model_typer.   rn   re   )r   convertor_cls	convertors      r   r   z!ModelConfig.get_model_arch_configw  sK     58N%'C
 
 "M$.$2EFF	  """r   wrap)moder   handlerc                      ||S  ||          S )zFSkip validation if the value is `None` when initialisation is delayed.r   )clsr   r  s      r   _skip_none_validationz!ModelConfig._skip_none_validation  s     =Lwu~~r   afterc                 *    |                                 S r   )lower)r  rh   s     r   _lowercase_tokenizer_modez%ModelConfig._lowercase_tokenizer_mode  s    ##%%%r   beforec                 X    t          |t                    r|                                S |S r   )r   r   r  )r  r   s     r   validate_quantization_beforez(ModelConfig.validate_quantization_before  s)     eS!! 	!;;== r   r   c                 6   t          | j        t                    s2t          dt	          | j                  j         d| j        d          t          | j        t                    s2t          dt	          | j                  j         d| j        d          | S )zCalled after __post_init__z tokenizer must be a string, got z: z@. Please provide a valid tokenizer path or HuggingFace model ID.z.max_model_len must be a positive integer, got z. Example: max_model_len=2048)r   rg   r   r   type__name__rw   intr   s    r   validate_model_config_afterz'ModelConfig.validate_model_config_after  s     $.#.. 	Q''0Q Q48NQ Q Q  
 $,c22 	.D.//8. .<@<N. . .  
 r   c                    d}|| j         | j        k    rdndz  }|| j        rdndz  }d}d}t          | j        d                   x}r|\  }\  }}| j        dk    r| j        }|dk    r|d	v r|d
k    r|dz  }n|dk    r|dz  }n|dz  }|S )z|Determine which Transformers modeling backend class will be used if
        `model_impl` is set to `transformers` or `auto`.Transformers
MultiModalrb   MoENr   r;   rU   >   r=   r>   r=   EmbeddingModelr>   ForSequenceClassificationForCausalLM)rm   rn   is_moetry_match_architecture_defaultsr   rd   )r   r  rd   taskdefaults_s         r   _get_transformers_backend_clsz)ModelConfig._get_transformers_backend_cls  s     t~1DDD||"L+uu+6t7I!7LMMM8 	) (A~;&  [F Y4+@#@#@w''##22= C
r   c                 N    | j         j        }|                                 }||k    S )zDCheck if the model is using the Transformers modeling backend class.)r   architecturer,  )r   used_clstransformers_backend_clss      r   using_transformers_backendz&ModelConfig.using_transformers_backend  s+    #0#'#E#E#G#G 333r   c                     t           j        S r   )	me_modelsModelRegistryr  s    r   r   zModelConfig.registry  s    &&r   c                     | j         j        S r   )r   r   r  s    r   r   zModelConfig.architectures  s    %33r   c                     | j         S )z$The architecture vllm actually used.)r   r  s    r   r.  zModelConfig.architecture  s     !!r   c                    | j         rdS t          |          st          |          sdS t          |          rit          |          }|                    |g d           || _         |j        | _        ||k    r'|                    |g d           |j        | _        dS t          |          r7t          |          }|                    |g d           |j        | _        dS dS )zPull model/tokenizer from Object Storage to temporary
        directory when needed.

        Args:
            model: Model name or path
            tokenizer: Tokenizer name or path
        N)url)z*.modelz*.pyz*.json)allow_pattern)z*.ptz*.safetensorsz*.binz	*.tensorsz*.pth)ignore_pattern)rc   r0   r/   
pull_filesdirra   rg   )r   ra   rg   object_storage_modelobject_storage_tokenizers        r   r   z0ModelConfig.maybe_pull_model_tokenizer_for_runai  sJ     	F '' 	+;I+F+F 	FE"" 	#5%#@#@#@  ++%B%B%B ,    "'D-1DJ 	!!$//$ $ $ 0 	 	 	 "6!9 I&& 	:'9i'H'H'H$$//WWW 0    69DNNN	: 	:r   c                 |    | j         }t          |          rt          |          \  }}t          || j                  S r   )ra   r*   r,   r!   rr   )r   ra   r+  s      r   r   zModelConfig._get_encoder_config  s<    
%   	0(//HE18NNNr   r   c                     | j         }t          | j        | j                  rdS |D ]g}||                                v r2|                    ||           r dS |                    ||           r dS t          |          }|r|\  }\  }}|c S hdS )NrU   rT   )r   r    ra   rr   get_supported_archsr   r   r(  )r   r   r   r  matchr+  r   s          r   _get_default_runner_typez$ModelConfig._get_default_runner_type  s     = dj$-88 	9! 
	# 
	#Dx335555,,]DAA %$9944]DII &%::3D99E #&+##K""""# zr   c                     |dk    r|S |                      |          }|dk    rt                              d|           |S )Nr;   rT   z]Resolved `--runner auto` to `--runner %s`. Pass the value explicitly to silence this message.)rC  r   r   )r   r   rd   r   s       r   r   zModelConfig._get_runner_type   s[    
 VM33MBB *$$KKE   r   r   c                    | j         }|D ]u}||                                v r>|dk    r|                    ||           r dS |dk    r|                    ||           r dS t	          ||          }|r|\  }\  }}|c S v|dk    rdS dS )NrT   r<   rU   )r   r=   )r   rA  r   r   r(  )r   r   r   r   r  rB  r+  r   s           r   _get_default_convert_typez%ModelConfig._get_default_convert_type4  s    
 =! 	$ 	$Dx335555*,,1R1R!42 2, "66)++0I0I!41 1+ "663DkRRRE $',$$A|####$ )##7vr   c                     |dk    rt                               d           dS |dk    r|S |                     ||          }|dk    rt                               d|           |S )Nr?   zd`--convert reward` is deprecated and will be removed in v0.15. Please use `--convert embed` instead.r=   r;   r<   z_Resolved `--convert auto` to `--convert %s`. Pass the value explicitly to silence this message.)r   warningrF  r   )r   r   r   re   r   s        r   r   zModelConfig._get_convert_typeS  s     hNN8   7fN55m[QQ 6!!KKE   r   c                 b  	 t           j        }| j        $t          t           j        | j                  | _        | j        j        }||d         }g d		fd|D             }|	z   }|D ]p}t          j        |          }|                    || j                  }|=|t          t           j                  v r|	vrt          d| d          |}|| _         nq|dk    r|nd }| j        || _        n&| j        |k    rt          d| d| j         d	          | j        ?| j        |vrt          d
| j         d| d          ddlm}  |j        | j                   | j        t           j        v r>| j        r"t                               d| j                   d S t          d| j                  d S )Nquant_method)bitblasgptq_marlin_24gptq_marlingptq_bitblas
awq_marlinipexinc	moe_wna16modeloptmodelopt_fp4petit_nvfp4mxfp4cpu_awqc                     g | ]}|v|	S r   r   )r   qr   s     r   
<listcomp>z4ModelConfig._verify_quantization.<locals>.<listcomp>  s*     $ $ $Qi5G5G5G5G5Gr   zQuantization method z is an override but is has not been added to the `overrides` list above. This is necessary to ensure that the overrides are checked in order of preference.rb   z3Quantization method specified in the model config (zS) does not match the quantization method specified in the `quantization` argument ().zUnknown quantization method: z. Must be one of .r   r   zXThe quantization method %s is deprecated and will be removed in future versions of vLLM.zThe quantization method %s is deprecated and will be removed in future versions of vLLM. To bypass, set `--allow-deprecated-quantization`.)me_quantQUANTIZATION_METHODSry   r
   r8   r   quantization_configget_quantization_configoverride_quantization_methodr   r   r   r   verify_quantizationDEPRECATED_QUANTIZATION_METHODSrz   r   rH  )
r   supported_quantization	quant_cfgrJ  quantization_methodsnamemethodquantization_overrider   r   s
            @r   r   z ModelConfig._verify_quantizationo  s   !)!>( $X%A4CT U UD *>	 $^4L  I"$ $ $ $1$ $ $  $8)#C  -  !9$??(.(K(Kt0) )% )4
 )E F FFF 	11(L4 L L L   $9L(=D%E! 5$ ,82+=+=<<4L ($0!!"l22 .$. . ). . .   ( (>>> ;D4E ; ;!7; ; ;   877777001BCCC HHH1 F%     != %	   IHr   c                     | j         }|rH| j        sCt          j                    r2t                              d| j        j                   d| _        d S d S d S d S )NzGCUDA graph is not supported for %s on ROCm yet, fallback to eager mode.T)r"   r{   r   r   r   rH  r   r
  )r   unsupported_rocms     r   r   zModelConfig._verify_cuda_graph  s    2 	&D$6 	&;K;S;U;U 	&NN!&1  
 "&D	& 	& 	& 	& 	& 	&r   c                     | j         dk    }| j        j        du}|r | j        j                            dd          nd}t	          |||| j         g          r#t                              d           d| _        dS dS )z
        The current version of bitsandbytes (0.46.1) with 8-bit models does not
        yet support CUDA graph.
        # TODO Remove this when bitsandbytes supports.
        bitsandbytesNload_in_8bitFzQCUDA graph is not supported on BitsAndBytes 8bit yet, fallback to the eager mode.T)ry   r   r_  r   allr{   r   rH  )r   is_bitsandbyteshas_quantization_configis_8bits       r   r   zModelConfig._verify_bnb_config  s     +~="&"8"LTX"X 'D"6::>5QQQ 	
 '&&	
 
 	& NN.  
 "&D	& 	&r   c                 2    | j         st          d          d S )NzYNumber of experts in the model must be greater than 0 when expert parallelism is enabled.)r'  r   r  s    r   _verify_with_expert_parallelismz+ModelConfig._verify_with_expert_parallelism  s-    { 	6  	 	r   c                     t          | dd          rd S | j        }|d S ddlm} |                    |d           }||                    |            d S d S )Nr   Fr   )MODELS_CONFIG_MAP)r   r.  !vllm.model_executor.models.configrv  r   verify_and_update_model_config)r   r.  rv  r  s       r   r   z/ModelConfig._try_verify_and_update_model_config   s    4)511 	F(F	
 	
 	
 	
 	
 	
  ##L$77?..t44444 ?r   load_configc                     t          | j        d          rBddlm}  || |          }|r0|| j        j        d<   d| j        j        vrd| j        j        d<   d S d S d S d S )Ndual_chunk_attention_configr   )get_sparse_attention_configsparse_attention_configsparse_attention_enabledT)r   rm   -vllm.model_executor.model_loader.weight_utilsr|  r{  )r   ry  r|  sparse_attn_configs       r   "verify_dual_chunk_attention_configz.ModelConfig.verify_dual_chunk_attention_config  s     4>#@AA 	      "=!<T;!O!O! 
 ' :- />EF F
  N>2  	 	
 

F Fr   parallel_configc           	      (   | j         j        }|j        }||z  dk    rt          d| d| d          |j        r|                                  |j        }|dk    r/| j                            | j	        |           st          d          |j        }|dk    rw| j        sr|                                 }||k    sJ d| d| d	            ||z  }||k    sJ d
| d| d| d|             ||z  }||z  dk    sJ d| d|j         d            d S d S d S )Nr   z!Total number of attention heads (z-) must be divisible by tensor parallel size (r[     zlPipeline parallelism is not supported for this model. Supported models implement the `SupportsPP` interface.ztensor parallel size z) must be greater than total num kv heads z0 when enable decode context parallel for GQA/MQAzNdecode context parallel size must less than or equal to (tensor parallel size z // total num kv heads z) = z
, but got z%Total number of q per kv attn heads (zS) must be divisible by dcp world size when enable decode context parallel for GQA ()r   total_num_attention_headstensor_parallel_sizer   enable_expert_parallelrt  pipeline_parallel_sizer   is_pp_supported_modelr   NotImplementedErrordecode_context_parallel_sizeuse_mlaget_total_num_kv_heads)	r   r  r  r  r  r  total_num_kv_headsmax_dcp_sizenum_q_per_kvs	            r   verify_with_parallel_configz'ModelConfig.verify_with_parallel_config(  s    %)$:$T!.C$';;q@@-4M - -(- - -   1 	300222!0!G!A%%dm.Q.Q/
 /
% &I  
 (7'S$'!++DL+!%!<!<!>!>'*<<<<7(< 7 7+=7 7 7 =<< 03EEL/<???:)=: : 2: :8D: : 8: : @?? 58JJL">>!CCCE E E $@E E E DCC# ,+++" DCr   c                 .    t          | j        dd          S )z?Get the sliding window size from the HF text config if present.r   Nr   rn   r  s    r   get_sliding_windowzModelConfig.get_sliding_windowZ  s    t*,<dCCCr   c                     | j         j        S r   )r   
vocab_sizer  s    r   get_vocab_sizezModelConfig.get_vocab_size^  s    %00r   c                     | j         j        S r   )r   hidden_sizer  s    r   get_hidden_sizezModelConfig.get_hidden_sizea      %11r   c                 >    d}t          | j        || j                  S )N)projection_dimprojection_sizer   )r   rn   r  )r   namess     r   get_inputs_embeds_sizez"ModelConfig.get_inputs_embeds_sized  s-     68L
 
 
 	
r   c                     | j         j        S r   )r   is_deepseek_mlar  s    r   r  zModelConfig.is_deepseek_mlam  s    %55r   c                 P    d}t          | j        d          sdS | j        j        |v S )z8Whether to use bidirectional attention for mm positions.)gemma3molmo2	paligemmar
  F)r   rm   r
  )r   MM_PREFIX_LM_MODELSs     r   is_mm_prefix_lmzModelConfig.is_mm_prefix_lmq  s6    

 t~|44 	5~(,???r   c                     | j         j        S r   )r   	head_sizer  s    r   get_head_sizezModelConfig.get_head_size}  s    %//r   c                     | j         j        S )z%Returns the total number of KV heads.)r   r  r  s    r   r  z"ModelConfig.get_total_num_kv_heads  s    %88r   c                 l    | j         rdS |                                 }t          d||j        z            S )z'Returns the number of KV heads per GPU.r  )r  r  maxr  )r   r  r  s      r   get_num_kv_headszModelConfig.get_num_kv_heads  s>    < 	1!88::
 1(O,PPQQQr   c                 .    | j         j        }||j        z  S r   )r   r  r  )r   r  	num_headss      r   get_num_attention_headsz#ModelConfig.get_num_attention_heads  s    *D	O@@@r   c                     | j         j        S r   )r   num_expertsr  s    r   get_num_expertszModelConfig.get_num_experts  r  r   c                     | j         j        S r   )r   total_num_hidden_layersr  s    r   get_total_num_hidden_layersz'ModelConfig.get_total_num_hidden_layers  s    %==r   c                     ddl m} |                                 }|j        |j        z  |j        z  }|j        } ||||          \  }}||fS )Nr   )get_pp_indices)vllm.distributed.utilsr  r  rankr  r  )r   r  r  r  pp_rankpp_sizestartends           r   get_layers_start_end_indicesz(ModelConfig.get_layers_start_end_indices  sq     	:99999"&"B"B"D"D  O$HH23 "8#^$;WgNN
sczr   c                 <    |                      |          \  }}||z
  S r   )r  )r   r  r  r  s       r   get_num_layerszModelConfig.get_num_layers  s#    66GG
sU{r   rQ   
block_typec                    dk    }| j          o| j         o| j         }|                     |          \  }}|r	|r||z
  ndS | j        r	|rdn||z
  S | j        r-| j        j        }t          d |||         D                       S t          | j        dd           }|k| j	        j
        dk    r8|r!t          d |||         D                       S |                     |          S t          fd|||         D                       S t          | j        dd           }	|	r!t          d	 |	||         D                       S t          | j        d
d           }
|
qdk    r!t          d |
||         D                       S dk    r!t          d |
||         D                       S t          fd|
||         D                       S ||	|
t          d d          d S d S d S )NrQ   r   c              3   0   K   | ]}|j         j         V  d S r   )rQ   no_op)r   bcs     r   	<genexpr>z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s*      QQ"2<--QQQQQQr   layers_block_typezamba2c              3   "   K   | ]
}|d k    V  dS )r]   Nr   r   ts     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s7       # #./AM# # # # # #r   c              3   $   K   | ]
}|k    V  d S r   r   r   r  r  s     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s'      WWq1
?WWWWWWr   attn_type_listc              3   "   K   | ]
}|d k    V  dS )r  Nr   r  s     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s&      EEa16EEEEEEr   layer_typesc              3   "   K   | ]
}|d k    V  dS )full_attentionNr   r  s     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s9        23--     r   rR   c              3   "   K   | ]
}|d k    V  dS )rR   Nr   r  s     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s9        45//     r   c              3   $   K   | ]
}|k    V  d S r   r   r  s     r   r  z;ModelConfig.get_num_layers_by_block_type.<locals>.<genexpr>  s'      UU1qJUUUUUUr   zThe model is an hybrid without a layers_block_type or an attn_type_list, or a layer_types in the hf_config, cannot determine the num of z layers)	is_hybrid	has_noopsis_attention_freer  rm   block_configssumr   rn   r   text_model_typer  r   )r   r  r  attn_block_typeis_transformerr  r  r  layers_block_type_valuer  layer_types_values     `        r   get_num_layers_by_block_typez(ModelConfig.get_num_layers_by_block_type  s    %3Tt~#5Td>T:T 	 66GG
s 6	"183;;q8# 3	 (811S5[8^ .	 N8MQQeCi8PQQQQQQ '.#%8$' '# '2)9XEE& D" # #3J5QT93U# # #       $22?CCCWWWW4KERUI4VWWWWWW %T^5EtLLN FEE>%)+DEEEEEE !(t L L ,,,  7Hs7S       #555  9J5QT99U      UUUU8I%PS)8TUUUUUU (/"*%- G3=G G G  	 0/**--r   c                 j    t          | j        dd          }|t          | j        dd          }|d}|S )z;
        Returns the mamba chunk size if it exists
        mamba_chunk_sizeN
chunk_size   r  )r   r  s     r   get_mamba_chunk_sizez ModelConfig.get_mamba_chunk_size  sF    
 T02DdKK
 !4lDIIJ Jr   c                 <    | j         t          d          | j         S )z
        Get the multimodal configuration of the model.

        Raises:
            ValueError: If the model is not multimodal.
        NzThe model is not multimodal.)r   r   r  s    r   get_multimodal_configz!ModelConfig.get_multimodal_config  s%     !);<<<%%r   c                     | j         dv r/t          | j        p| j        | j        | j        | j                  }n!t          | j         | j        | j                  }|i S  |j                    S )a  
        This method attempts to retrieve the non-default values of the
        generation config for this model.

        The generation config can contain information about special tokens, as
        well as sampling parameters. Which is why this method exists separately
        to `get_diff_sampling_param`.

        Returns:
            A dictionary containing the non-default generation config.
        >   r;   rN   )ri   rr   r   )ri   r   )r   r%   ro   ra   ri   rr   r   to_diff_dict)r   r   s     r   r%   z%ModelConfig.try_get_generation_config  s     !%555.#1tz"&"8"0	  FF /&"&"8"0  F >I"v"$$$r   c                    | j         }|dk    ri n|                                  j        | j                   g d}t	          fd|D                       r+fd|D             }d|v r|                    d          |d<   ni }|r9|dk    r3t                              d|dk    rd	n|t          |          d
           |S )a  
        This method returns a dictionary containing the non-default sampling
        parameters with `override_generation_config` applied.

        The default sampling parameters are:

        - vLLM's neutral defaults if `self.generation_config="vllm"`
        - the model's defaults if `self.generation_config="auto"`
        - as defined in `generation_config.json` if
            `self.generation_config="path/to/generation_config/dir"`

        Returns:
            A dictionary containing the non-default sampling parameters.
        rN   )repetition_penaltytemperaturetop_ktop_pmin_pmax_new_tokensc              3       K   | ]}|v V  	d S r   r   r   pr   s     r   r  z6ModelConfig.get_diff_sampling_param.<locals>.<genexpr>M  s'      55qqF{555555r   c                 R    i | ]#} j         |          | j         |          $S r   )r   r  s     r   r   z7ModelConfig.get_diff_sampling_param.<locals>.<dictcomp>N  s;     # # #%&jfjmm>W:6:a==>W>W>Wr   r  
max_tokenszDefault vLLM sampling parameters have been overridden by %s: `%s`. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.r;   z$the model's `generation_config.json`local)scope)	r   r%   updater   anypopr   r   r   )r   srcavailable_paramsdiff_sampling_paramr   s       @r   get_diff_sampling_paramz#ModelConfig.get_diff_sampling_param/  s5    $f}}$*H*H*J*J 	d5666
 
 
 5555$455555 	%# # # #*:# # #
  #6664G4K4K$5 5#L1 #% 	3&==3 ;>--66S'((      #"r   c                 *    t          | j                  S )z*Extract the HF encoder/decoder model flag.)r"   rm   r  s    r   r"   zModelConfig.is_encoder_decoderf  s     "$.111r   c                 Z   | j         }t          |dd          pd| j        v pt          |dd          dk    put          |d          oet	          |j        t                    o|j                            dd          p0t	          |j        t                     ot          |j        dd          S )NalibiFBloomForCausalLMposition_encoding_typerb   attn_config)rn   r   r   r   r   r  r   r   )r   cfgs     r   
uses_alibizModelConfig.uses_alibik  s    ! C%(( !T%77s4b99WD ]++ 
 #3?D99 @O//??
 's=== E#COWeDD	
r   c                 *    t          | j                  S r   )r'   rm   r  s    r   r'   zModelConfig.uses_mrope  s    $.)))r   c                 *    t          | j                  S r   )r(   rm   r  s    r   r(   zModelConfig.uses_xdrope_dim  s    t~...r   c                     | j         d uS r   )r   r  s    r   r   zModelConfig.is_multimodal_model  s    %T11r   c                     | j         j        S r   )r   "supports_multimodal_raw_input_onlyr  s    r   "is_multimodal_raw_input_only_modelz.ModelConfig.is_multimodal_raw_input_only_model  s    BBr   c                     | j         j        S r   )r   requires_raw_input_tokensr  s    r   r  z%ModelConfig.requires_raw_input_tokens  s    99r   c                 0    | j         j        p
| j        dk    S )Nr>   )r   supports_cross_encodingr   r  s    r   is_cross_encoderzModelConfig.is_cross_encoder  s     4W8IZ8W	
r   c                     | j         j        S r   )r   supports_ppr  s    r   is_pp_supportedzModelConfig.is_pp_supported  s    ++r   c                     | j         j        S r   )r   r  r  s    r   r  zModelConfig.is_attention_free  s    11r   c                     | j         j        sdS t          | j        dd           }|d u pt	          d |D                        S )NFr  c              3   "   K   | ]
}|d k    V  dS )rQ   Nr   )r   layers     r   r  z(ModelConfig.is_hybrid.<locals>.<genexpr>  s8       .
 .
%*E[ .
 .
 .
 .
 .
 .
r   )r   r  r   rm   ro  )r   r  s     r   r  zModelConfig.is_hybrid  si    ) 	5 dnmTBBd" 
# .
 .
.9.
 .
 .
 +
 +
 '
 	
r   c                     | j         j        S r   )r   r  r  s    r   r  zModelConfig.has_noops  s    ))r   c                     | j         j        S r   )r   has_inner_stater  s    r   r  zModelConfig.has_inner_state  s    //r   c                     | j         j        S r   )r   supports_mamba_prefix_cachingr  s    r   r  z)ModelConfig.supports_mamba_prefix_caching  s    ==r   c                 *    | j         ot          j         S r   )r  envsVLLM_MLA_DISABLEr  s    r   r  zModelConfig.use_mla  s    #AD,A(AAr   c                 t    t          t          | j        dd                     pt          | j        dd          S )Nmatryoshka_dimensionsis_matryoshkaF)boolr   rm   r  s    r   r%  zModelConfig.is_matryoshka  s=    GDN,CTJJKK 
wNOUP
 P
 	
r   c                 .    t          | j        dd           S )Nr$  r   rm   r  s    r   r$  z!ModelConfig.matryoshka_dimensions  s    t~'>EEEr   c                     t          | j        dd           }|t                              d           |S t          | j        dd          S )Nuse_pad_tokenzDuse_pad_token has been deprecated; please use use_sep_token instead.use_sep_tokenT)r   rm   r   r   )r   r*  s     r   r+  zModelConfig.use_sep_token  sT    
  FF$V   ! t~===r   c                 d   t          | j        | j        | j                  }| j        dk    r2|| j        k    r't                              d| j                   | j        S |t          j        vr(t                              d|| j                   | j        S t                              d|           |S )a{  
        "head" refers to the last Linear layer(s) of an LLM,
        such as the lm_head in a generation model,
        or the score or classifier in a classification model.

        `head_dtype` currently only supports pooling models.

        - The pooling model defaults to using fp32 head,
        you can use --hf-overrides '{"head_dtype": "model"}' to disable it.
        )r   rj   r   rU   zR`head_dtype` currently only supports pooling models, fallback to model dtype [%s].zTThe current platform does not support [%s] head dtype, fallback to model dtype [%s].zhead dtype: %s)	_get_head_dtyperm   rj   r   r   r   r   supported_dtypes
debug_once)r   
head_dtypes     r   r0  zModelConfig.head_dtype  s     %>AQ
 
 

 y((Z4:-E-E0
  
 :->>>0
	   :*J777r   c                     t          | j        | j                  }||d         d         S |                                 S )Nrr   ru   out_features)r$   ra   rr   r  )r   dense_moduless     r   embedding_sizezModelConfig.embedding_size  s@    -dj4=QQQ$ $^44##%%%r   c           
      P   d }| j         dk    r;t          | j        dd          dk    r!t          | j        | j        | j                  }t          | j        | j	        ||| j
        |                                 | j        | j                  }t                              d|           |S )NrU   position_embedding_typerb   absolute)ri   rr   )rm   r   tokenizer_configrw   r   r   rx   r   zUsing max model len %s)r   r   rm   r&   rg   ri   rt   _get_and_verify_max_lenrn   r   r   r  rx   r   r   r   )r   rw   r9  s      r   r   z"ModelConfig.get_and_verify_max_len  s      	))(A2FF*TT7"&"80     
 0)"4-'#'#>2244&*&D.	
 	
 	
 	,m<<<r   c                     | j         :| j        j        }|dk    rdS t          | j        dd          }|sdn| j        j        S | j        rdS | j        rdS | j        rdS dS )	NCLSrZ   	is_causalTr]   r\   r[   rX   )	r   r   r   r   rm   	attn_typer  r  r"   )r   r   r=  s      r   r>  zModelConfig.attn_type  s    )#/H5((%~#DNKFF	-6V~~D<L<VV^ 	8# 	##$ 	$$9r   c                    | j         }| j        x}r|dk    rt                              d           dS |dk    rd|j        dv s|j        dk    r(t                              d|j        |j                   dS t                              d|j        |j                   d	S |d
k    S |d
k    rt                              d           dS t                              d           d	S )NrZ   zFPooling models with bidirectional attn do not support chunked prefill.FrX   MEANr<  STEPzQPooling models with causal attn and %s/%s pooling do not support chunked prefill.zJPooling models with causal attn and %s/%s pooling support chunked prefill.Tr[   z6Encoder decoder models do not support chunked prefill.z*Generative models support chunked prefill.r>  r   r   debugr   r   r   r>  r   s      r   is_chunked_prefill_supportedz(ModelConfig.is_chunked_prefill_supported.  s   N	 ..= (	N**6   uI%%!2oEE$5??LL:%6%6	   !5LL3%6%6	    4  111 ---UVVVuLLEFFF4r   c                 B   | j         }| j        x}r|dk    rt                              d           dS |dk    rd|j        dv s|j        dk    r(t                              d|j        |j                   dS t                              d|j        |j                   d	S dS |d
k    rt                              d           dS |dk    rt                              d           dS |dk    rt                              d           dS t                              d           d	S )NrZ   zEPooling models with bidirectional attn do not support prefix caching.FrX   r@  rB  zPPooling models with causal attn and %s/%s pooling do not support prefix caching.zIPooling models with causal attn and %s/%s pooling support prefix caching.Tr]   zTHybrid models do not support prefix caching since the feature is still experimental.r\   z\Attention free models do not support prefix caching since the feature is still experimental.r[   z5Encoder decoder models do not support prefix caching.z)Generative models support prefix caching.rC  rE  s      r   is_prefix_caching_supportedz'ModelConfig.is_prefix_caching_supported\  si   N	 ..= 4	N**5   uI%%!2oEE$5??LL9%6%6	   !5LL2%6%6	    4 5 H$$-   u...5   u///TUUUuHIIItr   c                 2    |                                  dk    S )Nr   )r  r  s    r   r'  zModelConfig.is_moe  s    ##%%))r   c                 2    t          | j        dd           d uS )Nr_  r(  r  s    r   is_quantizedzModelConfig.is_quantized  s    t~'<dCC4OOr   )r   r`   r   r`   )r   N)rQ   )r  
__module____qualname____doc__ra   r   __annotations__rc   rd   RunnerOptionre   ConvertOptionr   rg   rh   TokenizerModeri   r&  rj   
ModelDTypetorchrk   r  r   rm   r5   rn   ro   rp   rq   listrr   rs   rt   rw   rx   ry   r8   rz   r{   r|   r~   r   LogprobsModer   r   r   r   r   r   r   r   r   r   HfOverridesr   r   r   r   r   r   	ModelImplr   r   r  r9   r   r   r   r   r   r   r   r   r   r   r   rH   r   r   r   r@   r   r   r   r3   r   r   r   r   r   r   r  r   r   r   classmethodr   r  r  r  r   r  r,  r1  propertyr   r   r.  r   r   r   rC  r   ConvertTyperF  r   r   r   r   rt  r   r6   r  r7   r  r  r  r  r  r  r   r  r  r  r  r  r  r  tupler  r  LayerBlockTyper  r  r  r%   r   r"   r  r'   r(   r   r  r  r  r  r  r  r  r  r  r  r%  r$  r+  r0  r5  r   AttnTypeStrr>  rF  rH  r'  rK  r   r   r   r`   r`   d   s         '&"E3""" M3 "FL!!!L#G]###C U4(((Is((("*0NMC'000; $t###&,E:#,,,' D#MMM(
 #(%U"3"3"3I333/',u%'8'8'8N$888X!%NC$J%%%"$&c&&&+ /349t+222HcDjE $M3:$$$  &*d
)))  t333M3333
K -1sTz000D59L%+d2999/ +0!4///;M4 */ $...+L#M #1M<000 $)D(((H "'$&&&
 !&%%%& "'$&&&0
 15sT#Y-444 )/M3%...;
 #'HdSj4&&&% !&d ; ; ;L+;;;M+/cDj///7 $s###J 27t1L1L1LS#XLLLJ $t###%"(Ji((( ,0cDj///&BFtC$"7784?FFF&*t***: *.M<$&--- 26'$.555.KOc3c3h+?&?!@4!GHOOO-1gdTk*111AEOWT#tCH~"56=>EEE:>c3h$!67>>>37754<0777;?W[4%78???;?#WS4Z%8???,0OWTD[)000:>$ 67>>>JNW%9C%?$%FGNNN2674$;/666.2wtd{+22204-4443%c 3% 3% 3% 3%j, 4S>1, c3h, 
	, , , ,8, , S>, 
	, , , ,"W" "#sT#s(^';";<tCW" +	W"
 c4S>12T9W" "#s(^d2W"  %t|W" "-t!3W" *-tW" W" ,d2W" "6!;d!BW"  $d{W"  $;W" "DLW"  
!W" W" W" W"r#	 # # # # _[/???#  S    [ @? _%G444&s &s & & & 54& _^(333     [ 43
 _'"""   #" s    64D 4 4 4 4 ' ' X' 4tCy 4 4 4 X4 "c " " " X".:# .:# .:RV .: .: .: .:`O O OCy 
   0Cy  
	   (Cy   
	   >Cy   	
 
   8a a a aF	& 	& 	& 	&& & & &8   5 5 5" 
   .0'0 
0 0 0 0dDC$J D D D D1 1 1 1 12 2 2 2 2
 
 
 
 
 6 6 6 6 X6 	@ 	@ 	@ 	@ _	@0s 0 0 0 09 9 9 9 9R R3 R R R RA~ A# A A A A2 2 2 2 2>S > > > >-	sCx   n      &1C C'C #C 
	C C C CJcDj    "
&'7 
& 
& 
& 
&%4S> % % % %>5#c3h 5# 5# 5# 5#n 2D 2 2 2 X2 
D 
 
 
 X
, *D * * * X* / / / / X/ 2T 2 2 2 X2 CD C C C XC :4 : : : X: 
$ 
 
 
 X

 , , , , X, 24 2 2 2 X2 
4 
 
 
 X
 *4 * * * X* 0 0 X0 >t > > > X> B B B B XB 
t 
 
 
 X

 F F XF >t > > > X> !EK ! ! ! X!F & & X&C    4 ;    X" +d + + + X+Z 7T 7 7 7 X7r * * * * X* Pd P P P XP P Pr   r`   ra   r   c                 H    |s| S t          |t                    r|d         S |S )a  
    If the input is a non-empty list, the first model_name in
    `served_model_name` is taken.
    If the input is a non-empty string, it is used directly.
    For cases where the input is either an empty string or an
    empty list, the fallback is to use `self.model`.
    r   )r   rU  )ra   r   s     r   r   r     s5      #T** $ ##r   ))r&  rT   r<   )ForConditionalGenerationr`  )	ChatModelr`  )LMHeadModelr`  )ForTextEncodingrU   r=   )r$  re  )r%  rU   r>   )ForTokenClassificationrf  )ForAudioClassificationrf  )ForImageClassificationrf  )ForVideoClassificationrf  )ClassificationModelrf  )ForRewardModelingre  )RewardModelre  )Modelre  _SUFFIX_TO_DEFAULTSc               #   $   K   t           E d {V  d S r   )ro  r   r   r   iter_architecture_defaultsrq    s$      """"""""""r   )r   r   r.  r   r   r   c                    t                      D ]5\  }\  }}|||k    r%|||k    r|                     |          r|||ffc S 6d S r   )rq  endswith)r.  r   r   suffixdefault_runner_typedefault_convert_types         r   r(  r(    s     
$	%	%	G 	G 	   K3F$F$F%9M)M)M%%f-- *N /1EFFFFF4r   )rE   rF   rH   rI   rG   r  c                 6    t                               |           S r   )_STR_DTYPE_TO_TORCH_DTYPEr   )r  s    r   str_dtype_to_torch_dtypery    s    $((...r   z>Numerical instability. Please use bfloat16 or float32 instead.)gemma2r  gemma3_textplamo2glm4r
  rj   c                 <    | t           v r|t          j        k    rdS dS )NFT)_FLOAT16_NOT_SUPPORTED_MODELSrT  rF   )r
  rj   s     r   _is_valid_dtyper    s$    222u7M7Mu4r   c                 |    | t           v r2|t          j        k    r"t           |          }t          d| d|           dS )NzThe model type z# does not support float16. Reason: T)r  rT  rF   r   )r
  rj   reasons      r   _check_valid_dtyper    sP    222u7M7M.z:WjWWvWW
 
 	
 4r   config_dtyper   c                n    ddl m}  fd|j        D             }|rt          j        |v rt          j        }n|d         }|t          j        k    r|}||v r|S  |j                    } |j                    }||}n|                                }	|d|	 d}t          
                    d|||           |S )Nr   r   c                 4    g | ]}t          |          |S r   )r  )r   rj   r
  s     r   rZ  z'_resolve_auto_dtype.<locals>.<listcomp>  s9       :u--  r   z (with compute capability )zHYour device %s doesn't support %s. Falling back to %s for compatibility.)r   r   r.  rT  rF   rI   get_device_nameget_device_capabilityas_version_strr   rH  )
r
  r  r   r   r.  preferred_dtypedevice_namedevice_capability
device_strversion_strs
   `         r   _resolve_auto_dtyper    s    0/////   %6    .EM-===-*1- u}$$&''' 3"244K>(>@@ #'

'6688#OOOOO

NNR	   r   r2  model_idr   rr   c                   t          j        || |          }|j        }t          |t                    rV|                                }|dk    rt          |||          }nX|t          vrt          d|          t          |         }n/t          |t          j
                  r|}nt          d|           t          ||           ||k    rv|t          j        k    rt                              d||           nI|t          j        k    rt                              d||           nt                              d||           |S )Nr2  r;   )r   Unknown dtype: zUpcasting %s to %s.zDowncasting %s to %s.zCasting %s to %s.)r.   get_torch_dtyper
  r   r   r  r  rx  r   rT  rj   r  rI   r   r   rH  )r  r   rj   r   rr   r  r
  torch_dtypes           r   r   r   3  s^    0?8  L "J% 4F??-!1  KK 555 !<5!<!<===3E:KK	E5;	'	' 42522333z;///l""%-''KK-|[IIIIU]**KK/{KKKK NN.kJJJr   c                    t          | dd           }|dk    r|S t          |t                    r<|                                }|t          vrt          d|          t          |         S t          |t          j                  r|S |.t          j        t          j
        vr|S |dk    rt          j        S |S t          d|           )Nr0  ra   r  rU   )r   r   r   r  rx  r   rT  rj   rI   r   r.  )r   rj   r   r0  s       r   r-  r-  b  s     ,36<+N+NJW	J	$	$ 9%%''
666=z==>>>(44	J	,	, 	9		= 0 AAAL)##= 7:77888r   rm   r   r9  rw   r   r   rx   r   c           	         |j         \  }}	|r|
||k     rd}	|}|r&|                    d|          }
t          ||
          }|t          d          k    r'||S ||S d}t                              d|           |}t          | dd          }|rt          |          sd|i}|Wd	| j        vrNd
}|	                                D ]2}|d         }|dvr$|                    d|          }|dk    r|d         }3||z  }|rd|v r|d         }||dk    rp|Jt          d |	                                D                       rt          t          | d|                    }nt          |          }t          j        |          }nm||k    rgt          | dd          }|||k    rNd| d|	 d| d| d	}d}t          j        rt                              d||           nt#          | d|           t          |          S )z*Get and verify the model's maximum length.Nr   model_max_lengthinfr  zThe model's config.json does not contain any of the keys to determine the original maximum length of the model. Assuming the model's maximum length is %d.rope_parametersrb   r  g      ?	rope_type)sulongropellama3factoryarn original_max_position_embeddingsmax_seq_lengthru   c              3   .   K   | ]}|d          dk    V  dS )r  r  Nr   )r   rps     r   r  z*_get_and_verify_max_len.<locals>.<genexpr>  s<       /
 /
.0B{Oz)/
 /
 /
 /
 /
 /
r   zUser-specified max_model_len (z-) is greater than the derived max_model_len (=z or model_max_length=z in model's config.json).a1  VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme caution. If the model uses relative position encoding (RoPE), positions exceeding derived_max_model_len lead to nan. If the model uses absolute position encoding, positions exceeding derived_max_model_len will cause a CUDA array out-of-bounds error.z%s %szT To allow overriding this maximum, set the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. )derived_max_model_len_and_keyr   minrH   r   rH  r   r#   r
  valuesr  r  r   check_max_model_lenr!  VLLM_ALLOW_LONG_MAX_MODEL_LENr   r   )rm   r   r9  rw   r   r   rx   r   derived_max_model_lenmax_len_keytokenizer_model_max_lengthdefault_max_lenr  scaling_factorr  r  r  msgrH  s                      r   r:  r:  z  s2    	7 )K 	/&222& .  W%5%9%9 5&
 &
" !$$9;U V V e,,$  $0 -,9 		
 	
 	
 !0 i):DAAO 08II 0/ "xy7K'K'K!((** 	S 	SB ;I <<< "$.!A!A&&,./Q,R)/ A*n<< ./? @
  3 3 &3 /
 /
4C4J4J4L4L/
 /
 /
 ,
 ,
&  ACX  MM   566M(<]KK 
.	.	. #9.@$GG#}7G'G'G? ? ?3>? ?(? ? $? ? ?   1 ##GS'::::  N NDKN N   }r   )NN)r   collections.abcr   dataclassesr   r   	functoolsr   typingr   r   r	   r
   r   rT  pydanticr   r   r   r   pydantic.dataclassesr   	vllm.envsr!  vllm.config.model_archr   vllm.config.multimodalr   r   r   vllm.config.poolerr   vllm.config.schedulerr   r   r   r   vllm.loggerr   r   r   vllm.transformers_utils.configr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   "vllm.transformers_utils.gguf_utilsr)   r*   r+   r,   3vllm.transformers_utils.model_arch_config_convertorr-   r.   #vllm.transformers_utils.runai_utilsr/   r0   vllm.transformers_utils.utilsr1   vllm.utils.import_utilsr2   #vllm.v1.attention.backends.registryr3   rO   r5   'vllm.model_executor.layers.quantizationr:   layersry   r]  vllm.model_executor.modelsmodelsr3  vllm.config.loadr6   vllm.config.parallelr7   r8   vllm.v1.sample.logits_processorr9   globalsr  r   rP  r[  rQ  rR  rS  rV  r   r   rW  rX  r]  rW   rU  rO  r^  r`   r   ro  r\  rq  r(  rF   rI   rG   rx  ry  r  rj   r  r  r&  r  r   r-  r  r:  r   r   r   <module>r     s    $ $ $ $ $ $ & & & & & & & & % % % % % % > > > > > > > > > > > > > >  H H H H H H H H H H H H * * * * * *            R Q Q Q Q Q Q Q Q Q + + + + + + , , , , , , 2 2 2 2 2 2 2 2 # # # # # # + + + + + +                                                U T T T T T T T > > > > > > . . . . . . D D D D D D ------>>>>>>>>>>>>222222222++++++333333KKKKKK???????z''))%N H 
+WWYY8TUUIJNO	X		vz)*NO+,GHNO
J 38nx)9(:<L(LMM@A	AB ...9 9 $z4#445    W
 
**T:::;;;vP vP vP vP vP vP vP <; vPr1 tCy49O    $I I I T%U:{+B%C CDE   (# # # &*'+	   d" $	
 3j+-../$6   * M}]}  /3 / / / / ONSNL! !  EK    3 u{    ++++ 	+ + + +h  , , ,,, ,
 , Dj, [, , , ,^99%*[9?B9
[9 9 9 9> -1!%@ @@.@ Tk@ :	@
 !@ $J@  #Tz@ $J@ 	@ @ @ @ @ @r   