
    .`i                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlZdd	lmZ dd
lm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* erddl+m,Z, ddl-m.Z.m/Z/ neZ,eZ.eZ/ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< ddl=m>Z>m?Z?m@Z@mAZAmBZB  e$eC          ZDi dddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d,d.d/d0d/d1di d2d3d4d5d6d7d8d9d:d;d<d;d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRi dSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdti dudvdwdxdydzd{dzd|d}d~dddddddddddddddddddddddi ddddddddddddddNddddddddddddddddddddi ddddddddddddddddddddddÓddœddǓddɓdd˓dd͓ddϓi ddѓddӓddՓddדddٓddۓddݓddߓddddddLdddddddddddddddddddZEddd5dVdd^ddxdddddddd eEF                                D             i d dddddddӓddݓddݓdddd	d
ddddddddddddddddddddddZGddddd d!d"d#d#d$	ZHi d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFi dGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d\d^d_d`dadbdcdddedfdgi dhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydddzd{d|d}d~dddddddddi dddddddddddddddddddddddddddddddddddddddddddddddZIi ddddddddddddddddddddÓdĐdœdƐdǓdȐdɓdʐd˓d̐d͓dΐdϓdАdѓdҐdӐdԜZJdՐd֐dלZKdՐdؐd֐dِdڐdېdܐdݐdސdߐd
ZLi eEeGeHeIeJeKeLZMejN        ddgZOddddddddddd
ZP ed           G d d                      ZQ G d de	          ZR ed           G d deR                      ZS ed           G d deR                      ZT ed          deUdeRdeVejW                 dz  fd            ZX ed          deUdeRdeQdz  fd            ZYe G d d                      ZZ eZd eMF                                D                       Z[ ed          Z\deg e\f         de\fdZ]d dZ^eCdk    r e^             dS dS (  z
Whenever you add an architecture to this page, please also update
`tests/models/registry.py` with example HuggingFace models for it.
    N)ABCabstractmethod)CallableSet)asdict	dataclassfield)	lru_cache)Path)TYPE_CHECKINGAnyTypeVar)envs)ModelConfigiter_architecture_defaultstry_match_architecture_defaults)init_logger)logtime)!try_get_class_from_dynamic_module)	safe_hash)AttnTypeStr)SequencePoolingTypeTokenPoolingType   )has_inner_state	has_noopsis_attention_free	is_hybridrequires_raw_input_tokenssupports_cross_encodingsupports_mamba_prefix_cachingsupports_multimodal#supports_multimodal_encoder_tp_data"supports_multimodal_raw_input_onlysupports_ppsupports_transcription)get_attn_typeget_default_seq_pooling_typeget_default_tok_pooling_typeis_pooling_modelis_text_generation_modelAfmoeForCausalLM)afmoer,   ApertusForCausalLM)apertusr.   AquilaModel)llamaLlamaForCausalLMAquilaForCausalLMArceeForCausalLM)arceer4   ArcticForCausalLM)arcticr6   BaiChuanForCausalLM)baichuanr8   BaichuanForCausalLM)r9   r:   BailingMoeForCausalLM)bailing_moer;   BailingMoeV2ForCausalLM)r<   r=   BambaForCausalLM)bambar>   BloomForCausalLM)bloomr@   ChatGLMModel)chatglmChatGLMForCausalLMChatGLMForConditionalGenerationCohereForCausalLM)commandrrF   Cohere2ForCausalLMCwmForCausalLMDbrxForCausalLM)dbrxrJ   DeciLMForCausalLM)nemotron_nasrL   DeepseekForCausalLM)deepseek_v2rN   DeepseekV2ForCausalLM)rO   rP   DeepseekV3ForCausalLM)rO   rQ   DeepseekV32ForCausalLMDots1ForCausalLM)dots1rS   Ernie4_5ForCausalLM)ernie45rU   Ernie4_5_MoeForCausalLM)ernie45_moerW   ExaoneForCausalLM)exaonerY   Exaone4ForCausalLM)exaone4r[   ExaoneMoEForCausalLM)
exaone_moeExaoneMoeForCausalLMFairseq2LlamaForCausalLM)fairseq2_llamar`   FalconForCausalLM)falconrb   FalconMambaForCausalLM)mambaMambaForCausalLMFalconH1ForCausalLM)	falcon_h1rg   FlexOlmoForCausalLM)	flex_olmori   GemmaForCausalLM)gemmark   Gemma2ForCausalLM)gemma2rm   Gemma3ForCausalLM)gemma3ro   Gemma3nForCausalLM)gemma3nrq   Qwen3NextForCausalLM)
qwen3_nextrs   GlmForCausalLM)glmru   Glm4ForCausalLM)glm4rw   Glm4MoeForCausalLM)glm4_moery   Glm4MoeLiteForCausalLM)glm4_moe_liter{   GptOssForCausalLM)gpt_ossr}   GPT2LMHeadModel)gpt2r   GPTBigCodeForCausalLM)gpt_bigcoder   GPTJForCausalLM)gpt_jr   GPTNeoXForCausalLM)gpt_neoxr   GraniteForCausalLM)graniter   GraniteMoeForCausalLM)
granitemoer   GraniteMoeHybridForCausalLM)granitemoehybridr   GraniteMoeSharedForCausalLM)granitemoesharedr   GritLM)gritlmr   Grok1ModelForCausalLM)grok1GrokForCausalLMGrok1ForCausalLMHunYuanMoEV1ForCausalLM)
hunyuan_v1r   HunYuanDenseV1ForCausalLM)r   r   HCXVisionForCausalLM)hyperclovax_visionr   InternLMForCausalLMInternLM2ForCausalLM)	internlm2r   InternLM2VEForCausalLM)internlm2_ver   InternLM3ForCausalLMIQuestCoderForCausalLMIQuestLoopCoderForCausalLM)iquest_loopcoderr   JAISLMHeadModel)jaisr   Jais2ForCausalLM)jais2r   JambaForCausalLM)jambar   KimiLinearForCausalLM)kimi_linearr   Lfm2ForCausalLM)lfm2r   Lfm2MoeForCausalLM)lfm2_moer   r2   Llama4ForCausalLM)llama4r   LLaMAForCausalLMLongcatFlashForCausalLM)longcat_flashr   rf   Mamba2ForCausalLM)mamba2r   MiniCPMForCausalLM)minicpmr   MiniCPM3ForCausalLM)minicpm3r   MiniMaxForCausalLM)minimax_text_01MiniMaxText01ForCausalLMr   MiniMaxM1ForCausalLMMiniMaxM2ForCausalLM)
minimax_m2r   MistralForCausalLM)mistralr   MistralLarge3ForCausalLM)mistral_large_3r   MixtralForCausalLM)mixtralr   MptForCausalLM)mptMPTForCausalLMr   MiMoForCausalLM)mimor   MiMoV2FlashForCausalLM)mimo_v2_flashr   NemotronForCausalLM)nemotronr   NemotronHForCausalLM)
nemotron_hr   OlmoForCausalLM)olmor   Olmo2ForCausalLM)olmo2r   Olmo3ForCausalLMOlmoeForCausalLM)olmoer   OPTForCausalLM)optr   OrionForCausalLM)orionr   OuroForCausalLM)ouror   PanguEmbeddedForCausalLM)	openpangur   PanguProMoEV2ForCausalLM)r   r   PanguUltraMoEForCausalLM)r   r   PersimmonForCausalLM)	persimmonr   PhiForCausalLM)phir   Phi3ForCausalLM)phi3r   PhiMoEForCausalLM)phimoer   Plamo2ForCausalLM)plamo2r   Plamo3ForCausalLM)plamo3r   QWenLMHeadModel)qwenr   Qwen2ForCausalLM)qwen2r   Qwen2MoeForCausalLM)	qwen2_moer   Qwen3ForCausalLM)qwen3r   Qwen3MoeForCausalLM)	qwen3_moer   RWForCausalLMSeedOssForCausalLM)seed_ossr   Step1ForCausalLM)step1r   Step3TextForCausalLM)
step3_textr   StableLMEpochForCausalLM)stablelmStablelmForCausalLMStableLmForCausalLMStarcoder2ForCausalLM)
starcoder2r  )solarSolarForCausalLM)	telechat2TeleChat2ForCausalLM)teleflmTeleFLMForCausalLM)zamba2Zamba2ForCausalLM)r  TeleChatForCausalLMr  r
  XverseForCausalLMr  )bertBertEmbeddingModel)r  BertSpladeSparseEmbeddingModel)rp   Gemma3Model)r   GPT2ForSequenceClassification)bert_with_ropeSnowflakeGteNewModel)r  GteNewModel)r   InternLM2ForRewardModel)r   JambaForSequenceClassification)r1   LlamaBidirectionalModel)	BertModelr  rL   Gemma2ModelGemma3TextModelru   r  r   GteModelr  r  r  r  
LlamaModelc                 0    i | ]\  }\  }}|d k    |||fS )r2    ).0kmodarchs       w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/registry.py
<dictcomp>r&     s>        A{T%%% 	
C;%%%    MistralModelModernBertModel)
modernbertr)  NomicBertModel)r  r+  
Qwen2ModelQwen2ForRewardModel)qwen2_rmr-  Qwen2ForProcessRewardModel)r.  r/  RobertaForMaskedLM)robertaRobertaEmbeddingModelRobertaModelr  r  XLMRobertaModelBgeM3EmbeddingModel)r1  r5  	CLIPModel)clipCLIPEmbeddingModel!LlavaNextForConditionalGeneration)
llava_nextr9  Phi3VForCausalLM)phi3vr;  )qwen2_vlQwen2VLForConditionalGeneration)siglipSiglipEmbeddingModel)
terratorch
Terratorch)r>  SiglipModelPrithviGeoSpatialMAErB  )r  BertForSequenceClassification)r  BertForTokenClassification)r  GteNewForSequenceClassification)jina_vlJinaVLForSequenceClassification)r1   +LlamaBidirectionalForSequenceClassification)r*  #ModernBertForSequenceClassification)r*   ModernBertForTokenClassification)r1   RobertaForSequenceClassification)	rE  rF  rG  JinaVLForRankingrJ  rK  rL  rM  #XLMRobertaForSequenceClassificationAriaForConditionalGeneration)ariarP  &AudioFlamingo3ForConditionalGeneration)audioflamingo3rR  !AyaVisionForConditionalGeneration)
aya_visionrT  BagelForConditionalGeneration)bagelrV  BeeForConditionalGeneration)beerX  Blip2ForConditionalGeneration)blip2rZ  !ChameleonForConditionalGeneration)	chameleonr\  %Cohere2VisionForConditionalGeneration)cohere2_visionr^  DeepseekVLV2ForCausalLM)deepseek_vl2r`  DeepseekOCRForCausalLM)deepseek_ocrrb  DotsOCRForCausalLM)dots_ocrrd  #Eagle2_5_VLForConditionalGeneration)eagle2_5_vlrf  &Ernie4_5_VLMoeForConditionalGeneration)
ernie45_vlrh  FuyuForCausalLM)fuyurj  Gemma3ForConditionalGeneration)	gemma3_mmrl  Gemma3nForConditionalGeneration)
gemma3n_mmrn  GlmAsrForConditionalGeneration)glmasrrp  GLM4VForCausalLM)glm4vrr  Glm4vForConditionalGeneration)glm4_1vrt   Glm4vMoeForConditionalGeneration)ru  rv  %GraniteSpeechForConditionalGeneration)granite_speechrw  H2OVLChatModel)h2ovlry  !HunYuanVLForConditionalGeneration)hunyuan_visionr{  StepVLForConditionalGeneration)step_vlr}  InternVLChatModel)internvlr  NemotronH_Nano_VL_V2)nano_nemotron_vlr  OpenCUAForConditionalGeneration)opencuar   InternS1ForConditionalGeneration)interns1r   InternVLForConditionalGeneration Idefics3ForConditionalGeneration)idefics3r  IsaacForConditionalGeneration)isaacr  SmolVLMForConditionalGeneration)smolvlmr  KananaVForConditionalGeneration)kanana_vr  KeyeForConditionalGeneration)keyer  !KeyeVL1_5ForConditionalGeneration)
keye_vl1_5r  RForConditionalGeneration)rvlr  KimiVLForConditionalGeneration)kimi_vlr  KimiK25ForConditionalGeneration)kimi_k25r  "LightOnOCRForConditionalGeneration)
lightonocrr  Lfm2VlForConditionalGeneration)lfm2_vlLfm2VLForConditionalGenerationLlama_Nemotron_Nano_VL)nemotron_vlLlamaNemotronVLChatModelLlama4ForConditionalGeneration)mllama4r  LlavaForConditionalGeneration)llavar  &LlavaNextVideoForConditionalGeneration)llava_next_videor  &LlavaOnevisionForConditionalGeneration)llava_onevisionr  MantisForConditionalGeneration)r  r  MiDashengLMModel)midashenglmr  #MiniMaxVL01ForConditionalGeneration)minimax_vl_01r  MiniCPMO)minicpmor  MiniCPMV)minicpmvr   Mistral3ForConditionalGeneration)mistral3r  MolmoForCausalLM)molmor  Molmo2ForConditionalGeneration)molmo2r  NVLM_D)nvlm_dNVLM_D_ModelOvis)ovisr  Ovis2_5)ovis2_5r  #PaddleOCRVLForConditionalGeneration)paddleocr_vlr  !PaliGemmaForConditionalGeneration)	paligemmar  Phi4MMForCausalLM)phi4mmr  PixtralForConditionalGeneration)pixtralr  QwenVLForConditionalGeneration)qwen_vlr  r>  "Qwen2_5_VLForConditionalGeneration)
qwen2_5_vlr  "Qwen2AudioForConditionalGeneration)qwen2_audior  Qwen2_5OmniModel)qwen2_5_omni_thinker*Qwen2_5OmniThinkerForConditionalGeneration#Qwen2_5OmniForConditionalGeneration)qwen3_omni_moe_thinker+Qwen3OmniMoeThinkerForConditionalGeneration)qwen3_vlQwen3VLForConditionalGeneration)qwen3_vl_moe"Qwen3VLMoeForConditionalGeneration)
skyworkr1vSkyworkR1VChatModel)step3_vlStep3VLForConditionalGeneration)tarsierTarsierForConditionalGeneration)r=   Tarsier2ForConditionalGeneration)ultravoxUltravoxModel)voxtralVoxtralForConditionalGeneration)voxtral_streamingVoxtralStreamingGeneration)nemotron_parse%NemotronParseForConditionalGeneration)whisperWhisperForConditionalGeneration)$Qwen3OmniMoeForConditionalGenerationr  r  r  r  r  r  r  r  r  r  r  MiMoMTPModel)mimo_mtpMiMoMTPEagleLlamaForCausalLM)llama_eagler  EagleLlama4ForCausalLM)llama4_eagler  EagleMiniCPMForCausalLM)minicpm_eagler  Eagle3LlamaForCausalLM)llama_eagle3r  LlamaForCausalLMEagle3Eagle3Qwen2_5vlForCausalLMEagle3Qwen3vlForCausalLMEagleMistralLarge3ForCausalLM)mistral_large_3_eagler  EagleDeepSeekMTPModel)deepseek_eagleEagleDeepseekV3ForCausalLMDeepSeekMTPModel)deepseek_mtpDeepSeekMTPErnieMTPModel)	ernie_mtpErnieMTPExaoneMoeMTP)exaone_moe_mtpr  LongCatFlashMTPModel)longcat_flash_mtpLongCatFlashMTPGlm4MoeMTPModel)glm4_moe_mtp
Glm4MoeMTPGlm4MoeLiteMTPModel)glm4_moe_lite_mtpGlm4MoeLiteMTPMedusaModel)medusaMedusa)openpangu_mtpOpenPanguMTP)qwen3_next_mtpQwen3NextMTP)OpenPanguMTPModelr  )transformersTransformersForCausalLM)r  !TransformersMultiModalForCausalLM)SmolLM3ForCausalLMEmu3ForConditionalGeneration)r  TransformersMoEForCausalLM)r  $TransformersMultiModalMoEForCausalLM)r  TransformersEmbeddingModel)r  TransformersMoEEmbeddingModel)r  $TransformersMultiModalEmbeddingModel)r  %TransformersForSequenceClassification)r  (TransformersMoEForSequenceClassification)r  /TransformersMultiModalForSequenceClassification)
r  r  r  r  r  r  r  r   r!  r"  z-mz#vllm.model_executor.models.registryz0.10.2z0.9.2z0.12.0)
MotifForCausalLMPhi3SmallForCausalLMPhi4FlashForCausalLMPhi4MultimodalForCausalLM	BartModelBartForConditionalGenerationDonutForConditionalGeneration!Florence2ForConditionalGenerationMBartForConditionalGenerationMllamaForConditionalGenerationT)frozenc                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   e	de
ej                 dd fd            ZdS )
_ModelInfoarchitecturer+   r*   	attn_typedefault_seq_pooling_typedefault_tok_pooling_typer    r"   r$   r   r#   r%   r   r   r   r   r!   r&   supports_transcription_onlymodelreturnc                 t   t          di d| j        dt          |           dt          |           dt	          |           dt          |           dt          |           dt          |           dt          |           d	t          |           d
t          |           dt          |           dt          |           dt          |           dt          |           dt          |           dt!          |           dt#          |           dt#          |           o| j        dt'          |           S )Nr0  r+   r*   r2  r3  r1  r    r"   r$   r   r#   r%   r   r   r   r!   r&   r4  r   r   )r/  __name__r+   r*   r(   r)   r'   r    r"   r$   r   r#   r%   r   r   r   r!   r&   r4  r   )r5  s    r%  from_model_clsz_ModelInfo.from_model_clsG  s    
 
 

%=e%D%D%D
 .e444
 &B%%H%H%H	

 &B%%H%H%H
 $E***
 %<E$B$B$B
 !4E : : :
 0R0 0 0
 '@&F&F&F
 1T1 1 1
  $E***!
" ,E222#
$ 0666%
&  &&&'
( +H*N*N*N)
* $:%#@#@#@+
. 'u--S%2S/
2  &&&3
 	
r'  N)r8  
__module____qualname__str__annotations__boolr   r   r   staticmethodtypennModuler9  r   r'  r%  r/  r/  1  s&        """"1111....!!!!(,,,,####)----OOOOOO#''''    !%%%%
d29o 
, 
 
 
 \
 
 
r'  r/  c                   \    e Zd Zedefd            Zedeej                 fd            Z	dS )_BaseRegisteredModelr6  c                     t           NNotImplementedErrorselfs    r%  inspect_model_clsz&_BaseRegisteredModel.inspect_model_clsg      !!r'  c                     t           rF  rG  rI  s    r%  load_model_clsz#_BaseRegisteredModel.load_model_clsk  rL  r'  N)
r8  r:  r;  r   r/  rK  r@  rA  rB  rN  r   r'  r%  rD  rD  f  sf        ": " " " ^" "RY " " " ^" " "r'  rD  c                       e Zd ZU dZeed<   eej                 ed<   e	deej                 fd            Z
defdZdeej                 fdZdS )	_RegisteredModelzP
    Represents a model that has already been imported in the main process.
    
interfaces	model_clsc                 T    t          t                              |           |           S )N)rQ  rR  )rP  r/  r9  rR  s    r%  r9  z_RegisteredModel.from_model_clsy  s-    !00;;
 
 
 	
r'  r6  c                     | j         S rF  )rQ  rI  s    r%  rK  z"_RegisteredModel.inspect_model_cls  s
    r'  c                     | j         S rF  rT  rI  s    r%  rN  z_RegisteredModel.load_model_cls  s
    ~r'  N)r8  r:  r;  __doc__r/  r=  r@  rA  rB  r?  r9  rK  rN  r   r'  r%  rP  rP  p  s           BI
$ry/ 
 
 
 \
:    RY      r'  rP  c                       e Zd ZU dZeed<   eed<   edefd            ZdefdZ	dede
dz  fd	Zd
e
deddfdZ eed          de
fd            Zdeej                 fdZdS )_LazyRegisteredModelzL
    Represents a model that has not been imported in the main process.
    module_name
class_namer6  c                  :    t          t          j                  dz  S )N
modelinfos)r   r   VLLM_CACHE_ROOTr   r'  r%  _get_cache_dirz#_LazyRegisteredModel._get_cache_dir  s    D())L88r'  c                 V    | j          d| j                             dd          }| dS )N-.z.json)rZ  r[  replace)rJ  cls_names     r%  _get_cache_filenamez(_LazyRegisteredModel._get_cache_filename  s9    &::::BB3LL!!!!r'  module_hashNc                 <   	 	 |                                  |                                 z  }t          |d          5 }t          j        |          }d d d            n# 1 swxY w Y   n7# t
          $ r* t                              d| j        | j	                   Y d S w xY w|d         |k    r(t                              d| j        | j	                   d S t          di |d         S # t          $ r* t                              d| j        | j	                   Y d S w xY w)	Nutf-8encodingz0Cached model info file for class %s.%s not foundhashz/Cached model info file for class %s.%s is stale	modelinfoz)Cached model info for class %s.%s error. r   )r_  re  openjsonloadFileNotFoundErrorloggerdebugrZ  r[  r/  	Exception)rJ  rf  modelinfo_pathfilemi_dicts        r%  _load_modelinfo_from_cachez/_LazyRegisteredModel._load_modelinfo_from_cache  s   	
!%!4!4!6!69Q9Q9S9S!S.7;;; .t"iooG. . . . . . . . . . . . . . .$   F$O  
 tt v+--E$O  
 t 55 4555 	 	 	LL;   
 44	sX   :A* AA* A""A* %A"&A* )C' *0BC' B5C' C' '0DDmic                    ddl m} 	 |t          |          d}|                                 }|                    dd           ||                                 z  } ||d          5 }t          j        ||d	           d
d
d
           d
S # 1 swxY w Y   d
S # t          $ r t          
                    d           Y d
S w xY w)z"save dictionary json file to cacher   )atomic_writer)rk  rl  T)parentsexist_okrh  ri     )indentNzError saving model info cache.)-vllm.model_executor.model_loader.weight_utilsrz  r   r_  mkdirre  rn  dumprs  rq  	exception)rJ  rx  rf  rz  modelinfo_dict	cache_dirrt  fs           r%  _save_modelinfo_to_cachez-_LazyRegisteredModel._save_modelinfo_to_cache  s6   OOOOOO	?##BZZ N ++--IOOD4O888&)A)A)C)CCN~@@@ 7A	.!A66667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 	? 	? 	?=>>>>>>	?s6   A!B )BB BB BB $CCzRegistry inspect model class)rq  msgc                     t          t                    j         j                            d          d          dz  }d }|                                rt          |d          5 }t          |                                d          	                                }d d d            n# 1 swxY w Y    
                    |          }|(t                              d j         j                   |S t                              d j         j                   t           fd	          }t                              d
 j         j                   |                     ||           |S )Nrb  z.pyrbF)usedforsecurityz,Loaded model info for class %s.%s from cachez=Cache model info for class %s.%s miss. Loading model instead.c                  \    t                                                                          S rF  )r/  r9  rN  rI  s   r%  <lambda>z8_LazyRegisteredModel.inspect_model_cls.<locals>.<lambda>  s!    J--d.A.A.C.CDD r'  z!Loaded model info for class %s.%s)r   __file__parentrZ  splitexistsrm  r   read	hexdigestrw  rq  rr  r[  _run_in_subprocessr  )rJ  
model_pathrf  r  rx  s   `    r%  rK  z&_LazyRegisteredModel.inspect_model_cls  s   (^^*0@0F0Fs0K0KB0O-T-T-TT
 	j$'' U1'%HHHRRTTU U U U U U U U U U U U U U U 00==B~B$O  
 	S$O    DDDD
 
 	/1A4?	
 	
 	

 "))"k:::	s   %6B''B+.B+c                 ^    t          j        | j                  }t          || j                  S rF  )	importlibimport_modulerZ  getattrr[  )rJ  r#  s     r%  rN  z#_LazyRegisteredModel.load_model_cls  s'    %d&677sDO,,,r'  )r8  r:  r;  rW  r<  r=  r?  r   r_  re  r/  rw  r  r   rq  rK  r@  rA  rB  rN  r   r'  r%  rY  rY    s#          OOO9D 9 9 9 \9"S " " " "c j4>O    @?: ?C ?D ? ? ? ?" WF >???#: # # # @?#J-RY - - - - - -r'  rY     )maxsize
model_archr5  r6  c                     ddl m} |                    |            	 |                                S # t          $ r t
                              d|            Y d S w xY w)Nr   )current_platformz(Error in loading model architecture '%s')vllm.platformsr  verify_model_archrN  rs  rq  r  )r  r5  r  s      r%  _try_load_model_clsr    sz    
 0/////&&z222##%%%   CZPPPtts   1 %AAc                     	 |                                 S # t          $ r t                              d|            Y d S w xY w)Nz+Error in inspecting model architecture '%s')rK  rs  rq  r  )r  r5  s     r%  _try_inspect_model_clsr    sR    
&&(((   F
SSStts    %??c                   \   e Zd ZU  ee          Zeeef         ed<   de	e         fdZ
dedeej                 ez  ddfdZd	ee         fd
Zdedeej                 dz  fdZdededz  fdZdedededz  fdZdededefdZd	eee         z  dedeeef         fdZd	eee         z  dedeeej                 ef         fdZd	eee         z  dedefdZd	eee         z  dedefdZd	eee         z  dedefdZd	eee         z  dedefdZd	eee         z  dedefdZd	eee         z  dedefdZ d	eee         z  dedefdZ!d	eee         z  dedefdZ"d	eee         z  dedefdZ#d	eee         z  dedefdZ$d	eee         z  dedefdZ%d	eee         z  dedefdZ&dS )_ModelRegistry)default_factorymodelsr6  c                 4    | j                                         S rF  )r  keysrI  s    r%  get_supported_archsz"_ModelRegistry.get_supported_archs  s    {!!!r'  r  rR  Nc                 V   t          |t                    s!dt          |           }t          |          || j        v rt
                              d||           t          |t                    rC|                    d          }t          |          dk    rd}t          |          t          | }nkt          |t                    r5t          |t          j                  rt                              |          }n!dt          |           }t          |          || j        |<   dS )a  
        Register an external model to be used in vLLM.

        `model_cls` can be either:

        - A [`torch.nn.Module`][] class directly referencing the model.
        - A string in the format `<module>:<class>` which can be used to
          lazily import the model. This is useful to avoid initializing CUDA
          when importing the model and thus the related error
          `RuntimeError: Cannot re-initialize CUDA in forked subprocess`.
        z'`model_arch` should be a string, not a z_Model architecture %s is already registered, and will be overwritten by the new model class %s.:r}  z2Expected a string in the format `<module>:<class>`z=`model_cls` should be a string or PyTorch model class, not a N)
isinstancer<  r@  	TypeErrorr  rq  warningr  len
ValueErrorrY  
issubclassrA  rB  rP  r9  )rJ  r  rR  r  	split_strr5  s         r%  register_modelz_ModelRegistry.register_model  s1     *c** 	!ND<L<LNNCC.. $$NN9	   i%% 	!!,,I9~~""J oo%()4EE	4(( 	!Z	29-M-M 	!$33I>>EE,j)), ,  C.. "'Jr'  architecturesc                    |                                  t          fd|D                       rt          d| d          |D ].}|t          v r#t          |         }t          d| d| d          /t          d| d           )Nc              3       K   | ]}|v V  	d S rF  r   )r!  r$  all_supported_archss     r%  	<genexpr>z8_ModelRegistry._raise_for_unsupported.<locals>.<genexpr>H  s)      EEtt**EEEEEEr'  zModel architectures z@ failed to be inspected. Please check the logs for more details.zModel architecture z was supported in vLLM until vzo, and is not supported anymore. Please use an older version of vLLM if you want to use this model architecture.z5 are not supported for now. Supported architectures: )r  anyr  _PREVIOUSLY_SUPPORTED_MODELS)rJ  r  r$  previous_versionr  s       @r%  _raise_for_unsupportedz%_ModelRegistry._raise_for_unsupportedE  s    "6688EEEE}EEEEE 	K} K K K  
 " 		 		D333#?#E  3$ 3 3(3 3 3   4 >= > >(;> >
 
 	
r'  c                 N    || j         vrd S t          || j         |                   S rF  )r  r  rJ  r  s     r%  r  z"_ModelRegistry._try_load_model_cls^  s+    T[((4":t{:/FGGGr'  c                 N    || j         vrd S t          || j         |                   S rF  )r  r  r  s     r%  r  z%_ModelRegistry._try_inspect_model_clsd  s+    T[((4%j$+j2IJJJr'  r0  model_configc           	         |t           v r|S t          |j        dd           pt                      }dD ]T}|                                D ]=\  }}|                    |          r#t          ||j        |j        |j	        d           >Ut          t          |d           }|v|                                D ]A\  }}|                    d          r't          ||j        |j        |j	        d          }| n!B|j        dk    rd S t          d|d	          |                                s |j        dk    rd S t          d
|d          |                                S )Nauto_map)
AutoConfig	AutoModelF)revisiontrust_remote_codewarn_on_failr  Tr  zCannot find model module. z is not a registered model in the Transformers library (only relevant if the model is meant to be in Transformers) and 'AutoModel' is not present in the model config's 'auto_map' (relevant if the model is custom).z#The Transformers implementation of z is not compatible with vLLM.)_TRANSFORMERS_BACKEND_MODELSr  	hf_configdictitems
startswithr   r5  r  r  r  
model_implr  is_backend_compatible_get_transformers_backend_cls)rJ  r0  r  r  prefixnamemodulemodel_modules           r%  _try_resolve_transformersz(_ModelRegistry._try_resolve_transformersj  s   
 777 L*J==G 	 2 		 		F ( 0 0  f??6** 5$*!-!6*6*H%*    |\4@@ ( 0 0  f??;// 	#D$*!-!6*6*H%)$ $ $L $/*n<<4 D D D D   1133 	&.88t/l / / /  
 99;;;r'  c           	          || j         v r|S t          |t          |dd           t          |dd                     }|r<|\  }}t                      D ](\  }}|                    ||          }|| j         v r|c S )|S )Nrunner_typeconvert_type)r  r  )r  r   r  r   rc  )rJ  r0  r  matchsuffix_repl_suffix	base_archs           r%  _normalize_archz_ModelRegistry._normalize_arch  s    
 4;&& 0mTBB ~tDD
 
 

  	%IFA #=">"> % %Q(00EE	++$$$$ , r'  c                 h    t          |t                    r|g}|st          d          |j        dk    r:                     |d         |          }|                     |          }|||fS n$|j        dk    r                     d          }|dfS t           fd|D                       rY|j        dk    rNt          |dd	          d	k    r9                     |d         |          }|                     |          }|||fS |D ]5}                     ||          }                     |          }|||fc S 6t           fd
|D                       rD|j        dk    r9                     |d         |          }|                     |          }|||fS  	                    |          S )N$No model architectures are specifiedr  r   rA  rB  c              3   *   K   | ]}|j         vV  d S rF  r  r!  r$  rJ  s     r%  r  z3_ModelRegistry.inspect_model_cls.<locals>.<genexpr>  *      BBDDK'BBBBBBr'  autor  nonec              3   *   K   | ]}|j         vV  d S rF  r  r  s     r%  r  z3_ModelRegistry.inspect_model_cls.<locals>.<genexpr>  r  r'  )
r  r<  r  r  r  r  allr  r  r  )rJ  r  r  r$  
model_infonormalized_archs   `     r%  rK  z _ModelRegistry.inspect_model_cls  s'   
 mS)) 	,*OM 	ECDDD "n4411-2BLQQD!88>>
)&--$4444\BBJ-- BBBBMBBBBB		.'611nf==GG11-2BLQQD!88>>
)&--! 	* 	*D"224FFO44_EEJ%"D)))) &
 BBBBMBBBBB	.'61111-2BLQQD!88>>
)&--**=999r'  c                 p    t          |t                    r|g}|st          d          |j        dk    r:                     |d         |          }|                     |          }|||fS n(|j        dk    rd}                     |          }|||fS t           fd|D                       rY|j        dk    rNt          |dd	          d	k    r9                     |d         |          }|                     |          }|||fS |D ]5}                     ||          }                     |          }|||fc S 6t           fd
|D                       rD|j        dk    r9                     |d         |          }|                     |          }|||fS  	                    |          S )Nr  r  r   rA  rB  c              3   *   K   | ]}|j         vV  d S rF  r  r  s     r%  r  z3_ModelRegistry.resolve_model_cls.<locals>.<genexpr>  r  r'  r  r  r  c              3   *   K   | ]}|j         vV  d S rF  r  r  s     r%  r  z3_ModelRegistry.resolve_model_cls.<locals>.<genexpr>'  r  r'  )
r  r<  r  r  r  r  r  r  r  r  )rJ  r  r  r$  rR  r  s   `     r%  resolve_model_clsz _ModelRegistry.resolve_model_cls  s2   
 mS)) 	,*OM 	ECDDD "n4411-2BLQQD 44T::	(%t,,$44D0066I$!4(( BBBBMBBBBB		-'611nf==GG11-2BLQQD 44T::	(%t,,! 	) 	)D"224FFO00AAI$!4(((( %
 BBBBMBBBBB	-'61111-2BLQQD 44T::	(%t,,**=999r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r+   rJ  r  r  rR  r  s        r%  r+   z'_ModelRegistry.is_text_generation_model2  s&    
 --m\JJ	111r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r*   r  s        r%  r*   z_ModelRegistry.is_pooling_model:  s&    
 --m\JJ	1))r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r    r  s        r%  is_cross_encoder_modelz%_ModelRegistry.is_cross_encoder_modelB  s&    
 --m\JJ	100r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r"   r  s        r%  is_multimodal_modelz"_ModelRegistry.is_multimodal_modelJ  s&    
 --m\JJ	1,,r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r$   r  s        r%  "is_multimodal_raw_input_only_modelz1_ModelRegistry.is_multimodal_raw_input_only_modelR  s&    
 --m\JJ	1;;r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r%   r  s        r%  is_pp_supported_modelz$_ModelRegistry.is_pp_supported_modelZ  s&    
 --m\JJ	1$$r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r   r  s        r%  model_has_inner_statez$_ModelRegistry.model_has_inner_stateb  s&    
 --m\JJ	1((r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r   r  s        r%  is_attention_free_modelz&_ModelRegistry.is_attention_free_modelj  s&    
 --m\JJ	1**r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r   r  s        r%  is_hybrid_modelz_ModelRegistry.is_hybrid_modelr  &    
 --m\JJ	1""r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r   r  s        r%  is_noops_modelz_ModelRegistry.is_noops_modelz  r   r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r&   r  s        r%  is_transcription_modelz%_ModelRegistry.is_transcription_model  s&    
 --m\JJ	1//r'  c                 B    |                      ||          \  }}|j        S rF  )rK  r4  r  s        r%  is_transcription_only_modelz*_ModelRegistry.is_transcription_only_model  s&    
 --m\JJ	144r'  )'r8  r:  r;  r	   r  r  r<  rD  r=  r   r  r@  rA  rB  r  listr  r  r/  r  r   r  r  tuplerK  r  r>  r+   r*   r  r  r  r  r  r  r  r  r  r  r   r'  r%  r  r    s         /4eD.I.I.IFD**+III"SX " " " ",(,( 	?S(,( 
	,( ,( ,( ,(\
DI 
 
 
 
2Hc Hd29o6L H H H HK Kd9J K K K KB<B< "B< 
t	B< B< B< B<H " 
	   42:T#Y2: "2: 
z3		2: 2: 2: 2:h4:T#Y4: "4: 
tBI#	$	4: 4: 4: 4:l2T#Y2 "2 
	2 2 2 2*T#Y* "* 
	* * * *1T#Y1 "1 
	1 1 1 1-T#Y- "- 
	- - - -<T#Y< "< 
	< < < <%T#Y% "% 
	% % % %)T#Y) ") 
	) ) ) )+T#Y+ "+ 
	+ + + +#T#Y# "# 
	# # # ##T#Y# "# 
	# # # #0T#Y0 "0 
	0 0 0 05T#Y5 "5 
	5 5 5 5 5 5r'  r  c                 D    i | ]\  }\  }}|t          d | |          S )zvllm.model_executor.models.)rZ  r[  )rY  )r!  r  mod_relnamerd  s       r%  r&  r&    sS       
 0J/h	 	(CkCC
 
 
  r'  _Tfnc                 B   t          j                    5 }t          j                            |d          }dd l}|                    | |f          }t          j        t          |d          }	 |
                                 n<# t          $ r/}t          d|j                                                   |d }~ww xY wt          |d          5 }t!          j        |          cd d d            cd d d            S # 1 swxY w Y   	 d d d            d S # 1 swxY w Y   d S )Nzregistry_output.tmpr   T)inputcapture_outputzError raised in subprocess:
r  )tempfileTemporaryDirectoryospathjoincloudpickledumps
subprocessrun_SUBPROCESS_COMMANDcheck_returncoders  RuntimeErrorstderrdecoderm  picklero  )r  tempdiroutput_filepathr  input_bytesreturneder  s           r%  r  r    s    
	$	&	& "'',,w0EFF 	!''_(=>> >{4
 
 

	%%'''' 	 	 	J0F0F0H0HJJ 	 /4(( 	"A;q>>	" 	" 	" 	" 	" 	" 	"/" " " " " " " ".	" 	" 	" 	" 	" 	" 	" 	" 	"/" " " " " " " " " " " " " " " " " "sZ   AD-BD
B;*B66B;;DC;"D;C?	?DC?	DDDc                  J   ddl m}   |              t          j        t          j        j                                                  \  }} |            }t          |d          5 }|	                    t          j
        |                     d d d            d S # 1 swxY w Y   d S )Nr   )load_general_pluginswb)vllm.pluginsr%  r  loadssysstdinbufferr  rm  writer  )r%  r  output_fileresultr  s        r%  _runr/    s    111111l39#3#8#8#:#:;;OBRTTF	k4	 	  &A	V$$%%%& & & & & & & & & & & & & & & & & &s   #(BBB__main__)r6  N)_rW  r  rn  r  r  r  r)  r  abcr   r   collections.abcr   r   dataclassesr   r   r	   	functoolsr
   pathlibr   typingr   r   r   torch.nnrA  r  vllmr   vllm.configr   r   r   vllm.loggerr   vllm.logging_utilsr   &vllm.transformers_utils.dynamic_moduler   vllm.utils.hashingr   vllm.config.modelr   vllm.config.poolerr   r   rQ  r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   interfaces_baser'   r(   r)   r*   r+   r8  rq  _TEXT_GENERATION_MODELSr  _EMBEDDING_MODELS_CROSS_ENCODER_MODELS_MULTIMODAL_MODELS_SPECULATIVE_DECODING_MODELS_TRANSFORMERS_SUPPORTED_MODELSr  _VLLM_MODELS
executabler  r  r/  rD  rP  rY  r<  r@  rB  r  r  r  ModelRegistryr  r  r/  r   r'  r%  <module>rJ     s`   
      				      



  # # # # # # # # ) ) ) ) ) ) ) ) 0 0 0 0 0 0 0 0 0 0             . . . . . . . . . .                         
 $ # # # # # & & & & & & T T T T T T ( ( ( ( ( ( ------HHHHHHHHHK                                         
X		C5C ;C 0	C
 6C 5C 8C >C >C EC IC 5C 5C  5!C" &'H#C$ :%C& ;'C( 3)C C* 2+C, >-C. A/C0 E1C2 E3C4 F5C6 57C8 =9C: I;C< 8=C> ;?C@ BACB  NCCD 8ECF ;GCH ?ICJ ?KC C CL 5MCN 8OCP 8QCR ;SCT BUCV /WCX 2YCZ <[C\ I]C^ 9_C` 2aCb EcCd 3eCf <gCh ;iCj DkCl "#VmC C Cn "#VoCp "qCr 9sCt 4uCv HwCx  !LyCz J{C| 8}C~ AC@ HACB 9CCD ;ECF !"TGCH 2ICJ 5KCL 5MCN EOC C CP 2QCR <SCT 5UCV 8WCZ 5[C\ K]C^ 5_C` 8aCb ;cCd >eCf IgCh  OiCj KkCl BmCn ;oCp  OqCr ;sC C Cv /wCx /yCz 2{C| I}C~ >C@ BACB 2CCD 5ECF 5GCH 5ICJ /KCL 5MCN 2OCP  IQCR  ISCT  IUCV AWC C CX /YCZ 2[C\ 8]C^ 8_C` 8aCb 2cCd 5eCf ?gCh 5iCj ?kCl 4mCn <oCp 5qCr BsCt  CuCv >wCx DyC Cz 6@A;68EC C C N 0&P>20/%N":4G&QC/2 2   6;;==  !2 2, 1-2. 8/20 :122 2324 /526 5728 >92: !"L;2< >=2> 8?2@ @A2B AC2D ;E2F =G2J /K2L ( *M2T 5U2 2V (W5 9.c2 2 2 j &O"H( G4,) )X,+  6m"$Lm - /m ( *m $%Om "#Im $%Om ( *m$ , .%m, J-m. H/m0 <1m2 * ,3m: - /;mB 2CmD %&UEmF & (GmN %&ROm mP 5QmR $%QSmT '(WUmV , .Wm^ 1_m` ( *amh %&Simj :kml Hmmn & (omv ' )wm~ ' )mF ' )GmN $%OOmP &'UQmR &'VSmT #$LUm m mV ( *Wm^  !E_m` %&Samb &'Vcmd ) +eml %&Smmn Iomp %&Sqmr $%Osmt ( *um| - /}mD - /EmL %&QMmN ;OmP * ,QmX (YmZ ([m m m\ ' )]md 5emf %&Rgmh (imj kml %mmn * ,omv ( *wm~ 5m@ 8AmB &'UCmD %&SEmF &'VGmH ) +ImP ) +QmX  Ym` * ,am mh- (W+ A'V'U) 3'U"U. (VYm m m ^ + E  H  K	 
 H  H  !"L   J  $ &  M  7  .   6! " D# $ 5% & B' ( ') * ;6-      < F%	" "   K"P*-
 #Q%V-
.18;!  !  F  	
 # % # ~t-RS  !#$!) $,%-)1%-&.      $1
 1
 1
 1
 1
 1
 1
 1
h" " " " "3 " " " $    +   , $i- i- i- i- i-/ i- i- i-X 3 
")_t    3 $    @5 @5 @5 @5 @5 @5 @5 @5F  
 4@3E3E3G3G    WT]]"8BF+ " " " " "<& & & & zDFFFFF r'  