
    -`i                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
Z
ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% dd	l&m'Z'm(Z( dd
l)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 	 d(de4e         de5de%de6de6de7e8e4e,         dz  f         fdZ9	 d(de4e         de5de%de6de6de7e8e4e,         f         fdZ:	 	 d)de4e         de5de$de6de6de6de8fdZ;	 d(de4e         de<de0de5de5de6de6de8fdZ=dej>        d e?e<e	f         ddfd!Z@d" ZAd# ZBd$ ZCd%ejD        fd&ZEdej>        fd'ZFdS )*z'Benchmark offline inference throughput.    N)Any)tqdm)AutoModelForCausalLMPreTrainedTokenizerBase)AIMODatasetBurstGPTDatasetConversationDatasetInstructCoderDatasetMultiModalConversationDatasetPrefixRepetitionRandomDatasetRandomDatasetRandomDatasetForRerankingRandomMultiModalDatasetSampleRequestShareGPTDatasetSonnetDatasetVisionArenaDatasetadd_random_dataset_base_args"add_random_multimodal_dataset_args)#convert_to_pytorch_benchmark_formatwrite_to_json)AsyncEngineArgs
EngineArgs)
TextPromptTokensPrompt)LoRARequest)RequestOutput)BeamSearchParams)TokenizerLikeget_tokenizer)merge_async_iteratorsFrequestsnengine_args
do_profiledisable_detokenizereturnc                 h   ddl m}m}  |di t          j        |          t          fd| D                       s
J d            g }g }| D ]}	d|	j        v rt          |	j        d                   nt          |	j                  }
|	j	        r&t          |	j	        t                    sJ |	j	        |
d<   |                    |
           |                     ||d	d	d
|	j        |                      d }|j        rd | D             }d}d }|slt          j                    }|r                                                     |||d
          }|r                                 t          j                    }n|
J d            d | D             }| d         j        }| D ]}	|	j        |k    sJ t          j                    }|r                                                     |t+          ||d
                     |r                                 t          j                    }||z
  |fS )Nr   LLMSamplingParamsc              3   \   K   | ]&}j         j        j        |j        |j        z   k    V  'd S N
llm_enginemodel_configmax_model_len
prompt_lenexpected_output_len.0requestllms     n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/benchmarks/throughput.py	<genexpr>zrun_vllm.<locals>.<genexpr>8   T          	#1!<<	>         pPlease ensure that max_model_len is greater than the sum of prompt_len and expected_output_len for all requests.prompt_token_idsr=   promptmulti_modal_data      ?Tr#   temperaturetop_p
ignore_eos
max_tokens
detokenizec                     g | ]	}|j         
S  )lora_requestr5   r6   s     r8   
<listcomp>zrun_vllm.<locals>.<listcomp>Z   s    FFF'-FFFr;   F)rK   use_tqdmz$BeamSearch API does not support LoRAc                     g | ]	}|j         
S rJ   r?   rL   s     r8   rM   zrun_vllm.<locals>.<listcomp>k   s    :::g7>:::r;   )
beam_widthrG   rF   rJ   )vllmr*   r+   dataclassesasdictallr@   r   r   rA   
isinstancedictappendr3   enable_loratimeperf_counterstart_profilegeneratestop_profilebeam_searchr   )r"   r#   r$   r%   r&   r*   r+   promptssampling_paramsr6   r@   lora_requestsuse_beam_searchoutputsstartend
output_lenr7   s                    @r8   run_vllmrg   .   s    )(((((((
#
0
0";//
0
0C            
	@   02G,.O 
 
 "W^33 '.9K*LMMMM7>222 	
 # 	Bg6=====)0)AF%&vN"611  		
 		
 		
 		
 /3M GFFXFFFOG "!## 	 ,,_=4  
 
  	!!$$&L$$$:::::a[4
 	= 	=G.*<<<<<!## 	 %  	
 	
 	
  	!!;r;   c                    ddl m}m}  |d	i t          j        |          t          fd| D                       s
J d            g }g }| D ]F}	|                    |	j                   |                     ||ddd|	j        |                      Gt          j
                    }
|r                                                     ||d          }|r                                 t          j
                    }||
z
  |fS )
z
    Run vLLM chat benchmark. This function is recommended ONLY for benchmarking
    multimodal models as it properly handles multimodal inputs and chat
    formatting. For non-multimodal models, use run_vllm() instead.
    r   r)   c              3   \   K   | ]&}j         j        j        |j        |j        z   k    V  'd S r-   r.   r4   s     r8   r9   z run_vllm_chat.<locals>.<genexpr>   r:   r;   r<   rB   TrC   )rN   rJ   )rQ   r*   r+   rR   rS   rT   rW   r@   r3   rY   rZ   r[   chatr]   )r"   r#   r$   r%   r&   r*   r+   r_   r`   r6   rd   rc   re   r7   s                @r8   run_vllm_chatrk      s    )(((((((
#
0
0";//
0
0C            
	?   G,.O 
 
w~&&&N"611  		
 		
 		
 		
 E hhw$h??G 



C;r;    disable_frontend_multiprocessingc                 *  K   ddl m} ddlm}  |||          4 d {V 	 }|j        t          fd| D                       s
J d            g }	g }
g }| D ]}d|j        v rt          |j        d                   nt          |j        	          }|j	        r&t          |j	        t                    sJ |j	        |d
<   |
                     ||ddd|j        |                      |	                    |           |                    |j                   g }t          j                    }|r|                                 d {V  t%          t'          |	|
|                    D ]:\  }\  }}}|                    |||d|           }|                    |           ;t+          | }|2 3 d {V \  }}6 |r|                                 d {V  t          j                    }||z
  cd d d           d {V  S # 1 d {V swxY w Y   d S )Nr   )r+   )*build_async_engine_client_from_engine_args)rl   c              3   H   K   | ]}j         |j        |j        z   k    V  d S r-   )r1   r2   r3   )r5   r6   r0   s     r8   r9   z!run_vllm_async.<locals>.<genexpr>   sP       
 
  &"W%@@B
 
 
 
 
 
r;   r<   r=   r>   r?   rA   rB   TrC   test)rK   
request_id)rQ   r+   "vllm.entrypoints.openai.api_serverrn   r0   rT   r@   r   r   rA   rU   rV   rW   r3   rK   rY   rZ   r[   	enumeratezipr\   r!   r]   )r"   r#   r$   r%   rl   r&   r+   rn   r7   r_   r`   ra   r6   r@   
generatorsrd   isplr	generatorall_gensresre   r0   s                          @r8   run_vllm_asyncr|      s      $#####      :9)I   9 9 9 9 9 9 9 9 
' 
 
 
 
 $
 
 
 
 
 	
 	

D	
 	
 
 460224 	7 	7G &77 gn=O.PQQQQw~666  ' F!'":DAAAAA-4-E)*"" ##&:#55  	 	 	 NN6"""  !56666
!## 	&##%%%%%%%%%#,-88$
 $
 	) 	)AB VRbZTUZZXXIi(((((*5$ 	 	 	 	 	 	 	&!S % 	%""$$$$$$$$$!!U{s9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s   FH0F;69H
HHmodel	tokenizermax_batch_sizetrust_remote_codec           
      Z   t          |t                    s
J d            t          j        |t          j        |          }|j        j        dk    r|j        |_	        |
                                }t          t          |                     }t          j                    }	g }
d}d}t          t          |                     D ]P}| |         j        }| |         j        }| |         j        }|
                    |           t)          ||          }t)          ||          }t          |
          |k     r\|t          |           dz
  k    rF| |dz            j        }| |dz            j        }t)          ||          t)          ||          z   dk    r ||
dd	
          j        }|                    |
                                d	|ddd	|          }|s|                    |d	           |                    t          |
                     g }
d}d}Rt          j                    }||	z
  S )Nz*the hf backend only supports HF tokenizers)dtyper   llama)totalr      i   ptT)return_tensorspaddingrB   )	input_ids	do_samplenum_return_sequencesrD   rE   	use_cachemax_new_tokens)skip_special_tokens)rU   r   r   from_pretrainedtorchfloat16config
model_type	eos_token	pad_tokencudar   lenrY   rZ   ranger@   r2   r3   rW   maxr   r\   batch_decodeupdate)r"   r}   r~   r#   r   r   r&   r7   pbarrd   batchmax_prompt_lenmax_output_lenrv   r@   r2   rf   next_prompt_lennext_output_lenr   llm_outputsre   s                         r8   run_hfr      sM    i!899  4 9 
.U]6G  C z'''1	
((**Cc(mm$$$DEENN3x==!! & &!#a[+
a[4
V^Z88^Z88u::&&1H0A+A+A&q1uo8O&q1uoAONO44no667 
  IeD$GGGQ	llnn&&!") # 
 
 " 	J"";D"IIICJJ 



C;r;   argsresultsc                     t          | d         gd         gdfddD                       }|r?t          j                            | j                  d          d}t          ||           d S d S )	Nrequests_per_secondtokens_per_second)r   r   c                 "    i | ]}||         S rJ   rJ   )r5   kr   s     r8   
<dictcomp>z4save_to_pytorch_benchmark_format.<locals>.<dictcomp>H  s,     
 
 
Awqz
 
 
r;   )elapsed_timenum_requeststotal_num_tokens)r   metrics
extra_infor   z.pytorch.json)r   ospathsplitextoutput_jsonr   )r   r   
pt_recordspt_files    `  r8    save_to_pytorch_benchmark_formatr   ?  s     5$+,A$B#C")*=">!?
 

 
 
 
#W
 
 
	 	 	J  +W%%d&677:IIIgz*****+ +r;   c                    | j         | j        d}|| j        | j        | j        d}| j        dk    s| j         y| j        dvrp| j        |d<   t          | dd           }||n| j        |d<   t          | dd           }||n| j	        |d	<   t          | d
d           }||n| j
        |d<   t          }nx| j        dk    r*t          }| j        dk    rd|d<   | j
        
| j
        |d<   nC| j        dk    rR|j        s|j        s
J d            t           }| j        |d<   d|d<   | j	        
| j	        |d	<   | j
        
| j
        |d<   n| j        dk    r	t"          }n| j        dk    r| j
        
| j
        |d<   | j         t$          j        v rt$          }d |d<   d|d<   d|d<   n| j         t(          j        v rt(          }d|d<   nj| j         t*          j        v r"t*          }| j        |d<   | j        |d<   d|d<   n5| j         t0          j        v r"t0          }| j        |d<   | j        |d<   d|d<   n | j         t2          j        v rt2          }d |d<   d|d<   n| j        dk    r1t4          }| j        |d<   | j        |d<   | j        |d<   | j        |d<   n| j        dk    rt>          }t          | dd           }||nt          | d	d           |d	<   t          | d
d           }||nt          | dd           |d<   t          | dd           |d<   t          | dd           |d<   t          | d d           |d!<   t          | d"d           |d#<   d|d<   t          | dd           }t          | dd           }||n||d<   | j        |d<   n| j        d$k    rt@          }t          | dd           }||nt          | d	d           |d	<   t          | d
d           }||nt          | dd           |d<   t          | d%d&          |d'<   t          | d(d)           |d*<   | j        |d<   ntC          d+| j                   d, |"                                D             }  |d-i |j#        d-i |}	tI          |	| j%                  }	|	S ).N)dataset_pathrandom_seed)r~   	lora_path	max_lorasr   random>   prefix_repetition	random-mmrandom-rerankrange_ratiorandom_prefix_len
prefix_lenrandom_input_len	input_lenrandom_output_lenrf   sharegpt	vllm-chatTenable_multimodal_chatsonnetz;Tokenizer/model must have chat template for sonnet dataset.return_prompt_formattedburstgpthfdataset_subsettraindataset_splitr   
suffix_lennum_prefixesr    random_mm_base_items_per_requestbase_items_per_request"random_mm_num_mm_items_range_rationum_mm_items_range_ratiorandom_mm_limit_mm_per_promptlimit_mm_per_promptrandom_mm_bucket_configbucket_configr   random_batch_sizer   	batchsizeno_rerankerFis_rerankerzUnknown dataset name: c                     i | ]
\  }}|||S r-   rJ   )r5   r   vs      r8   r   z get_requests.<locals>.<dictcomp>  s    MMMdaq}Q}}}r;   rJ   )&r   seedr   r   num_promptsdataset_namerandom_range_ratiogetattrr   r   rf   r   r   backendchat_templatedefault_chat_templater   r   r   SUPPORTED_DATASET_PATHSr
   r   	hf_subsethf_splitr	   r   r   prefix_repetition_prefix_lenprefix_repetition_suffix_lenprefix_repetition_num_prefixesprefix_repetition_output_lenr   r   
ValueErroritemssamplefilter_requests_for_dpdata_parallel_size)
r   r~   common_kwargssample_kwargsr   r   r   dataset_clsr   r"   s
             r8   get_requestsr   R  s    )y M
 ^^(	 M H$$!%XXX'+'>m$#D*=tDD!2!>DO 	l# #4);TBB 0 <$. 	k" $D*=tDD!2!>DO 	l# $		j	(	(%<;&&6:M23?&*./M,'		h	&	&& 	
)*I 	
 	
I	
 	
I $&*ol#37/0>%)-M+&?&*./M,'		j	(	(%		d	"	"?&*./M,' 2 JJJ,K.2M*+-4M/*6:M233"6"NNN.K-4M/**"?"WWW7K.2nM*+-1]M/*6:M233"5"MMM-K.2nM*+-1]M/*6:M233+"EEE%K.2M*+-4M/*		1	1	13&*&Gl#&*&Gl#(,(Kn%&*&Gl##		k	)	)-"4);TBB  + {D11 	k"
 $D*=tDD !, |T22 	l#
 3:4d3
 3
./ 5<65
 5
01 07140
 0
+, *17PRV)W)Wo&26./#D*=tDDT<66
!2!>J 	l# (,'>m$$		o	-	-/"4);TBB  + {D11 	k"
 $D*=tDD !, |T22 	l#
 &-T3F%J%Jk"+24+N+N'Nm$'+'>m$$E$2CEEFFFMMm&9&9&;&;MMMM2{{++]++2CC]CCH%h0GHHHOr;   c                     dk    r| S t          t          j        d                   }t          t          j        d                   }||z  z  fdt          |           D             S )Nr   RANK
WORLD_SIZEc                 ,    g | ]\  }}|z  k    |S rJ   rJ   )r5   rv   rdata_parallel_rankr   s      r8   rM   z*filter_requests_for_dp.<locals>.<listcomp>  s9       Aq!!%777 	
777r;   )intr   environrs   )r"   r   global_rank
world_sizer   s    `  @r8   r   r     s     Qbj())KRZ-..J$7I)IJ    h''   r;   c                    | j         "t          j        dd           | j         | _        t	          | dd          s| j        | _        h d}| j        |vrt          d| j                   | j         sO| j        sH| j	        dvr?t          d	           d
| _	        t	          | dd          }| j        |t          d          | j	        dk    r9t	          | dd          t	          | dd          t          j        dd           n| j	        dk    r| j        t          j                                        t          j        z  t           j        z  v r| j        dk    sJ | j         d            nU| j        t"          j        t$          j        z  v r| j        dk    sJ | j         d            nt          | j         d          | j	        dvr| j        t          j        dd           | j	        dk    r2t	          | dd          !| j        dk    rt          j        dd           | j	        dk    r't	          | dd          rt          j        dd           | j	        dvr| j        t          j        d d           | j	        dv rt	          | dd          }t	          | d!d          }t	          | d"d          }| j        |t          j        d#d           | j        |t          j        d$d           | j        |t          j        d%d           t	          | d&d          r| j        dk    rt          d'          t	          | d&d          r| j        t          d(          | j        dk    r| j        t          d)          | j        dk    r| j        t          d*          | j        d+v r t	          | d,d          t          d-          | j        d.k    r| j        d/k    rt          d0          | j        d.k    r| j        dk    rt          d1          | j        d.k    r| j        | j        k    rt          d2          | j        dk    r!| j        d3k    s| j        rt          d4          dS dS )5z*
    Validate command-line arguments.
    NzzThe '--dataset' argument will be deprecated in the next release. Please use '--dataset-name' and '--dataset-path' instead.   )
stacklevelr~   >   r   miirQ   r   zUnsupported backend: >   r   z?When dataset path is not set, it will default to random datasetr   r   zNEither --input-len or --random-input-len must be provided for a random datasetr   r   r   z\--hf-subset and --hf-split will be ignored                 since --dataset-name is not 'hf'.r   z' needs to use vllm-chat as the backend.rQ   z" needs to use vllm as the backend.z  is not supported by hf dataset.>   r   r   r   z{--random-range-ratio will be ignored since                 --dataset-name is not 'random', 'random-mm', or 'random-rerank'.r   r   r   zd--random-batch-size will be ignored since                     --dataset-name is not 'random-rerank'.r   FzZ--no-reranker will be ignored since                 --dataset-name is not 'random-rerank'.>   Nr   r   r   zu--prefix-len will be ignored since --dataset-name                 is not 'random', 'random-mm', 'sonnet', or not set.r   r   z}Both --input-len and --random-input-len are specified. The random version (--random-input-len) will be preferred in this run.zBoth --output-len and --random-output-len are specified. The random version (--random-output-len) will be preferred in this run.zBoth --prefix-len and --random-prefix-len are specified. The random version (--random-prefix-len) will be preferred in this run.rX   z4LoRA benchmarking is only supported for vLLM backendz3LoRA path must be provided when enable_lora is Truez,HF max batch size is required for HF backendz)HF max batch size is only for HF backend.>   r   r  quantizationz&Quantization is only for vLLM backend.r  autoz#dtype must be auto for MII backend.zn must be 1 for MII backend.z8Tokenizer must be the same as the model for MII backend.external_launcherzData parallel is only supported with external launcher mode with synchronous engine in offline benchmark, please use benchmark serving instead)datasetwarningswarnr   r   r}   r~   r   r   r   printr   r   r   keysr   r	   r
   r   r   r   r   rf   r   hf_max_batch_sizer   r#   r   distributed_executor_backendasync_engine)r   valid_backendsr   r   r   s        r8   validate_argsr    s    |H	
 	
 	
 	

 !L4d++ $ 877N|>))???@@@ L! %:::OPPP$"4);TBB>!&6&>'   D  k4((44T**63	
 	
 	
 	
 	

 
	d	"	"6;;==+CD!9:
 

 <;...$MMM /...  812
 
 <6)))$HHH *)))  1SSSTTT
 	!III#/R	
 	
 	
 	
 	_,,D-t44@

 A
%
%<	
 	
 	
 	
 O++mU0S0S+8	
 	
 	
 	
 	!HHHO'F	
 	
 	
 	
 DDD"4);TBB#D*=tDD#D*=tDD>%*:*FM 	    ?&+<+HM 	    ?&+<+HM 	    t]E** Qt|v/E/EOPPPt]E** Pt~/ENOOO |t 6 >GHHH|t 6 BDEEE 	%%D.$//;ABBB|uv!5!5>???|u17888|u4:!=!=STTT"")-@@@DDU@ 3
 
 	
 #"@@r;   parserc                 r   |                      dt          g dd           |                      dt          g ddd	           |                      d
t          d d           |                      dt          d d           |                      dt          d d           |                      dt          d d           |                      dt          dd           |                      dt          dd           |                      dt          d d           |                      dt          d d           |                      dddd !           |                      d"ddd#!           |                      d$dd%&           |                      d't          d d(           |                      d)t          d*d+           |                      d,t          d d-           |                      d.t          d d/           |                      d0ddd1!           |                      d2t          d d3           |                      d4t          d d5           |                      d6t          d d7           |                      d8t          d d9           t          |            t	          |            t          j        |           } d S ):Nz	--backend)rQ   r   r  r   rQ   )typechoicesdefaultz--dataset-name)r   r   r   r   r   r   r   r   z$Name of the dataset to benchmark on.r   )r  r  helpr  z	--datasetzPath to the ShareGPT dataset, will be deprecated in            the next release. The dataset is expected to be a json in form of list[dict[..., conversations: list[dict[..., value: <prompt_or_response>]]]])r  r  r  z--dataset-pathzPath to the datasetz--input-lenz$Input prompt length for each requestz--output-lenzMOutput length for each request. Overrides the output length from the dataset.z--nr   z)Number of generated sequences per prompt.z--num-promptsi  zNumber of prompts to process.z--hf-max-batch-sizez"Maximum batch size for HF backend.z--output-jsonz3Path to save the throughput results in JSON format.z--async-engine
store_trueFz,Use vLLM async engine rather than LLM class.)actionr  r  z"--disable-frontend-multiprocessingz(Disable decoupled async engine frontend.z--disable-detokenizez[Do not detokenize the response (i.e. do not include detokenization time in the measurement))r  r  z--lora-pathztPath to the lora adapters to use. This can be an absolute path, a relative path, or a Hugging Face model identifier.z--prefix-lenr   zRNumber of fixed prefix tokens before the random context in a request (default: 0).z--hf-subsetzSubset of the HF dataset.z
--hf-splitzSplit of the HF dataset.z	--profilezEUse vLLM Profiling. --profiler-config must be provided on the server.z--prefix-repetition-prefix-lenzMNumber of prefix tokens per request, used only for prefix repetition dataset.z--prefix-repetition-suffix-lenz|Number of suffix tokens per request, used only for prefix repetition dataset. Total input length is prefix_len + suffix_len.z --prefix-repetition-num-prefixesz|Number of prefixes to generate, used only for prefix repetition dataset. Prompts per prefix is num_requests // num_prefixes.z--prefix-repetition-output-lenzMNumber of output tokens per request, used only for prefix repetition dataset.)add_argumentstrr   r   r   r   add_cli_args)r  s    r8   r  r    s   
222	     	
 	
 	
 4      9	     sD7L     3	     *	     C)T     c46U     1	     B	     ;	     ,7	     6	     ?	     -	     (	     '	     T	     (	     (M	     *G	     (	     !(((&v...)&11FFFr;   c           
      :   t          |            | j        d| _        t          j        | j                   | j        dk    s| j        dk    r| j        dk    rd| _        t          | j        | j        | j                  }t          | |          }t          d |D                       }d }| j        dk    r| j
        rNt          j        t          || j        t          j        |           | j        | j        | j                            }nt)          || j        t+          j        |           | j        | j        	          \  }}n| j        dk    rR| j        d
k    sJ | j        rt/          d          t1          || j        || j        | j        | j        | j                  }n[| j        dk    r9t7          || j        t+          j        |           | j        | j        	          \  }}nt9          d| j                   |rfd}d}|D ]Y}t;          |t<                    s||j        rtA          |j                  ndz  }|tC          d |j"        D                       z  }Z||z   }	n7tC          d |D                       }	tC          d |D                       }|	|z
  }|r#| j        dk    rtG          d| j         d           tG          dtA          |          |z  dd|	|z  dd||z  dd           tG          d|            tG          d|            | j$        r~|tA          |          |	tA          |          |z  |	|z  d}
tK          | j$        d          5 }tM          j'        |
|d           d d d            n# 1 swxY w Y   tQ          | |
           d S d S )Nr   r   r  r  )tokenizer_moder   c              3   (   K   | ]}|j         d uV  d S r-   )rA   rL   s     r8   r9   zmain.<locals>.<genexpr>U  s*      VV'1=VVVVVVr;   rQ   )rl   r&   r%   )r&   r%   r   z/Profiling not implemented yet for backend='hf'.r   zUnknown backend: c              3   B   K   | ]}|t          |j                  V  d S r-   )r   	token_ids)r5   os     r8   r9   zmain.<locals>.<genexpr>  s1      &Q&QAq&Qs1;'7'7&Q&Q&Q&Q&Q&Qr;   c              3   4   K   | ]}|j         |j        z   V  d S r-   )r2   r3   r5   r   s     r8   r9   zmain.<locals>.<genexpr>  s,      VVq|a.CCVVVVVVr;   c              3   $   K   | ]}|j         V  d S r-   )r3   r&  s     r8   r9   zmain.<locals>.<genexpr>  s%      !J!JA!"7!J!J!J!J!J!Jr;   z+[91mWARNING[0m: Multi-modal request with z backend detected. The following metrics are not accurate because image tokens are not counted. See vllm-project/vllm/issues/9778 for details.zThroughput: z.2fz requests/s, z total tokens/s, z output tokens/szTotal num prompt tokens:  zTotal num output tokens:  )r   r   r   r   r   w   )indent))r  r   r   r   r   r    r~   r   r   anyr  uvlooprunr|   r#   r   from_cli_argsrl   r&   profilerg   r   tensor_parallel_sizeNotImplementedErrorr   r}   r  rk   r   rU   r   r=   r   sumrc   r  r   openjsondumpr   )r   r~   r"   is_multi_modalrequest_outputsr   total_prompt_tokenstotal_output_tokensror   r   fs               r8   mainr<  C  s   $y	
K	 	 5 5


'
' #*0  I
 D),,HVVXVVVVVN26O|v 	!:F#1$77595Z'+'>#|  	 	LL -5(..#'#:<- - -)L// 
		(A----< 	Y%&WXXXJF""#
 
 
	$	$(5F$T**#6|)
 )
 )
%oo ;T\;;<<< E  ! 	R 	RBb-00 ,.,?FB'(((Q  3&Q&Q&Q&Q&Q#Q#QQ.1DDVVXVVVVV!!J!J!J!J!JJJ.1DD 
$,+55G|G G G	
 	
 	
 
	Ds8}}|3F 	D 	Dl*A	D 	D-C	D 	D 	D  
 

<':
<
<===	
<':
<
<===  
8(MM 0#&x==<#?!1L!@
 
 $"C(( 	,AIgq++++	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,(w77777
8 
8s   M>>NN)F)FF)G__doc__argparserR   r4  r   r   rY   r  typingr   r   r,  r   transformersr   r   vllm.benchmarks.datasetsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   vllm.benchmarks.lib.utilsr   r   vllm.engine.arg_utilsr   r   vllm.inputsr   r   vllm.lora.requestr   vllm.outputsr   vllm.sampling_paramsr   vllm.tokenizersr   r    vllm.utils.async_utilsr!   listr   booltuplefloatrg   rk   r|   r  r   	NamespacerV   r   r   r   r  ArgumentParserr  r<  rJ   r;   r8   <module>rP     s   . -       				                  F F F F F F F F                                 " Y X X X X X X X = = = = = = = = 0 0 0 0 0 0 0 0 ) ) ) ) ) ) & & & & & & 1 1 1 1 1 1 8 8 8 8 8 8 8 8 8 8 8 8 8 8  %P  P =!P 
P  P  	P 
 P  5$}%,,-P  P  P  P p  %.  . =!. 
.  .  	. 
 .  5$}%%&.  .  .  . l .3$F F=!F
F !F 	F
 '+F F F F F F`  %A A=!AA A 	A
 A A A A A A AH+

+'+CH~+	+ + + +&J J JZ  "q
 q
 q
h\20 \2 \2 \2 \2~o8x! o8 o8 o8 o8 o8 o8r;   