
    -`i9              
       H   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlZ	ddl
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ 	 ddlZn# e$ r  ed          ZY nw xY w	 ddededeeee         f         fdZdeeee         f         dee         deeeeef         f         fdZ d Z!dej"        deeef         fdZ#dej$        ddfdZ%dej"        ddfdZ&e'dk    r= ej$        d          Z( e%e(           e()                                Z* e&e*           dS dS )a?  Benchmark multimodal processor latency.

This benchmark measures the latency of the mm processor module
using multimodal prompts from datasets.
MM processor stats are automatically enabled.

Run:
    vllm bench mm-processor \
        --model <your_model> \
        --dataset-name random-mm \
        --num-prompts 10 \
    N)datetime)Any)MultiModalConversationDatasetVisionArenaDataset)get_requests
EngineArgs)#get_timing_stats_from_engine_client)freeze_gc_heap)PlaceholderModulepandas
llm_enginenum_warmup_reqsreturnc                     t          |           }g d}d |D             }t          |                                          |d         }|D ],}|D ]'}||v r!||                             ||                    (-|S )z
    Collect multimodal processor timing stats.
    Returns a dictionary mapping stage names to lists of timing values (in seconds).
    )hf_processor_timehashing_timecache_lookup_timeprompt_update_timepreprocessor_total_timeencoder_forward_timenum_encoder_callsc                     i | ]}|g S  r   ).0keys     p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/benchmarks/mm_processor.py
<dictcomp>z.collect_mm_processor_stats.<locals>.<dictcomp>>   s    333#c2333    N)r
   listvaluesappend)r   r   	all_stats	stat_keysstats_by_stage
stats_list
stats_dictr   s           r   collect_mm_processor_statsr(   +   s     4J??I  I 43333N i&&(())/*:*:;J  < <
 	< 	<Cj  s#**:c?;;;	< r   r%   selected_percentilesc                    i }|                                  D ]\  }}|sddddd |D             ||<   |dk    }|r|nd |D             t          t          j                            t          t          j                            t          t          j                            dfd|D             ||<   |S )z:
    Calculate aggregate metrics from stats by stage.
            )meanmedianstdc                     i | ]}d | d	S )pr+   r   r   r0   s     r   r   z2calculate_mm_processor_metrics.<locals>.<dictcomp>Z   s     >>>A7q77C>>>r   r   c                     g | ]}|d z  S )  r   )r   ts     r   
<listcomp>z2calculate_mm_processor_metrics.<locals>.<listcomp>_   s    /H/H/HQD/H/H/Hr   c           	      \    i | ](}d | t          t          j        |                    )S )r0   floatnp
percentile)r   r0   r!   s     r   r   z2calculate_mm_processor_metrics.<locals>.<dictcomp>e   s5    VVVAw1wwbmFA6677VVVr   )itemsr8   r9   r,   r-   r.   )r%   r)   metrics
stage_nametimesis_count_metricr!   s         @r   calculate_mm_processor_metricsr@   K   s
    G+1133 
 

E 	# # ?>)=>>>	#GJ $(;;)H/H/H%/H/H/H "'&//**BIf--..((
 
 WVVVAUVVV	

 Nr   c                    t          | dd          s| j        | _        t          | d          sd| _        t          | d          sd| _        t          | d          sd| _        | j        dk    r| j        st          d          | j        dk    rZt          j
                                        t          j
        z  }| j        |vr(t          | j         dt          |                     dS dS )	zE
    Validate command-line arguments for mm_processor benchmark.
    	tokenizerNdataset_path	lora_path	max_lorashfz--dataset-path is required when using --dataset-name hf. For multimodal benchmarking, specify a dataset like 'lmarena-ai/VisionArena-Chat'.zK is not a supported multimodal dataset. Supported multimodal datasets are: )getattrmodelrB   hasattrrC   rD   rE   dataset_name
ValueErrorr   SUPPORTED_DATASET_PATHSkeysr   sorted)argssupported_mm_datasetss     r   validate_argsrQ   k   s*    4d++ $4(( ! 4%% 4%% D  ): -
 
 	

 D  6;;==+CD 	 $999$ V V6<=R6S6SV V   ! 
 :9r   rO   c           	      v  #$% ddl m}m# t          |            | j        d| _        t          j        |           } |d%i t          j        |          %%	                                }t          | |          }t          %fd|D                       s
J d            d |D             }d |D             }#fd|D             }d	 t          | d
d                              d          D             }t                       t          | dd          }	|	dk    rt          d|	 d           t!          j        d%i t%          |           }
|	|
_        |
xj        dz  c_        t          |
|          }d |D             }d |D             }#fd|D             }%                    ||t          | dd                      t          dt+          |           d           t-          j                    }%                    ||t          | dd                     }t-          j                    }||z
  }t1          %j        |	          }t5          |                                          st          d           t9          ||          }t+          d |D                       }t+          |          |z
  }g $|D ]}|j        r|j        |j        }t          |dd          at          |dd          Pt          |dd          ?|j        }tA          d|j!        |j"        z
            }$#                    ||z   dz             $s#|dk    rt          d           ||z  }|dz  g|z  $$rrtI          tK          j&        $                    }tI          tK          j'        $                    }tI          tK          j(        $                    }$fd |D             }nd}d}d}d! |D             }i } d"|v r<|d"         r4|d"         }!tS          tU          |!                    t+          |!          d#} |||||||| d$}"|"S )&z1
    Run the multimodal processor benchmark.
    r   )LLMSamplingParamsNc              3   \   K   | ]&}j         j        j        |j        |j        z   k    V  'd S N)r   model_configmax_model_len
prompt_lenexpected_output_len)r   requestllms     r   	<genexpr>z1benchmark_multimodal_processor.<locals>.<genexpr>   sT          	#1!<<	>     r   zpPlease ensure that max_model_len is greater than the sum of prompt_len and expected_output_len for all requests.c                     g | ]	}|j         
S r   promptr   r[   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s    666'w~666r   c                     g | ]	}|j         
S r   rZ   ra   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s    PPPGG7PPPr   c                 .    g | ]} d d|d          S )   r+   T)ntemperature
max_tokens
detokenizer   r   
output_lenrT   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   sH         	!		
 	
 	
  r   c                 ,    g | ]}t          |          S r   r8   r1   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s+       a  r   metric_percentiles99,num_warmupszProcessing z warmup requests...re   c                     g | ]	}|j         
S r   r_   r   reqs     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s    @@@#*@@@r   c                     g | ]	}|j         
S r   rc   rs   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s    QQQ#c5QQQr   c                 (    g | ]} |           S ))rh   r   rj   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s3     "
 "
 "
6@NNj111"
 "
 "
r   disable_tqdmF)use_tqdmz requests...u   
⚠️  Warning: No MM processor stats found in registry.
   This may indicate that:
   - No multimodal requests were processed
   - Stats were already retrieved (registry is cleared after retrieval)
c                      g | ]}|j         	|S r   )finished)r   os     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>   s    66611:6Q666r   first_token_latencylast_token_tsfirst_token_tsr+   r3   u   
⚠️  Warning: Detailed end-to-end latency metrics not available.
   Falling back to average request latency (total_time / num_completed_requests).
c           	      X    g | ]&}|t          t          j        |                    f'S r   r7   )r   r0   
e2el_timess     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>  s?     
 
 
9:QbmJ22334
 
 
r   c                     g | ]}|d fS )r+   r   r1   s     r   r5   z2benchmark_multimodal_processor.<locals>.<listcomp>  s    FFFA3xFFFr   r   )total_encoder_callsnum_requests_with_encoder_calls)	completedfailedmean_e2el_msmedian_e2el_msstd_e2el_mspercentiles_e2el_msmm_processor_statsencoder_summaryr   )+vllmrS   rT   rQ   seedr	   from_cli_argsdataclassesasdictget_tokenizerr   allrG   splitr   printargparse	Namespacevarsnum_promptschatlentimeperf_counterr(   r   anyr!   r@   rz   r<   r|   maxr}   r~   r"   r8   r9   r,   r-   r.   intsum)&rO   rS   engine_argsrB   requestspromptsexpected_output_lenssampling_paramsr)   rq   warmup_argswarmup_requestswarmup_promptswarmup_output_lenswarmup_sampling_params
start_timeoutputsend_time
total_timemm_stats_by_stagemm_processor_metricsr   r   outputr<   ttftdecode_timeavg_time_per_requestr   r   r   r   r   encoder_callsbenchmark_resultrT   r   r\   s&                                      @@@r   benchmark_multimodal_processorr      s    )((((((($y	*400K
#
0
0";//
0
0C!!##ID),,H            
	?   76X666GPPxPPP    /  O !$(<dCCII#NN   $q11KQ<K<<<===(664::66"-A&{I>>@@@@@QQQQQ"
 "
 "
 "
DV"
 "
 "
 	" ~u=== 	 	
 	
 	
 

2G
2
2
2333"$$Jhhwt^U/S/S+S   G  ""HJ&J23>;OO ''))** 
X	
 	
 	
 :/  6666677I\\I%FJ ; ; 	&."8. G2D99E$77C!1488D.Dc7#87;Q#QRRKtk1T9::: ?)a--7	
 	
 	

  *I5*T12Y>
 GRWZ0011ry4455BF:..//
 
 
 
>R
 
 
 FF1EFFFO00012 	1 **=>#&s='9'9#:#:/2=/A/A
 
 $("22*	 	 r   parserc                    ddl m}  |j        |            |                     d           |                     dt
          dddgd	           |                     d
t          dd           |                     dt          dd           ddlm}m	}  ||             ||            |                     dt
          dd           |                     dt
          dd           |                     dt
          dd           |                     dt          dd           |                     dt
          dd           |                     dt
          dd           |                     d d!d"#           dS )$z9Add CLI arguments for the multimodal processor benchmark.r   r   T)enable_mm_processor_statsz--dataset-namez	random-mmrF   z=Name of the dataset to benchmark on. Defaults to 'random-mm'.)typedefaultchoiceshelpz--num-prompts
   zNumber of prompts to process.)r   r   r   z--num-warmupsre   z$Number of warmup prompts to process.)add_random_dataset_base_args"add_random_multimodal_dataset_argsz--dataset-pathNzlPath to the dataset file or HuggingFace dataset name (e.g., 'yale-nlp/MMVU', 'lmarena-ai/VisionArena-Chat').z--hf-subsetz-Subset of the HuggingFace dataset (optional).z
--hf-splitzGSplit of the HuggingFace dataset (e.g., 'train', 'test', 'validation').z--output-lenzVOutput length for each request. Overrides the default output lengths from the dataset.z--output-jsonz2Path to save the benchmark results in JSON format.z--metric-percentilesro   zDComma-separated list of percentiles to calculate (e.g., '50,90,99').z--disable-tqdm
store_truezDisable tqdm progress bar.)actionr   )
vllm.engine.arg_utilsr	   add_cli_argsset_defaultsadd_argumentstrr   vllm.benchmarks.datasetsr   r   )r   r	   r   r   s       r   r   r   &  sK   000000JF###
$777
d#L     ,	     3	           
 ! (((&&v... B	     <	     V	     A	     A	     S	     )      r   c                 b   t          d           t          |           }t          d           t          d           t          d           d|v r=t          d           d t          | dd	                              d
          D             }g }|d                                         D ]n\  }}|dk    }|rdnd}||z   |d         d|d         d|d         dd}|D ]#|                    d d          d|d <   $|                    |           ot          j        |          }	t          |		                    d                     d|v r:|d         r2|d         d         }
|d         d         }t          d|
 d| d           d|v rt          d           d  t          | dd	                              d
          D             }d!|d         dd"d#|d$         dd"d%|d&         dd"g}|D ]At          fd'|d(         D             d          }|                    d |dd"           Bt          j        |          }t          |	                    d                     | j        r| j        | j        t          | d)d*          t          | d+d*          d,|d-<   t          j                                                    |d.<   t#          | j        d/          5 }t%          j        ||d01           d*d*d*           n# 1 swxY w Y   t          d2| j                    d*S d*S )3z8Main entry point for the multimodal processor benchmark.z*Starting multimodal processor benchmark...zQ
================================================================================z&Multimodal Processor Benchmark ResultszP================================================================================r   z
MM Processor Metrics:c                 ,    g | ]}t          |          S r   rm   r1   s     r   r5   zmain.<locals>.<listcomp>  +      
  
  
E!HH 
  
  
r   rn   ro   rp   r    z (ms)r,   z.2fr-   r.   )StageMeanMedianStdr0   r+   PF)indexr   r   r   z

Summary: z total encoder calls across z
 requests.r   z
End-to-End Latency (ms):c                 ,    g | ]}t          |          S r   rm   r1   s     r   r5   zmain.<locals>.<listcomp>  r   r   r   )Metricz
Value (ms)r   r   r   r   c              3   .   K   | ]\  }}|k    |V  d S rV   r   )r   pctvalr0   s      r   r]   zmain.<locals>.<genexpr>  s+      OOccQhhhhhhOOr   r   random_input_lenNrandom_output_len)rH   r   	input_lenrk   config	timestampw   )indentz
Results saved to )r   r   rG   r   r;   getr"   pd	DataFrame	to_stringnextoutput_jsonrH   r   r   now	isoformatopenjsondump)rO   resultr)   mm_datastager<   is_countunitrowmm_dftotal_callsnum_requests	e2el_datapercentile_valuee2el_dffr0   s                   @r   mainr   y  sc    

6777+D11F	/	
2333	(OOOv%%'((( 
  
%d,@$GGMMcRR 
  
  
 $%9:@@BB 	  	 NE7 33H!.22wD "6?00$X.44!%...	 C * B B")++g!ggs";";AAGGGNN3W%%eooEo**+++&&62C+D& !234IJK!"345VWL3k 3 3&3 3 3  
 *+++ 
  
%d,@$GGMMcRR 
  
  

 ~0F-L-LMM&9I2J/P/PQQvm/D,J,JKK
	 & 
	 
	A#OOOOV,A%BOOO    %!gg%5";";     ,y))ge,,--- 8Z+ '94@@!$(;TBB	
 
x 'lnn6688{$"C(( 	+AIfa****	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+6D$466777778 8s   'LLL__main__zBenchmark mm processor latency)description)r   )+__doc__r   r   r   r   r   typingr   numpyr9   r   r   r   vllm.benchmarks.throughputr   r   r	   "vllm.multimodal.processing.contextr
   vllm.utils.gc_utilsr   vllm.utils.import_utilsr   r   r   ImportErrorr   dictr   r    r8   r(   r@   rQ   r   r   ArgumentParserr   r   __name__r   
parse_argsrO   r   r   r   <module>r     s                                  4 3 3 3 3 3 , , , , , ,      / . . . . . 5 5 5 5 5 5% % % %		8	$	$BBB%   
#tE{
   @d5k)*u+ 
#tCJ
    @  >Y

Y	#s(^Y Y Y YxP0 PT P P P PfO8x! O8d O8 O8 O8 O8d z$X$1QRRRFLDDJJJJJ	 s   
A A"!A"