
    fPiFU                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZmZmZ ddlmZmZmZ dd	lmZ  G d
 d          Zd"dedefdZd"dedefdZ G d d          ZdefdZdefdZ	 d"dej        dej        dz  fdZd Z d Z!e"dk    r e!            Z# e$de#            e#j%        e#j&        dk    rdnde#_%        e#j'        r<ej(        )                                sJ e#j*        dk    rd  e            v sJ d!e#_+        ne#j,        rJ e#j+        rJ e#j,        se#j+        r ee#           dS  e e#           dS dS )#z]
Benchmark performance of SAM2 encoder with ORT or PyTorch. See benchmark_sam2.sh for usage.
    N)Mapping)datetime)SAM2ImageDecoder)SAM2ImageEncoder)decoder_shape_dictencoder_shape_dictload_sam2_model)InferenceSessionSessionOptionsget_available_providers)CudaSessionc            +           e Zd Zddddddddddddej        dddddd	dfd
edededej        dededededededededededededededededef*dZ	d  Z
d!eeee         f         fd"Zd!eeej        f         fd#Zd$S )%
TestConfigimage_encoderCPUExecutionProvidermax-autotune      FT     
model_type	onnx_pathsam2_dirdevice	component
batch_sizeheightwidth
num_labels
num_points	num_masksmulti_mask_outputuse_tf32enable_cuda_graphprefer_nhwcwarm_upenable_nvtx_profileenable_ort_profileenable_torch_profilerepeatsverbosec                    |dv sJ |	dk    r|	dk    sJ |
dk    r|
dk    sJ || _         || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        | j        dk    r | j        dk    r| j        dk    sJ d            d S d S )Nsam2_hiera_tinysam2_hiera_smallsam2_hiera_largesam2_hiera_base_plus   i   r   r   z7Only image size 1024x1024 is allowed for image encoder.)r   r   r   r   providertorch_compile_moder   r   r   r   r    r!   r"   r   r#   r$   dtyper%   r&   r'   r(   r)   r*   r+   )selfr   r   r   r   r   r3   r4   r   r   r   r   r    r!   r"   r#   r$   r5   r%   r&   r'   r(   r)   r*   r+   s                            /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/sam2/benchmark_sam2.py__init__zTestConfig.__init__   s4   6 ppppp}}4/||-$" " "4$
$$"!2 !2
&#6 "4$8!>_,,;$&&4:+=+=+=?x+=+== -,+=+=    c                 "    t          |            S N)varsr6   s    r7   __repr__zTestConfig.__repr__V   s    t**r9   returnc                     | j         dk    r t          | j        | j        | j                  S t          | j        | j        | j        | j        | j                  S )Nr   )	r   r   r   r   r   r   r   r    r!   r=   s    r7   
shape_dictzTestConfig.shape_dictY   sK    >_,,%dot{DJOOO%dk4:tPTP_aeaopppr9   c                    | j         }| j        dk    r0dt          j        | j        d| j        | j        || j                  iS t          j        dddd|| j                  t          j        ddd	d	|| j                  t          j        dddd|| j                  t          j	        d
d| j
        | j        df|| j                  t          j	        d
d| j
        | j        ft          j        | j                  t          j        | j
        ddd|| j                  t          j        | j
        || j                  t          j        | j        | j        gt          j        | j                  dS )Nr   image   )r5   r   r          @      r   r      )image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_size)r5   r   torchrandnr   r   r   r   randrandintr   r    int32zerosonestensor)r6   r5   s     r7   random_inputszTestConfig.random_inputs_   sm   
>_,,U[!T[$*\ajnjuvvvww %*Jq"c3eTXT_$`$`$`$)Jq"c3eTXT_$`$`$`$)Jq#r2USWS^$_$_$_ %tdotB%X\Xc! ! ! !&q4?DO<EKX\Xc! ! !  %{4?AsCu]a]hiii#(:doUSWS^#_#_#_',|T[$*4MUZU`imit'u'u'u  r9   N)__name__
__module____qualname__rR   float32strr   intboolr8   r>   r   listrA   TensorrZ    r9   r7   r   r      s        )')"'"'m!$)#(%*39y 9y9y 9y 	9y
 9y 9y 9y 9y 9y 9y 9y 9y  9y  !9y"  #9y& '9y( )9y* "+9y, !-9y. #/9y0 19y2 39y 9y 9y 9yv  qGCcN3 q q q qwsEL'89      r9   r   configr?   c                    | j         rt          dt          |                       | j        dk    rt	          | j        t                    rt          j        	                                n| j        j
        }t          j        || j                  }t          | j                  |d<   | j        rd|d<   | j        |fdg}ndg}t#          | j        ||          }|S )Nzcreate session for CUDAExecutionProviderr#   r   r%   r   )	providers)r+   printr<   r3   
isinstancer   r_   rR   cudacurrent_deviceindexr   get_cuda_provider_optionsr$   r`   r#   r%   r
   r   )re   session_options	device_idprovider_optionsrh   ort_sessions         r7   create_ort_sessionrs   t   s    ~ 42DLL223331113=fmS3Q3QjEJ--///W]WdWj	&@FLdee'*6?';';$ 	0./]+o'78:PQ		+,	"6#3_PYZZZKr9   c                     t          | |          }t          || j        | j                  }|                    |                                            |S r;   )rs   r   r   r$   allocate_buffersrA   )re   ro   rr   cuda_sessions       r7   create_sessionrw      sM    $V_==K{FM6;STTL!!&"3"3"5"5666r9   c                   &    e Zd ZdZddefdZd ZdS )OrtTestSessionz;A wrapper of ORT session to test relevance and performance.Nre   c                 b    t          ||          | _        |                                | _        d S r;   )rw   rr   rZ   	feed_dict)r6   re   ro   s      r7   r8   zOrtTestSession.__init__   s+    )&/BB--//r9   c                 @    | j                             | j                  S r;   )rr   inferr{   r=   s    r7   r}   zOrtTestSession.infer   s    %%dn555r9   r;   )r[   r\   r]   __doc__r   r8   r}   rd   r9   r7   ry   ry      sI        EE0 0z 0 0 0 06 6 6 6 6r9   ry   rv   c                     t          j                     }|                     |          }t          j                     }||z
  S r;   )timer}   )rv   
input_dictstart_ends        r7   measure_latencyr      s5    IKKE:&&A
)++C;r9   c                 2   | j         j        }|dk    }|r`t          j                            d          j        dk    r8| j        r1dt          j        j        j        _	        dt          j        j
        _	        |o| j        t          j        k    }|                                 }t          j                    5  t          j        || j        |          5  t!          | j        | j        | j                   }| j        dk    r	|r<| j        dk    r1t          j        |j        j        | j        dd	
          |j        _        |                                 d         }t          j        |                              | j         | j                  }t7          |          }|r#| j        dk    rt9          d| j         d           t;          | j                  D ]}	 ||          \  }
}}|r| j        r{dd l }ddlm!} |"                                 t9          d           |#                    d          5   ||d           d d d            n# 1 swxY w Y   |$                                 |r| j%        rt          j&        '                    t          j&        j(        j)        t          j&        j(        j*        gd          5 }t9          d           t          j&        +                    d          5   ||           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t9          |,                                -                    dd                     |.                    d           | j/        dk    r	 d d d            d d d            d S t9          d| j/         d           ta          j0                    }t;          | j/                  D ]1}	 ||          \  }
}}|rt          j        1                                 2n|d         |d         |d         |d         |d          |d!         |d"         |d#         f}te          || j3        $          }|r2| j        dk    r't          j        |j        | j        dd	
          |_        t;          | j                  D ]}	 || \  }}}|r}| j        rvdd l }ddlm!} |"                                 t9          d%           |#                    d          5   ||d&di d d d            n# 1 swxY w Y   |$                                 |r | j%        rt          j&        '                    t          j&        j(        j)        t          j&        j(        j*        gd          5 }t9          d'           t          j&        +                    d(          5   ||  d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t9          |,                                -                    dd                     |.                    d)           | j/        dk    r	 d d d            d d d            d S t9          d| j/         d           ta          j0                    }t;          | j/                  D ]+}	 || \  }}}|rt          j        1                                 ,ta          j0                    }||z
  | j/        z  cd d d            cd d d            S # 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )*Nrk   r      T)device_typer5   enabled)r   r   noneF)mode	fullgraphdynamicrC   )r   r5   zBRunning warm up. It will take a while since torch compile mode is .cudartz#Start nvtx profiling on encoder ...one_run)r'   )
activitiesrecord_shapesz$Start torch profiling on encoder ...encodercuda_time_total
   )sort_by	row_limitztorch_image_encoder.jsonzStart z runs of performance tests...rJ   rK   rL   rM   rN   rO   rP   rQ   )multimask_outputz"Start nvtx profiling on decoder...r'   z$Start torch profiling on decoder ...decoderztorch_image_decoder.json)4r   typerR   rk   get_device_propertiesmajorr#   backendsmatmul
allow_tf32cudnnr5   r^   rZ   inference_modeautocastr	   r   r   r   r4   compiler   forwardrA   rS   tor   ri   ranger&   r'   nvtxr   cudaProfilerStartannotatecudaProfilerStopr)   profilerprofileProfilerActivityCPUCUDArecord_functionkey_averagestableexport_chrome_tracer*   r   synchronizer   r"   )re   r   is_cudaenabled_auto_cast
ort_inputs
sam2_modelimage_shapeimgsam2_encoderr   _image_features_0_image_features_1_image_embeddingsr   r   profr   torch_inputssam2_decoder_masks_iou_predictions_low_res_masksr   s                          r7   	run_torchr      sV
   -$KV#G  /5:33A66<AAfoA04"-*.'AFLEM$A%%''J				 p. p.Kv|ev!w!w!w p. p.$V_f6GPVP]^^^
.. 64>>38=,42"!	4 4 4
(0 !++--g6K+k**--V]&,-WWC+J77L y64>>w[a[twwwxxx6>** \ \JV,WZJ[J[G!#46G6G *65 *''''''((***;<<<]]9-- @ @ L$????@ @ @ @ @ @ @ @ @ @ @ @ @ @ @''))) 	E66 	E^++ % ? CU^EdEij"& ,   * @AAA77	BB * *$S)))* * * * * * * * * * * * * * ** * * * * * * * * * * * * * * d''))//8IUW/XXYYY(()CDDD~""Wp. p. p. p. p. p. p. p. p. p. p. p. p. p.Z H6>HHHIIIIKKE6>** - -JV,WZJ[J[G!#46G -J**,,,- -.-.-.>*>*=),-01	L ,!'!9  L
  64>>',} (2"!	( ( ($ 6>** W W;G<;V8(.. *65 *''''''((***:;;;]]9-- J J L,IDIIIJ J J J J J J J J J J J J J J''))) 	E66 	E^++ % ? CU^EdEij"& ,   4 @AAA77	BB 4 4$l334 4 4 4 4 4 4 4 4 4 4 4 4 4 44 4 4 4 4 4 4 4 4 4 4 4 4 4 4 d''))//8IUW/XXYYY(()CDDD~""Mp. p. p. p. p. p. p. p. p. p. p. p. p. p.P H6>HHHIIIIKKE6>** - -;G<;V8(. -J**,,,ikkev~-ap. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p. p.s-  3\E[4.I<[4I[4IA,[4</L+L	7LLL
LL[4L[4!L"A[4<\E"[46	T?[4T[4TA,[4?/W.W 	4W WWWW[4W[4WA[49\B
[4\4[8	8\;[8	<\\\args
csv_writerc                 b   | j         }| j        }| j        }|r6t          j                                        }t          j        d|          }d}nd}t          j        d          }d}d}t          j        t          j        t          j	        d}t          d+i d| j        d	| j        d
| j        d| j        d|d| j        d| j        d| j        d|ddd|d|| j                 d| j        d| j        d| j        d| j        d| j        d| j        d| j        dd}	| j        dk    rt5                      }
| j        |
_        |	j        rd|
_        d|
_        d|
_        t?          |	|
          }|	                                 }	 tC          |	j                  D ]}tE          ||          }n-# tF          $ r }tI          d|	d |            Y d }~d S d }~ww xY w|	j        rtdd l%}dd!lm&} |'                                 |(                    d"          5  |)                    |          }d d d            n# 1 swxY w Y   |*                                 |	j        r|j+        ,                                 |dk    rd S g }tC          |          D ]'}tE          ||          }|-                    |           (t]          j/        |          }~n}t          j0                    5  	 tc          |	          }n8# tF          $ r+}tI          d|	d |            Y d }~d d d            d S d }~ww xY w	 d d d            n# 1 swxY w Y   |dk    rd S | j        d#z   |rdndz   }i d| j        d| j        d| j        d$|d|d|	j        d|	j2        d| j        d| j        d| j        d%| j3        d&|	j4        d'|	j5        d(|	j6        d)| j        d|	j        d|| j        | j        ||d*}||7                    |           tI          tq          |	                      tI          |            d S ),Nrk   rg   r   cpuFr   fp32fp16bf16r   r   r   r   r3   r   r   r   r   r#   Tr$   r5   r%   r*   r&   r'   r(   r)   r4   r+   ort   zFailed to run config=z. Exception: r   r   :use_gpur"   r   r    r!   intra_op_num_threads)r'   r4   engineaverage_latencyrd   )9r   use_cuda_graphr*   rR   rk   rl   r   r^   float16bfloat16r   r   r   r   r   r   r   r   r5   r%   r&   r'   r(   r)   r4   r   r   r   enable_profilinglog_severity_levellog_verbosity_levelrw   rZ   r   r   	Exceptionri   r   r   r   r   r}   r   rr   end_profilingappend
statisticsmeanno_gradr   r#   r   r   r    r!   writerowr<   )r   r   r   r$   r*   rp   r   r3   dtypesre   sess_optionssessionr   r   er   r   latency_listlatencyr   r   rows                         r7   run_testr     sJ    LG"1<G *J--//	fi00*	e$$!)mU]ENSSF   ??..  ..	
  ?? {{ jj v  ,+ TZ   $$    !44!"  22#$ "66%&  22'( )F. {e%'',0,E)$ 	1,0L)./L+/0L, 66))++
	6>** 9 9#GZ889 	 	 	<6<<<<===FFFFF	 % 	&KKK######$$&&&y)) . .MM*--. . . . . . . . . . . . . . .##%%%$ 	0--///a<<Fw 	) 	)A%gz::G(((($/,77G]__ 	 	"+F"3"3   @v@@Q@@AAA	 	 	 	 	 	 	 	  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 a<<F[3G">&&?FdoT^ 	 	7	
 	. 	v) 	FO 	do 	$+ 	 	T2 	f' 	f' 	V% 	 9  	6>!" 	7#$  $7"5*+  C0 C   	T&\\
	S(OOOOOsf   
'F2 2
G<GGH;;H?H?$L;&K65L;6
L+ L&L;&L++L;;L?L?c                 n   | j         rdnd}d                    || j        t          j                                        d                    }t          |dd          5 }g d}t          j        ||	          }|	                                 t          | |           d d d            d S # 1 swxY w Y   d S )
Ngpur   zbenchmark_sam_{}_{}_{}.csvz%Y%m%d-%H%M%Sa )r   newline)r   r   r5   r   r$   r%   r#   r   r   r   r"   r   r    r!   r   r&   r*   r'   r4   r   r   )
fieldnames)r   formatr   r   nowstrftimeopencsv
DictWriterwriteheaderr   )r   featurescsv_filenamecsv_filecolumn_namesr   s         r7   run_perf_testr     s   /uu%H/6600 L
 
lb	1	1	1 #X
 
 
. ^HFFF
   z"""7# # # # # # # # # # # # # # # # # #s   ?B**B.1B.c                  J   t          j        d          } |                     ddddgdd           |                     d	dg d
dd           |                     dddd           |                     d           |                     dddd           |                     d           |                     ddt          g ddd           |                     ddt          dd           |                     ddt          dd            |                     d!dt          dd"           |                     d#dt          d$d%           |                     d&dt          d'd(           |                     d)dt
          d*d*d+gd,-           |                     d.dddd/0           |                     d1dddd20           |                     d3dddd40           |                     d5dddd60           |                     d7dddd80           |                     d9dt
          d:g d;d<-           |                     d=dt
          d>d?           |                     d@dt
          dAdB           |                     dCdt
          d g dDdE-           |                                 }|S )FNz,Benchmark SMA2 for ONNX Runtime and PyTorch.)descriptionz--componentFr   image_decoderzDcomponent to benchmark. Choices are image_encoder and image_decoder.)requiredchoicesdefaulthelpz--dtyper   r   zData type for inference.z	--use_gpu
store_truezUse GPU for inference.)r   actionr  )r   z--use_cuda_graphzUse cuda graph in onnxruntime.)r   z--intra_op_num_threads)r   r   rI   r   r      r   z&intra_op_num_threads for onnxruntime. )r   r   r  r  r  z--batch_sizer   z
batch size)r   r   r  r  z--heightr   zimage heightz--widthzimage widthz	--repeatsr   z8number of repeats for performance test. Default is 1000.z	--warm_upr   z)number of runs for warm up. Default is 5.z--enginer   rR   zengine for inference)r   r   r  r  r  z--multimask_outputz:Export mask_decoder or image_decoder with multimask_output)r   r  r  r  z--prefer_nhwcz;Use prefer_nhwc=1 provider option for CUDAExecutionProviderz--enable_nvtx_profilezVEnable nvtx profiling. It will add an extra run for profiling before performance test.z--enable_ort_profilezEnable ORT profiling.z--enable_torch_profilezYEnable PyTorch profiling. It will add an extra run for profiling before performance test.z--model_typer0   r-   zsam2 model namez
--sam2_dirz./segment-anything-2z6The directory of segment-anything-2 git root directoryz--onnx_pathz6./sam2_onnx_models/sam2_hiera_large_image_encoder.onnxzpath of onnx modelz--torch_compile_mode)zreduce-overheadr   zmax-autotune-no-cudagraphsr   z4torch compile mode. none will disable torch compile.)argparseArgumentParseradd_argumentset_defaultsr`   r_   
parse_args)parserr   s     r7   _parse_argumentsr    s   $1_```F
 /2S     E+C+C+CVZt     %	     &&&
-	     u---
 ###5                    G     8      #     I     J     e     $      h     "ccc     &E     H!     YYYC     DKr9   __main__z
arguments:r   r   r   r   rg   Fr;   )-r~   r  r   r   r   collections.abcr   r   rR   r   r   r   r   
sam2_utilsr   r   r	   onnxruntimer
   r   r   *onnxruntime.transformers.io_binding_helperr   r   rs   rw   ry   r   r   	Namespacer   r   r   r  r[   r   ri   r4   r   r   rk   is_availabler   r)   r'   rd   r9   r7   <module>r     s-     



      # # # # # #        * * * * * * * * * * * * N N N N N N N N N N Q Q Q Q Q Q Q Q Q Q B B B B B BW W W W W W W Wt z DT    $ :     6 6 6 6 6 6 6 6+    |.j |. |. |. |.B )-{ {

{%{ { { {|"# "# "#Je e eP zD	E
t

&48No4U4U..[a| -z&&(((((;%*.E.E.G.GGGGG(-D% ++++,,,, 4#< d+ r9   