
    fPi                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ dddddd	d
ddd	ZdddddZd Zd ZdBdZdededefdZdedededefdZdededededef
d Z	 dCded"ed#efd$Zd%efd&Z	 dCded"efd'Z	 dCdeded(ededed)ed*eded+ed,ed-ed#efd.Z	 	 	 dDdededed0efd1Z	 	 dEded"efd2Z	 	 dEdeded(ededed)ed*eded+ed,ed0ed#efd3Z	 	 dFd4ed5ededed)ed*eded+ed,ed6ed7ed8efd9Z	 	 	 dGd4ed5edededed)ed*eded+ed,ed6ed7ed8ed#efd:Z 	 	 	 dGd4ed5ededed)ed*eded+ed,ed6ed7ed#efd;Z!	 	 	 dGd4ed5ededed)ed*eded+ed,ed6ed7ed#efd<Z"	 dHdededededed)ed*eded+ed,ed#efd=Z#d> Z$dHd?Z%d@ Z&e'dAk    r4d dl(Z(	  e&             dS # e)$ r  e(j*         ej+                      Y dS w xY wdS )I    N)Pathmeasure_memoryzrunwayml/stable-diffusion-v1-5zstabilityai/stable-diffusion-2z stabilityai/stable-diffusion-2-1z+stabilityai/stable-diffusion-xl-refiner-1.0z/stabilityai/stable-diffusion-3-medium-diffusersz'stabilityai/stable-diffusion-3.5-mediumz&stabilityai/stable-diffusion-3.5-largez black-forest-labs/FLUX.1-schnellzblack-forest-labs/FLUX.1-dev)	1.5z2.02.1zxl-1.0z3.0Mz3.5Mz3.5LzFlux.1SzFlux.1DCUDAExecutionProviderROCMExecutionProviderMIGraphXExecutionProviderTensorrtExecutionProvider)cudarocmmigraphxtensorrtc                      g d} d}| |fS )N)
z.a photo of an astronaut riding a horse on marsz@cute grey cat with blue eyes, wearing a bowtie, acrylic paintingzia cute magical flying dog, fantasy art drawn by disney concept artists, highly detailed, digital paintingzdan illustration of a house with large barn with many cute flower pots and beautiful blue sky sceneryzgone apple sitting on a table, still life, reflective, full color photograph, centered, close-up productzWbackground texture of stones, masterpiece, artistic, stunning photo, award winner photozSnew international organic style house, tropical surroundings, architecture, 8k, hdrznbeautiful Renaissance Revival Estate, Hobbit-House, detailed painting, warm colors, 8k, trending on Artstationzcblue owl, big green eyes, portrait, intricate metal design, unreal engine, octane render, realisticzldelicate elvish moonstone necklace on a velvet background, symmetrical intricate motifs, leaves, flowers, 8kz*bad composition, ugly, abnormal, malformed )promptsnegative_prompts     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/stable_diffusion/benchmark.pyexample_promptsr   (   s&      G COO##    c                      dS )N)zwarm upbadr   r   r   r   warmup_promptsr   ;   s    r   c                 (    t          d|| |          S )NT)is_gpufuncmonitor_typestart_memoryr   )r   r   r   s      r   measure_gpu_memoryr   ?   s    D|Zfggggr   
model_name	directorydisable_safety_checkerc                    ddl m}m} dd l}|Nt          j                            |          sJ |                                }|                    |||          }n|                    | d|d          }|	                    |j
        j                  |_
        |                    d           |rd |_        d |_        |S )Nr   )DDIMSchedulerOnnxStableDiffusionPipeline)providersess_optionsonnxT)revisionr&   use_auth_tokendisable)	diffusersr$   r%   onnxruntimeospathexistsSessionOptionsfrom_pretrainedfrom_config	schedulerconfigset_progress_bar_configsafety_checkerfeature_extractor)	r    r!   r&   r"   r$   r%   r.   session_optionspipes	            r   get_ort_pipeliner<   C   s    DDDDDDDDw~~i(((((%4466*::( ; 
 
 +::	 ; 
 
 #..t~/DEEDN   ... &"!%Kr   enable_torch_compileuse_xformersc                 f   d| v rddl m} |                    | t          j                                      d          }|rF|j                            t          j                   t          j        |j        dd	          |_        |S d
| v rddl m	} |                    | t          j                                      d          }|rF|j                            t          j                   t          j        |j        dd	          |_        |S ddl m
}m} ddlm}	m}
 |                    | |
                              d          }|j                            |	           |r|                                 |rit          j        |j                  |_        t          j        |j                  |_        t          j        |j                  |_        t#          d           |                    |j        j                  |_        |                    d           |rd |_        d |_        |S )NFLUXr   )FluxPipeline)torch_dtyper   )memory_formatzmax-autotuneT)mode	fullgraphzstable-diffusion-3)StableDiffusion3Pipeline)r$   StableDiffusionPipeline)channels_lastfloat16z)Torch compiled unet, vae and text_encoderr+   )r-   rA   r3   torchbfloat16totransformerrH   compilerF   r$   rG   rI   unet*enable_xformers_memory_efficient_attentionvaetext_encoderprintr4   r5   r6   r7   r8   r9   )r    r"   r=   r>   rA   r;   rF   r$   rG   rH   rI   s              r   get_torch_pipelinerT   a   sG   ******++JEN+SSVVW]^^ 	de.ABBB$}T-=N^bcccDz))666666'77
PUP^7__bbcijj 	de.ABBB$}T-=N^bcccD@@@@@@@@,,,,,,,,"22:72SSVVW]^^DILL}L--- :77999 ;M$),,	=**!M$*;<<9:::"..t~/DEEDN   ... &"!%Kr   engine
batch_sizestepsc                     |                     d          d                             dd          }|  d| d| d| |rdnd	z   S )
N/zstable-diffusion-sd__b_s _safe)splitreplace)rU   r    rV   rW   r"   short_model_names         r   get_image_filename_prefixrd      sa    !'',,R0889LdSS@@'@@:@@@@J`DmBBfmnnr   Fimage_filename_prefixskip_warmupc                    
 ddl m} t           |          sJ t                      \  }} 
fd}t	          |	||          }t	          |	||          } |             g }t          |          D ]\  }}||k    r nt          j                    }  |gz  |gz            j        }t          j                    }||z
  }|                    |           t          d|dd           t          |          D ]#\  }}|
                    | d| d| d	           $dd
lm} d|||t          |          t          |          z  t          j        |          ||dS )Nr   )r%   c                  b    rd S t                      \  } } | gz  |gz             d S )Npromptheightwidthnum_inference_stepsr   r   )rj   negativerV   rk   r;   rf   rW   rl   s     r   warmupz run_ort_pipeline.<locals>.warmup   s`     	F)++8j( %%J3	
 	
 	
 	
 	
 	
r   ri   Inference took .3f secondsr\   .jpg__version__r.   rU   versionrk   rl   rW   rV   batch_countnum_promptsaverage_latencymedian_latencyfirst_run_memory_MBsecond_run_memory_MB)r-   r%   
isinstancer   r   	enumeratetimeimagesappendrS   saver.   rv   sumlen
statisticsmedian)r;   rV   re   rk   rl   rW   rz   ry   r   memory_monitor_typerf   r%   r   r   rp   first_run_memorysecond_run_memorylatency_listirj   inference_startr   inference_endlatencykimageort_versions   `` ```    `                r   run_ort_pipeliner      s    655555d788888.00G_

 

 

 

 

 

 

 

 

 

 **=v|TT*+>UU
FHHHLw'' ? ?	6E)++8j( %,-
:
 
 
  	 	/1G$$$55555666!&)) 	? 	?HAuJJ/==!==a===>>>>	? 766666   ""|,,s</@/@@$+L99/ 1  r   returnc                     |s|rd| in	d| g|z  ini }t           j                                        r+t          j        d                              d          |d<   |S )Nr   r   )device{   	generator)rJ   r   is_available	Generatormanual_seed)r   use_num_images_per_promptis_fluxrV   kwargss        r   get_negative_prompt_kwargsr      s     	(E00#o%6%CDD   z   N#oV<<<HHMM{Mr   c                    
 t                      \  }}dd l}t           |j                   
fd}t	          |	||          }t	          |	||          } |             t          j        d           g }t          |          D ]\  }}||k    r nt
          j        	                                 t          j
                    }t          |d          }  d|gz  d|j        }t
          j        	                                 t          j
                    }||z
  }|                    |           t          d|dd           t          |          D ]#\  }}|                    | d| d| d	           $d
t
          j        ||t#          |          t%          |          z  t'          j        |          ||dS )Nr   c                  x    rd S t                      \  } }t          |d          } d| gz  	d| d S )NFrj   rk   rl   rm   r   r   r   )
rj   ro   extra_kwargsrV   rk   r   r;   rf   rW   rl   s
      r   rp   z"run_torch_pipeline.<locals>.warmup  sc     	F)++1(E7JWWqVHz)&[`qqdpqqqqqr   Fr   rq   rr   rs   r\   rt   rJ   rw   r   )r   r-   r   rA   r   rJ   set_grad_enabledr   r   synchronizer   r   r   r   rS   r   rv   r   r   r   r   )r;   rV   re   rk   rl   rW   rz   ry   r   r   rf   r   r   r-   rp   r   r   r   r   rj   r   r   r   r   r   r   r   r   s   `` ```    `                @r   run_torch_pipeliner      se     /00G_y566Gr r r r r r r r r r r **=v|TT*+>UU
FHHH	5!!!Lw'' ? ?	6E
   )++1/5'S]^^ 
8j( %	
 

 
 
  	 	
   	/1G$$$55555666!&)) 	? 	?HAuJJ/==!==a===>>>>	? $ ""|,,s</@/@@$+L99/ 1  r   r&   rk   rl   rz   ry   tuningc                 x   |}|r|dv r|dddf}t          j                     }t          | |||          }t          j                     }t          d||z
   d           t          d| |||          }t	          ||||||||	|
||          }|                    | ||                    dd	          |d
d           |S )N)r   r	      )tunable_op_enabletunable_op_tuning_enableModel loading took rs   ortrf   ExecutionProviderr_   Fr    r!   r&   r"   enable_cuda_graph)r   r<   rS   rd   r   updaterb   )r    r!   r&   rV   r"   rk   rl   rW   rz   ry   r   r   r   rf   provider_and_options
load_startr;   load_endre   results                       r   run_ortr   :  s     $ c(PPP (_`*a*abJJ	3GI_``Dy{{H	
?: 5
?
?
?@@@5eZUZ\rss  F MM$" (()<bAA&<!&	
 	
   Mr   Tuse_io_bindingc                    ddl m} |8t          j                            |          r|                    |||          }n.|                    | d||          }|                    |           |rd |_        d |_        |S )Nr   )ORTPipelineForText2Image)r&   r   T)exportr&   r   )	optimum.onnxruntimer   r/   r0   r1   r3   save_pretrainedr8   r9   )r    r!   r&   r"   r   r   pipelines          r   get_optimum_ort_pipeliner   n  s     =<<<<<	!:!:+;;IPXiw;xx+;;)	 < 
 
 	  +++ *"&%)"Or   c                 ^   
 t          dt                                ddlm} t	           |          t                      \  }} 
f	d}t          |	||          }t          |	||          } |             t          |
          }g }t          |          D ]\  }}||k    r nt          j	                    }
r  d|d|j
        }n  d|gz  d|j
        }t          j	                    }||z
  }|                    |           t          d|dd	           t          |          D ]#\  }}|                    | d
| d
| d           $ddlm} d||t          |          t!          |          z  t#          j        |          ||dS )NzPipeline typer   )ORTFluxPipelinec            	         	 rd S t                      \  } }t          |
          }
r d| 	d| d S  d| gz  	d| d S )Nrj   rk   rl   rm   num_images_per_promptr   r   r   )rj   ro   r   ry   rV   rk   r   r;   rf   rW   r   rl   s      r   rp   z(run_optimum_ort_pipeline.<locals>.warmup  s     	F)++1(<UW^`jkk$ 
	vD $)&1       Du:-fE_duuhtuuuuur   r   r   rq   rr   rs   r\   rt   ru   optimum_ortrw   r   )rS   type&optimum.onnxruntime.modeling_diffusionr   r   r   r   r   r   r   r   r   r   r.   rv   r   r   r   r   )r;   rV   re   rk   rl   rW   rz   ry   r   r   r   rf   r   r   r   rp   r   r   r   r   r   rj   r   r   r   r   r   r   r   r   s   `` ``` `  ``                 @r   run_optimum_ort_pipeliner     s    
/4::&&&FFFFFF//G.00G_v v v v v v v v v v v v v& **=v|TT*+>UU
FHHH-o?XZacmnnLLw'' ? ?	6E)++$ 	T $)&0     F T x*,V5^c gs   	/1G$$$55555666!&)) 	? 	?HAuJJ/==!==a===>>>>	? 766666   ""|,,s</@/@@$+L99/ 1  r   c                    t          j                     }t          | ||||          }t          j                     }t          d||z
   d           |r| dz   t          |          j        z   n| }t          d||||          }t          ||||||||	|
||          }|                    | ||                    dd          |d	d
           |S )Nr   r   rs   r\   optimumr   r   r_   Fr   )	r   r   rS   r   namerd   r   r   rb   )r    r!   r&   rV   r"   rk   rl   rW   rz   ry   r   r   r   rf   r   r;   r   full_model_namere   r   s                       r   run_optimum_ortr     s     J#Ix)?P^  D y{{H	
?: 5
?
?
?@@@AJZj3&i)===PZO5?J7M  &  F MM$" (()<bAA&<!&	
 	
   Mr   work_dirrx   max_batch_sizenvtx_profileuse_cuda_graphc                 P  - t          d           ddlm}  |             |k    sJ ddlm}  ||          }|                                }ddlm}m} ddl	m
} |j        } || ||          \  }}}}} ||d|d|||||		  	        --j                            |||d
dddt          j                                                   -                               -fd}t%          |
||	          }t%          |
||	          } |             t'          d||          }g }t)                      \  } }!t+          |           D ]\  }"}#|"|k    r nt-          j                    }$-                    |#gz  |!gz  dd          \  }%}&t-          j                    }'|'|$z
  }(|                    |(           t          d|(dd|&            t+          |%          D ]#\  })}*|*                    | d|" d|) d           $-                                 ddlm}+ ddlm}, i d|                                ddd|,dd|+ dd|d d!d"d#d$|d%|d&t?          |          tA          |          z  d'tC          j"        |          d(|d)|d*|d+|S ),Nzd[I] Initializing ORT TensorRT EP accelerated StableDiffusionXL txt2img pipeline (static input shape)r   init_trt_pluginsPipelineInfo
EngineTypeget_engine_pathsrG   DDIMFr5   
output_dirverboser   r   r   framework_model_direngine_type   T)opt_image_heightopt_image_widthopt_batch_sizestatic_batchstatic_image_shapemax_workspace_size	device_idc                  n    t                      \  } }                    | gz  |gz             d S N)denoising_stepsr   run)rj   ro   rV   rk   r   rW   rl   s     r   rp   z"run_ort_trt_static.<locals>.warmup\  sD    )++fX
*XJ,CVUdijjjjjr   ort_trtg      @r   r   guidanceseedEnd2End took rr    seconds. Inference latency: r\   rt   ru   r    rU   r.   rx   r&   z	tensorrt()r!   rk   rl   rW   rV   ry   rz   r{   r|   r}   r~   r"   r   )#rS   trt_utilitiesr   diffusion_modelsr   
short_nameengine_builderr   r   pipeline_stable_diffusionrG   ORT_TRTbackendbuild_enginesrJ   r   current_deviceload_resourcesr   rd   r   r   r   r   r   r   teardownr   rv   r.   r   r   r   r   r   ).r   rx   rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   r   r   pipeline_infor   r   r   rG   r   onnx_dir
engine_dirr   r   r\   rp   r   r   re   r   r   r   r   rj   r   r   pipeline_timer   r   r   r   trt_versionr   r   s.     ` ```                                      @r   run_ort_trt_staticr    s`     

pqqq /.....''''------ L))M))++J;;;;;;;;AAAAAA$K?O?OPXZgit?u?u<Hj*&91 '&!%%/
 
 
H ""
!*++-- #     FE:666k k k k k k k k k **=v|TT*+>UU
FHHH5iZY^`vwwL.00G_w'' ? ?	6E)++ (Hz!
*! !- !
 !
 	/1G$$$WgWWWWWXXX!&)) 	? 	?HAuJJ/==!==a===>>>>	? 333333666666m((**- 	; 	....	
 	Z 	& 	 	 	j 	{ 	{ 	3|,,s</@/@@ 	*+L99 	/ 	 1  	!"8!" 	^# r   c                 b  1 t          d           ddlm} ddlm}  |             |k    sJ ddlm}  ||          }ddlm}m	} ddl
m} |j        } || ||          \  }}}}} ||d|d	||d
|          11j                            |||dd
d
d	|           t          1j                                        1j                                                  }|                    |          \  }}1j                            |           1                               1fd} t)          || |
          }!t)          || |
          }" |              t+          d||          }#g }$t-                      \  }%}&t/          |%          D ]\  }'}(|'|k    r nt1          j                    })1                    |(gz  |&gz  d          \  }*}+t1          j                    },|,|)z
  }-|$                    |-           t          d|-dd|+            t/          |*          D ]#\  }.}/|/                    |# d|' d|. d           $1                                 dd l}0d|0j        d|	|t?          |$          tA          |$          z  tC          j"        |$          |!|"|dS )N][I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)r   cudartr   r   r   r   r   FT)r5   r   r   r   r   r   r   r   r  r   r  
onnx_opsetr   r   r   r   static_shapeenable_all_tacticstiming_cachec                  v    rd S t                      \  } }                    | gz  |gz             d S r   r   )rj   ro   rV   rk   r   rf   rW   rl   s     r   rp   z#run_tensorrt_static.<locals>.warmup  sQ     	F)++fX
*XJ,CVUdijjjjjr   trtr   )r   r   r   rr   r   r\   rt   r   default)rU   rx   r&   rk   rl   rW   rV   ry   rz   r{   r|   r}   r~   r   )#rS   r   r  r   r   r   r   r   r   r   r   rG   TRTr   load_enginesmaxmax_device_memory
cudaMallocactivate_enginesr  r   rd   r   r   r   r   r   r   r  r   rv   r   r   r   r   )2r   rx   r    rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   rf   r  r   r   r  r   r   rG   r   r  r  r   r   r  r  r\   shared_device_memoryrp   r   r   re   r   r   r   r   rj   r   r   r  r   r   r   r   r  r   s2      ` ```       `                                 @r   run_tensorrt_staticr    s   $ 

ijjj /.....''''------ L))M;;;;;;;;AAAAAA.KJZJZ-K KGHj*&9<
 '&!%	 	 	H !!/! ! "    H,>>@@(BRBdBdBfBfgg$//0ABBA%%&:;;; FE:666k k k k k k k k k k **=v|TT*+>UU
FHHH5eZUZ\rssL.00G_w'' ? ?	6E)++ (Hz!
*! !- !
 !
 	/1G$$$WgWWWWWXXX!&)) 	? 	?HAuJJ/==!==a===>>>>	?  ? ""|,,s</@/@@$+L99/ 1+  r   c                 v   *+,-./ t          d           dd l}ddlm} ddlm} ,-,dz  dk    s	-dz  dk    rt          d, d- d           |             k    sJ dd	lm} dd
l	m
*m+ *+ f	d}ddlm}  ||          } |||          .t          .j                                        .j                                                  }|                    |          \  }}.j                            |           .                    ,-           d,-.fd	//fd}t)          |
||	          }t)          |
||	          } |             |                                }t-          d||          }g }t/                      \  }} t1          |          D ]\  }!}"|!|k    r nt3          j                    }# /|"gz  | gz  d          \  }$}%t3          j                    }&|&|#z
  }'|                    |'           t          d|'dd|%            t1          |$          D ]#\  }(})|)                    | d|! d|( d           $.                                 |d|j        d||t=          |          t?          |          z  tA          j!        |          ||dS )Nr
  r   r  r      zCImage height and width have to be divisible by 8 but specified as: z and .r   r   c                    	 	j         } ||          \  }}}}} | |d|d||	  	        }|j                            |||d
ddd|           |S )Nr   Fr   r   Tr  )r  r   r  )pipeline_classr  r   r  r  r   r   r  r   r   rV   r   rk   r   r   r   rl   r   s            r   init_pipelinez-run_tensorrt_static_xl.<locals>.init_pipelineJ  s     nN^N^m[O
 O
K*j*=|
 ">!%)) 3#

 

 

 	%%! 3%#!$% 	& 	
 	
 	
 r   r   c           	      <                         | |d|          S Ng      @r   r   )rj   r   r   image_heightimage_widthr   rW   s      r   run_sd_xl_inferencez3run_tensorrt_static_xl.<locals>.run_sd_xl_inferencez  s3    ||!  
 
 	
r   c                  Z    rd S t                      \  } } | gz  |gz             d S Nrn   rj   ro   rV   r(  rf   s     r   rp   z&run_tensorrt_static_xl.<locals>.warmup  G     	F)++VHz1H:
3JKKKKKr   r  r   r   r   rr   r   r\   .pngr   r  r    rU   rx   r&   rk   rl   rW   rV   ry   rz   r{   r|   r}   r~   r   r*  )"rS   r   r   r  r   r   
ValueErrorr   r   r   r   r   r   rG   r  r   r  r  r  r  r   r   rd   r   r   r   r   r   r  rv   r   r   r   r   )0r   rx   rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   rf   r  r  r   r   r"  rG   r  r  r\   r  rp   r   r   r    re   r   r   r   r   rj   r   r   r  r   r   r   r   r   r   r&  r'  r   r(  s0   ` ` ```    ````                           @@@@@@r   run_tensorrt_static_xlr1  #  s   " 

ijjj...... LKa1a1 4 4sR^ssepsss
 
 	

 ''''------;;;;;;;;! ! ! ! ! ! ! ! ! ! ! ! !F BAAAAA L))M}4mDDHH,>>@@(BRBdBdBfBfgg$//0ABBA%%&:;;; L+zBBB	
 	
 	
 	
 	
 	
 	
 	
 	
L L L L L L L **=v|TT*+>UU
FHHH##%%J5eZUZ\rssL.00G_w'' ? ?	6E)++ 3 3VHz4IOK\_iKips t t t	/1G$$$WgWWWWWXXX!&)) 	? 	?HAuJJ/==!==a===>>>>	?  !? ""|,,s</@/@@$+L99/ 1+  r   c                   %& ddl m} ddlm}  |||j        | ||          %|k    sJ %                               d%fd	&&fd}t          |
||	          }t          |
||	          } |             %j                                        }t          d||          }g }t                      \  }}t          |          D ]\  }}||k    r nt          j                    } &|gz  |gz  d	          \  }}t          j                    }||z
  }|                    |           t          d
|dd|            t          |          D ]5\  } }!| d| d|  d}"|!                    |"           t          d|"           6%                                 ddlm}# ddlm}$ |d|$d|# d||t)          |          t+          |          z  t-          j        |          |||dS )Nr   )initialize_pipeline)r   )rx   r   r   rk   rl   r   r   r   c           	      <                         | |d|          S r$  r%  )rj   r   r   rk   r   rW   rl   s      r   r(  z+run_ort_trt_xl.<locals>.run_sd_xl_inference  s3    ||!  
 
 	
r   c                  Z    rd S t                      \  } } | gz  |gz             d S r*  rn   r+  s     r   rp   zrun_ort_trt_xl.<locals>.warmup  r,  r   r   r   r-  r   rr   r   r\   r.  zImage saved toru   r.   r   r   r/  r*  )
demo_utilsr3  r   r   r   r  r   r  r   rd   r   r   r   r   rS   r   r  r   rv   r.   r   r   r   r   )'r   rx   rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   rf   r3  r   rp   r   r   r    re   r   r   r   r   rj   r   r   r  r   r   r   r   filenamer  r   r   r(  s'     ` ```       `                      @@r   run_ort_trt_xlr8    s
   " /.....))))))""&%%!	 	 	H ''''FE:666	
 	
 	
 	
 	
 	
 	
 	
 	
L L L L L L L **=v|TT*+>UU
FHHH',,..J5iZY^`vwwL.00G_w'' . .	6E)++ 3 3VHz4IOK\_iKips t t t	/1G$$$WgWWWWWXXX!&)) 	. 	.HAu/==!==a===HJJx   "H----	.
 333333666666 !-{--- ""|,,s</@/@@$+L99/ 1+  r   c                 R   dt           j        j        _        dt           j        j        _        t          j        d           t          j                    }t          | |||          }t          j                    }t          d||z
   d           t          d| |||          }|sFt          j
                    5  t          ||||||||	|
||          }d d d            n# 1 swxY w Y   nt          ||||||||	|
||          }|                    | d |rdn|rdnd	|dd
           |S )NTFr   rs   rJ   r   rN   xformersr  r   )rJ   backendscudnnenabled	benchmarkr   r   rT   rS   rd   inference_moder   r   )r    rV   r"   r=   r>   rk   rl   rW   rz   ry   r   r   rf   r   r;   r   re   r   s                     r   	run_torchr@  !  s    $(EN %)EN"	5!!!Jj*@BVXdeeDy{{H	
?: 5
?
?
?@@@5gz:W\^tuu 
!## 	 	'%#'  F	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 $!#
 
 
 MM$%9h		\?hzz_h&<!&	
 	
   Ms   7CC"%C"c                     t          j                    } |                     dddt          dg dd           |                     dd	dt          d
t	          t
                                                    d           |                     dddd           |                     dddt          t	          t                                                    dd           |                     dddt          d d           |                     dddt          dd           |                     dddd            |                     d!           |                     d"ddd#            |                     d$           |                     d%ddd&            |                     d'           |                     d(ddd)            |                     d*           |                     d+ddd,            |                     d-           |                     d.d/t          d0g d1d23           |                     d4dt          d5d6           |                     d7dt          d5d8           |                     d9d:dt          d;d<           |                     d=d>dt          d?d@           |                     dAdBdt          t          d0dC          dDdE           |                     dFdGdt          t          d0dH          dIdJ           |                     dKdLdddM            |                     dN           |                                 }|S )ONz-ez--engineFr.   )r.   r   rJ   r   z-Engines to benchmark. Default is onnxruntime.)requiredr   r  choiceshelpz-rz
--providerr   z8Provider to benchmark. Default is CUDAExecutionProvider.z-tz--tuning
store_truezsEnable TunableOp and tuning. This will incur longer warmup latency, and is mandatory for some operators of ROCm EP.)actionrD  z-vz	--versionr   z>Stable diffusion version like 1.5, 2.0 or 2.1. Default is 1.5.)rB  r   rC  r  rD  z-pz
--pipelinez[Directory of saved onnx pipeline. It could be the output directory of optimize_pipeline.py.)rB  r   r  rD  z-wz
--work_dirr  z?Root directory to save exported onnx models, built engines etc.z--enable_safety_checkerzEnable safety checker)rB  rF  rD  )enable_safety_checkerz--enable_torch_compilez#Enable compile unet for PyTorch 2.0)r=   z--use_xformerszUse xformers for PyTorch)r>   z--use_io_bindingzUse I/O Binding for Optimum.r   z--skip_warmupz
No warmup.r   z-bz--batch_sizer   )r            r  
          z)Number of images per batch. Default is 1.)r   r  rC  rD  z--heighti   z$Output image height. Default is 512.z--widthz#Output image width. Default is 512.z-sz--steps2   zNumber of steps. Default is 50.z-nz--num_promptsrK  z!Number of prompts. Default is 10.z-cz--batch_count      z(Number of batches to test. Default is 5.z-mz--max_trt_batch_sizerL  rJ  zdMaximum batch size for TensorRT. Change the value may trigger TensorRT engine rebuild. Default is 4.z-gz--enable_cuda_graphz/Enable Cuda Graph. Requires onnxruntime >= 1.16)r   )argparseArgumentParseradd_argumentstrlist	PROVIDERSkeys	SD_MODELSset_defaultsintrange
parse_args)parserargss     r   parse_argumentsr_  f  sx   $&&F
???<     Y^^%%&&G     a	     Y^^%%&&M     j     N     !$	     e444
 2	     U333
'	     U+++
+	     u---
	     E***
+++8     3     2     .     0     a7     as     >     %000DKr   c                     dd l }|                    t          j                              }|                                D ]3| rt          fddD                       rt          j                   4d S )Nr   c              3   *   K   | ]}|j         v V  d S r*  )r0   ).0xlibs     r   	<genexpr>z)print_loaded_libraries.<locals>.<genexpr>  s)      )`)`A!sx-)`)`)`)`)`)`r   )libculibnvr   )psutilProcessr/   getpidmemory_mapsanyrS   r0   )cuda_related_onlyrh  prd  s      @r   print_loaded_librariesro    s    MMMry{{##A}}  ! 	c)`)`)`)`A_)`)`)`&`&` 	#(OOO r   c                     t                      } t          |            | j        dk    r| j        dv rdt          j        d<   ddlm} ddlm} |	                    |          |	                    d          k    rdt          j        d	<   | j
        re| j        dk    r| j        d
v r| j        t          d          |	                    |          |	                    d          k     rt          d          t          j        d           | j        dk    rdnd}t!          |d           }t          d|           t"          | j                 }t$          | j                 }| j        dk    r| j        dk    rd| j        v rft          d           t'          | j        | j        | j        d| j        | j        | j        | j        | j        ||| j        d| j
        | j                  }nt          d           t;          | j        | j        | j        | j         | j        | j        | j        | j        | j        ||| j        d| j
        | j                  }n| j        dk    rp|dk    rjd| j        v rdt          j        d	<   t?          || j        || j        | j         | j        | j        | j        | j        | j        ||| j         | j                  }n | j        dk    r| j        r$t          j!        "                    | j                  s
J d            t          d| d| j#                    tI          || j        || j        | j         | j        | j        | j        | j        | j        ||| j#        | j                   }nt| j        dk    rnd| j        v ret          d!           tK          | j        | j        | j        d| j        | j        | j        | j        | j        ||| j        d| j
        | j                  }n| j        dk    rt          d"           tM          d>i d#| j        d$| j        d%|d&| j        d'dd(| j        d)| j        d*| j        d+| j        d,| j        d-|d.|d/| j        d0dd1| j
        d2| j        }not          d3| j'         d4| j(         d5           tS          || j        | j         | j'        | j(        | j        | j        | j        | j        | j        ||| j        6          }t          |           tU          d7d8d9:          5 }g d;}	tW          j,        ||	<          }
|
-                                 |
.                    |           d d d            n# 1 swxY w Y   | j        d=k    rt_          | j        d
v            d S d S )?Nr.   )r   1ORT_DISABLE_TRT_FLASH_ATTENTIONr   )rx   ru   z1.16.0!ORT_ENABLE_FUSED_CAUSAL_ATTENTION)r   r   z:The stable diffusion pipeline does not support CUDA graph.z1.16z.CUDA graph requires ONNX Runtime 1.16 or laterz%(funcName)20s: %(message)s)fmtr   r   z&GPU memory used before loading models:r   xlzNTesting Txt2ImgXLPipeline with static input shape. Backend is ORT TensorRT EP.TF)r   rx   rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   rf   zLTesting Txt2ImgPipeline with static input shape. Backend is ORT TensorRT EP.r   r   )r    r!   r&   rV   r"   rk   rl   rW   rz   ry   r   r   r   rf   z?--pipeline should be specified for the directory of ONNX modelsz/Testing diffusers StableDiffusionPipeline with z provider and tuning=)r    r!   r&   rV   r"   rk   rl   rW   rz   ry   r   r   r   rf   zGTesting Txt2ImgXLPipeline with static input shape. Backend is TensorRT.zETesting Txt2ImgPipeline with static input shape. Backend is TensorRT.r   rx   r    rV   r"   rk   rl   rW   rz   ry   r   r   r   r   r   rf   zNTesting Txt2ImgPipeline with dynamic input shape. Backend is PyTorch: compile=z, xformers=r  )r    rV   r"   r=   r>   rk   rl   rW   rz   ry   r   r   rf   zbenchmark_result.csvar_   )rD   newline)r    r!   rU   rx   r&   r"   rk   rl   rW   rV   ry   rz   r{   r|   r}   r~   r   )
fieldnamesr   r   )0r_  rS   rU   rx   r/   environ	packagingr.   rv   parser   r&   r   r0  coloredlogsinstallr   rX  rV  r8  r   rV   rk   rl   rW   rz   ry   max_trt_batch_sizerf   r  rG  r   r   r0   isdirr   r   r1  r  r=   r>   r@  opencsv
DictWriterwriteheaderwriterowro  )r^  rx   r   r   r   sd_modelr&   r   csv_filecolumn_names
csv_writers              r   mainr    s   D	$KKK{m##<7"" =@BJ89%%%%%%::::::==%%x)@)@@@ ?BBJ:;! 	SK=00T]FZ5Z5Z_c_l_t !]^^^}}[))GMM&,A,AAA !QRRR9::::$(MV$;$;&&%&94@@L	
2LAAA&H'H{m##(C(C4<bccc#?'+{jj , ,)$7#6"#5 ,  FF$ `aaa'?+/+E'E{jj , ,)$7#6"#5 ,  FF" 
		!	!h2I&I&I4<>ABJ:; m'+'A#A;**((% 3.(
 
 
  
	%	%} 	
t}!=!= 	
 	
M	
 	
= 	lll_c_jllmmmm'+'A#A;**((% 3;(
 
 
  

	"	"tt|';';WXXX']L#';**((% 321(
 
 
" 

	"	"UVVV$ 
 
 
]]
LL
  x
 	

 $(4
 ;;
 **
 **
 ((
 ((
 &
 !4 3
  22
 
  11
  ((!
& 	 X]a]v  X  X  DH  DU  X  X  X	
 	
 	
 '+'A#A!%!:*;**((% 3(
 
 
  
&MMM	$3	;	;	; $x
 
 
& ^HFFF
   F###-$ $ $ $ $ $ $ $ $ $ $ $ $ $ $2 zQt}0DDEEEEE s   	AUU U__main__r*  )F)r   TF)FF)FT)FTF)T),rQ  r  r/   r   sysr   pathlibr   r|  rJ   benchmark_helperr   rX  rV  r   r   r   rT  boolr<   rT   rZ  rd   r   dictr   r   r   r   r   r   r  r  r1  r8  r@  r_  ro  r  __name__	traceback	Exceptionprint_exceptionexc_infor   r   r   <module>r     s    



 				     



             + + + + + + ,+-;=541-
 
	 $#++	 	$ $ $&  h h h h  X\    <*3 * *\` *pt * * * *Zoc os o oTW oqu o o o o  H HH H H H H HVcg    < F FF F F F Fn 1 111 1 	1
 !1 1 1 1 1 1 1 1 1 1 1n %#'   !	
    L $W WW W W W WN !2 222 2 	2
 !2 2 2 2 2 2 2 2 2 2 2D @ @@@ @ !	@
 @ @ @ @ @ @ @ @ @ @ @b !E EEE E 	E
 !E E E E E E E E E  !E E E Ej S SSS S !	S
 S S S S S S S S S S SF e eee e !	e
 e e e e e e e e e e ej B BBB !B 	B
 B B B B B B B B B BJm m m`   JF JF JFZ z3 3 3 3!	!<3<>>222223 s   
F" "GG