
    fPiGE                     $   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZmZ d dlmZ  ej        d          Zd'dZ G d d	          Zd
 Zd Zd Zd Zd ZdefdZd ZddgfdefdZd Z d Z!d Z"d Z#defdZ$e%dk    rw e            Z& ee&j'                   e&j(        dk     se&j)        dk     se&j(        e&j)        z  dk     re*                    d           ej+        ,                    e&j                  rRe&j-        sKe&j.        s e/d e&j         d!          e0                    d"e&j                    ej1        e&j                    ee&j(        e&j)        e&j                  Z2e&j-        s e$e2e&           	  ee2j3                  Z4n/# e5$ r' e6                    d#e2j3                    e2j7        Z4Y nw xY we0                    d$           e2j3        8                    d%d&          Z9 ee4e9           dS dS )(    N)get_ort_environment_variablessetup_logger)main)PRETRAINED_GPT2_MODELS
Gpt2Helper)	OnnxModel c           
         t          j                    }|                    dddt          dd                    t
                    z              |                    ddt          d	d
           |                    ddt          dd           |                    ddt          dd           |                    dddd           |                    d           |                    dddd           |                    d           |                    dddd           |                    d           |                    ddd           |                    d           |                    d ddd!           |                    d"           |                    d#ddd$           |                    d%           |                    |           }|S )&Nz-mz--model_name_or_pathTz2Model path, or pretrained model name in the list: z, )requiredtypehelpz--csvFzgpt2_parity_results.csvz#path of csv file to save the result)r   r   defaultr   z--test_casesi  znumber of test cases per runz--runs(   znumber of repeated runs	--use_gpu
store_truezuse GPU for inference)r   actionr   )use_gpuz--allz'run all combinations of mixed precision)allz-e--use_external_data_format)r   r   )use_external_data_formatz	--verbose)verbosez--skip_testzEdo not run test, and only rank experiments based on existing csv file)	skip_testz--overwritezOverwrite existing csv file)	overwrite)	argparseArgumentParseradd_argumentstrjoinr   intset_defaults
parse_args)argvparserargss      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/gpt2/gpt2_parity.pyparse_argumentsr&      sG   $&&F
ADIINdDeDee     )2     +     5sBMfggg
eLOfggg
&&&
6	     E"""
:US_```
777
eLIII
&&&
T	     %(((
*	     %(((T""DK    c                       e Zd Zd Zd ZdS )
ParityTaskc                 L    || _         || _        || _        g | _        d| _        d S )Nr   )
total_runs
test_casescsv_pathresultsrun_id)selfr,   r+   r-   s       r%   __init__zParityTask.__init__b   s)    $$ r'   c                    t           j                                                             d          }| d| j         }| xj        dz  c_        	 t	          g |d| j         d| j         ||| j                  }|r| j        	                    |           n/# t          $ r" t                              d|            d }Y nw xY w|S )Nz%Y%m%d%H%M%S_   z-tz-r)experiment_namer/   csv_filenamezFailed to run experiment )datetimenowstrftimer/   r   r,   r+   r-   r.   append	Exceptionlogger	exception)r0   r"   r5   
start_timer/   results         r%   runzParityTask.runi   s    &**,,55nEE
....q	O$OO2ODOT_:NO /!]	  F  ,##F+++ 	 	 	JJJKKKFFF	 s   A
B )CCN)__name__
__module____qualname__r1   r@    r'   r%   r)   r)   a   s2              r'   r)   c                     g }dd l }t          | d          5 } |j        |          }|D ]}|                    |           	 d d d            n# 1 swxY w Y   |S )Nr   r	   newline)csvopen
DictReaderr:   )r-   rowsrH   csvfilereaderrows         r%   load_results_from_csvrO   ~   s    DJJJ	h	#	#	# w(( 	 	CKK	               Ks   +AAAc                     | D ].}|                     d          rt          | |                   c S /t          d          )Nzaverage_latency(batch_size=z)Failed to get average_latency from output)
startswithfloatRuntimeError)rN   names     r%   get_latencyrU      sT     $ $??899 	$T#####	$ B
C
CCr'   c                     t          |           }t          | d                   }t          | d                   }|dz  |dz  z
  |dz  z
  S )z@Scoring function based on 3 metrics. The larger score is better.top1_match_rateonnx_size_in_MBi  
   d   )rU   rR   )rN   latency_in_msrW   rX   s       r%   scorer\      sT    $$MC 1233OC 1233OT!MB$6639NNNr'   c                   
 t                       t          d           i 
|D ]}|
|d         <   t          t          |                                 
fdd                    }t                              | d|            t                              d| d           d	}d
}t          |                                          D ]\  }\  }}	|	|k    r|}|	}|D ]|}|d         |k    rnt                              d                    ||	|t          |          t          |d                   |d         |d         t                                            n}d S )Nz
**********r/   c                 J    | d         t          | d                            fS )Nr4   r   )r\   )itemrow_maps    r%   <lambda>zprint_wins.<locals>.<lambda>   s!    d1guWT!W-='>'>? r'   T)keyreversez Wins:z	Based on z* wins and a scoring function, the ranking:r   za{:02d}: WINs={:02d}, run_id={}, latency={:5.2f}, top1_match={:.4f}, size={}_MB, experiment={}, {}rW   rX   
experiment)printdictsorteditemsr<   debuginfo	enumerateformatrU   rR   r   )winsrK   	test_namerN   sorted_winsrankprevious_valuecountrb   valuer`   s             @r%   
print_winsru      s   	GGG	(OOOG % %!$HJJLL????	
 	
 	
 K LLI22[22333
KKQIQQQRRRDN():):)<)<==  |UN""D 	 	C8}##w~~#C((c"3455-.L)577	 	    $ r'   c                 \   i }i }| D ]}|d         }d||<   d||<   t          |dd          5 }g d}t          j        ||          }|                                 g d}	t	          |           }
t          |
d	z
            D ]Q}| |         }t          |d
         t                    rt          j	        |d
                   }n|d
         }t          |d	z   |
d	          D ]}| |         }d}|	D ]}||         ||         k    rd} n|s+t          |d
         t                    rt          j	        |d
                   }n|d
         }	 t          j                            ||dd          \  }}n# t          $ r d}d}Y nw xY wt          j                            ||dd          \  }}|_|dk     rYt          |d                   t          |d                   k    r||d         xx         d	z  cc<   n||d         xx         d	z  cc<   |dk     rYt          |d                   t          |d                   k    r||d         xx         d	z  cc<   n||d         xx         d	z  cc<   |d         |d         |d         t          |d                   |d         |d         t          |d                   ||||d}|                    |           S	 ddd           n# 1 swxY w Y   t"                              d|            t'          || d           t'          || d           dS )zRun U test and T test.r/   r   wr	   rF   )
model_namerun_id_1experiment_1top1_match_rate_1run_id_2experiment_2top1_match_rate_2U_statisticU_pvalueT_statisticT_pvalue)
fieldnames)rx   r,   runsr4   top1_match_rate_per_runTFz	two-sided)use_continuityalternativeN)axis	equal_varg?rW   rx   re   z(U-Test and T-Test results are output to zU-TestzT-Test)rI   rH   
DictWriterwriteheaderlenrange
isinstancer   jsonloadsscipystatsmannwhitneyu
ValueError	ttest_indrR   writerowr<   rk   ru   )rK   output_csv_path
utest_wins
ttest_winsrN   r/   rL   column_nameswriterrequired_match_columnsnum_resultsiresult1ajresult2all_matchedcolumnbutest_statisticutest_pvaluettest_statisticttest_pvalues                          r%   run_significance_testr      sJ   JJ  X
6
6	osB	/	/	/ O%7
 
 
 LAAA!E!E!E$ii{Q'' ;	% ;	%A1gG'";<cBB 7Jw'@ABB561q5+q11 3% 3%q'"4  Fv'&/99&+ : # g&?@#FF ;
7+D#EFFAA 9:A(49K4L4L1T{ 5M 5 51O\\ " ( ( (&*O#'LLL( 160E0EaQUae0E0f0f-+t0C0CW%67885IZA[;\;\\\"78#4555:5555"78#4555:555$&&W%67885IZA[;\;\\\"78#4555:5555"78#4555:555 #*,"7 ' 1$+L$9).w7H/I)J)J ' 1$+L$9).w7H/I)J)J#2 ,#2 ,  $$$$g3%;	%)O% O% O% O% O% O% O% O% O% O% O% O% O% O% O%` KKL?LLMMMz4***z4*****s7   DK"	&E0/K"0F	>K" F	EK""K&)K&raw_onnx_modelc                    t          j        |           }t          |          }|                                }|j        j        d         j        |v sJ ||j        j        d         j                 }|j        dk    r)t          	                    d|j                    |j        S t          
                    d|j         d|j                    d S )Nr   MatMulz#Found last MatMul node for logits: z-Failed to find MatMul node for logits. Found z	 of node )onnxloadr   output_name_to_nodegraphoutputrT   op_typer<   rk   warning)r   model
onnx_modelr   nodes        r%   get_last_matmul_node_namer   "  s    In%%E5!!J$88::;a %)<<<<<u{1!49:D|xE$)EEFFFy
NNe4<eeZ^Zceefff4r'   c                     | j         }d| d                                }| j        r|                    d           |ddd|gz  }|r|                    dg|           |S )N-m  -o --use_gpu -p fp16r   --io_block_listlogitsz--node_block_list--op_block_list)model_name_or_pathsplitr   r:   extend)r$   last_matmul_node_nameop_block_listr   
parameterss        r%   get_mixed_precision_parametersr   1  s    #E3u33399;;J$ 86777	 J  ?,=}=>>>r'   FastGeluLayerNormalizationtaskc                     t          |||          }d                    t          |                    }|rd| d}nd| d}t                      }|r	|d| dz   }|                     ||           d S )N,Mixed precision baseline +  in FP32z=Mixed precision baseline (logits output and last MatMul node z	 in FP32)z ())r   r   rh   r   r@   )r   r$   r   r   r   op_block_list_strrT   env_varss           r%   run_candidater   C  s     06K][[J!6!677 pH->HHHoOdooo,..H '&8&&&&HHZr'   c                     | j         }d| d                                }| j        r|                    d           | j        r|                    d           d| d                                }| j        r|                    d           ||fS )Nr   z -o -p fp32r   r   r   )r   r   r   r:   r   )r$   r   fp32_baselinefp16_baselines       r%   get_baselinesr   X  s    #E,%,,,2244M| *[)))$ ;9:::6%666<<>>M$ ;9:::-''r'   c                    ddg}|                      ||z   d           dg}|                      ||z   d           |                      ||z   dgz   t          |          z   dgz   d           g }|}|D ]Kdgfd	|D             z   }|                      ||z   |z   d
 d          }	|	r|                    |	           Lt          |d           }
t	          d|
           dS )z:Step 0 is to check which operator in FP16 causes most lossr   r   zFP16 except logitsz--keep_io_typeszGraph I/O FP32, Other FP16r   z--force_fp16_initializerszFP32 except weights in FP16c                      g | ]
}|k    |S rD   rD   ).0oops     r%   
<listcomp>z$run_tuning_step0.<locals>.<listcomp>x  s    .M.M.MQQ"WWqWWWr'   zFP32 except z in FP16c                     | d         S )NrW   rD   )ys    r%   ra   z"run_tuning_step0.<locals>.<lambda>~  s    !<M:N r'   )rb   z<step 0: optimized operator causes the most loss in precisionN)r@   listr:   minrf   )r   r   all_opsoptimized_opsfp32_logitsfp32_iooptimized_ops_resultsop_listr   r?   
min_resultr   s              @r%   run_tuning_step0r   g  s>   $h/KHH][(*>??? !GHH]W$&BCCC 	HH#4"55WEIdHee%  
 G 1 1*+.M.M.M.M'.M.M.MM-'1MAC^RTC^C^C^__ 	1!((000 *0N0NOOOJ	
H*UUUUUr'   c                 R    |D ]#}d|g}|                      ||z   d| d           $dS )zKStep 1 is to figure out which optimized operator in FP32 could benefit mostr   r   r   N)r@   )r   mixed_precision_baseliner   r   r   s        r%   run_tuning_step1r     sV     
 
*B/$}46"666	
 	
 	
 	

 
r'   c           	          g d}fd|D             }D ]O}||vrIg ||}|                      g |d|d                    d                    |          |                     PdS )zAssumed that you have run step 0 and 1 to figure out that Logits FP32 and some operators shall be in FP32,
    This step will try add one more operator.
    )r   r   SkipLayerNormalizationc                     g | ]}|v |	S rD   rD   )r   xr   s     r%   r   z$run_tuning_step2.<locals>.<listcomp>  s#    DDDam1C1C1C1C1Cr'   r   z(Mixed precision baseline + {},{} in FP32r   N)r@   rm   r   )r   r   r   candidate_fp32_opsfp32_opsr   r   s     `    r%   run_tuning_step2r     s     VUUDDDD-DDDH  X+hOOMHHN*N,=NN:AA#((8BTBTVXYY   r'   c           	        
 t          j        d|j        |j        g           }t	          |          \  }}|                     |d          }g }|r(d|v r$|d         r|d                             d          }nt          d          g 
|r(d|v r$|d         r|d                             d          
nt          d          |j        st          
                    d	           d S |                     |d
           t          |d                   }t          | ||g            
fd}|j        rGt          | |
|           t          ||g           }	t!          | |	|           t#          | |	|           n2t          | || |g d                     t          | ||dg           t          | || |g d                     t          | || |g d                     d S )Nonnx_models)
new_folderremove_existingzFP32 baselineoptimized_operatorsr   z!Failed to get optimized operators	operatorszFailed to get operatorsz5skip mixed precision since --use_gpu is not specifiedzFP16 baselineraw)r   c                      fd| D             S )Nc                     g | ]}|v |	S rD   rD   )r   r   r   s     r%   r   z4run_parity.<locals>.get_fp32_ops.<locals>.<listcomp>  s    000r"-----r'   rD   )r   r   s    r%   get_fp32_opsz run_parity.<locals>.get_fp32_ops  s    0000Q0000r'   )r   r   Addr   )r   r   r   r   )r   EmbedLayerNormalizationr   r   r   )r   get_onnx_pathsr   r   r   r@   r   rS   r   r<   rk   r   r   r   r   r   r   r   )r   r$   onnx_model_pathsr   r   r?   r   r   r   r   r   s             @r%   
run_parityr     s   !00	   $1#6#6 M=XXm_55FM @(F22?T8U245;;C@@>???G 6;&((f[.A(%++C004555 < KLLLHH]O,,,56Fu6MNN $32FFFF1 1 1 1 1 x U}g}EEE#A$H]mo#p#p#p 7GGG7GGGG!&,'^'^'^__		
 	
 	
 	
 	dD"7
|TTTT "l#f#f#fgg	    "ljjj
 
	     r'   __main__rZ      i'  zNot enough test cases or runs to get stable results or test significance. Recommend test_cases >= 100, runs >= 20, test_cases * runs >= 10000.zOutput file zK existed. Please remove the file, or use either --skip_test or --overwrite.z6Remove existing file %s since --overwrite is specifiedzFailed to load csv z#Start running significance tests...z.csvz
.stats.csv)N):r   rH   r7   r   loggingosr   scipy.statsr   benchmark_helperr   r   convert_to_onnxr   gpt2_helperr   r   r   r   	getLoggerr<   r&   r)   rO   rU   r\   ru   r   r   r   r   r   r   r   r   r   r   rA   r$   r   r,   r   r   pathexistsr   r   rS   rk   remover   r-   rK   r;   r=   r.   replacesummary_csvrD   r'   r%   <module>r     s    



    				      H H H H H H H H             : : : : : : : :            		2		@ @ @ @F       :  D D DO O O' ' 'T[+ [+ [+|c      , 34	 
   *( ( (V V V6
 
 
  CZ C C C CL z?DL	B$/DI2MPU2U2US	
 	
 	

 
w~~dh    ~ 	 ,ttxttt   KKPRVRZ[[[BIdh:doty$(;;D> 
4$$T]33   >t}>>???| KK5666-''==K$,,,,,A s   F# #)GG