
    fPirL                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ  e j        e          Z G d d          Z G d d          ZdS )    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   P    e Zd ZddZd ZdefdZdefdZdd
Z	ddZ
d Zd ZdS )
Gpt2MetricTorch   c                     |dk    r|dk    sJ || _         || _        | d| | _        || _        d| _        d| _        d| _        d| _        d| _        d | _	        d | _
        i | _        d S )N   d   z vs r   )baseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer   s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__zGpt2Metric.__init__   s    qyyUc\\\)%'*????	
 ! !"#&'./$3737!    c                     | j         | j        k    rt          d           t          d| j         d| j          d           | j        dk    r\d| j        z  | j        z  }d| j        z  | j        z  }t          d| j         d| j         d	|d
d| j         d| j         d	|d
d           t          d           t          d| j        d           t          d| j        d           nt          d| j         d           | j	        rt          d           d}d}t          | j	                                                  D ]}t          j        | j	        |                   dz  }|dk    rt          d| d|d
d           n&t          dd|z   dd|dz   z  dz
   d|d
d           ||t          | j	        |                   z  z  }|t          | j	        |                   z  }t          d||z  d
d           d S d S )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r   z]:	zAverage Latency: )r   r   printr   r   r   r   r   r   r   sortedkeys
statisticsmeanlen)r   top_1_error_ratetop_k_error_ratetotalcountkeyaverages          r   r#   zGpt2Metric.print%   s   =DN**%LLLMMMDMMMMNNN!A%%#(4+;#;d>P#P #(4+;#;d>P#P  kT/  k  kD<L  k  kP`  k  k  klplv  k  k  C  O  k  k  Sc  k  k  k  k   %&&&>D$8>>>???FD$@FFFGGGG====>>> 	>CDDDEEd27799:: 8 8$/$*>s*CDDvM!88@s@@@@@@AAAAR3RR!a.1*<RR'RRRRSSS3t';C'@#A#AAAT1#6777<eem<<<<=====	> 	>r   is_empty_pastc                     ||z
                                                                   }|rt          | j        |          | _        nt          | j        |          | _        |S N)absmaxr   r   )r   baseline_logitstreatment_logitsr/   diffs        r   diff_logitszGpt2Metric.diff_logitsC   s`    "227799==?? 	C+.t/KT+R+RD((#&t';T#B#BD r   
batch_sizec                     | xj         |z  c_         t          j        |dft          j                  | _        t          j        |dft          j                  | _        d S )Nr   dtype)r   torchzerosboolr   r   )r   r8   s     r   start_batchzGpt2Metric.start_batchL   sV    j( %ZO5: N N N %ZO5: N N Nr   Tc                    |                      |j        |j        d|           |                      |j        |j        | j        |           |                     |j        |j        |dk              }|rt          d| j         d|            d S d S )Nr   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r7   logitsr#   r   )r   r   r   past_seq_lenverbosemax_diffs         r   
eval_batchzGpt2Metric.eval_batchQ   s    -y/Eq'RRR-y/EtzSZ[[[##HOY5E|WXGXYY 	B@@@h@@AAAAA	B 	Br   c                    t          j        t          j        ||                    s|dk    rP|rt          d| j                    | xj        t          j        ||                                          z  c_        d S |rt          d| d| j         d           | xj        t          j        ||                                                              d          	                    d          dk    z  c_        d S d S )Nr   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r<   alleqr#   r   r   logical_notr   sum	unsqueeze)r   baseline_topktreatment_topkr   rF   s        r   rA   zGpt2Metric._eval_topkY   s   y-@@AA 	zz KIdiIIJJJ%%-)P)P)\)\)^)^^%%%% vuvvdivvv   %%H]N;;GGIIMMaPPZZ_`Zaadee%%%%	 	r   c                     | xj         | j                                        z  c_         | xj        | j                                        z  c_        d S r1   )r   r   rN   r   r   r   s    r   	end_batchzGpt2Metric.end_batchh   sN    D155777D155777r   c                     |dk    r$t          t          j        |                    dz   nd}|| j        vr
g | j        |<   | j        |                             |           d S )Nr   r   )intmathlog2r   append)r   rE   latencyr-   s       r   add_latencyzGpt2Metric.add_latencyl   sj    2>2B2Bc$)L))**Q..d***(*D %S!((11111r   N)r   r	   )T)__name__
__module____qualname__r   r#   r>   r7   rV   r?   rH   rA   rT   r[    r   r   r   r      s        " " " "> > ><D    Oc O O O O
B B B B   8 8 82 2 2 2 2r   r   c            
           e Zd Z	 	 	 ddZdefdZd Zd Zd Ze	dd
            Z
e	d             Ze	d             Ze	ej        ddddddddf	d            ZdS )
Gpt2TesterFr	   c                    |j         d         | _        |j         d         | _        || _        || _        || _        || _        |d u| _        |d u| _        g | _	        d| j        |d||z  g}t          |          D ]n}t          j        |                              |rt          j        nt          j                  }| j	                            |                    |                     od | _        d | _        d | _        |	| _        |
| _        d S )Nr   r   r"   )shaper8   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger<   emptytypefloat16float32rY   torD   rB   rC   r   top_k_required_order)r   rf   rg   rh   num_attention_headshidden_size	num_layerdeviceis_fp16r   rr   
past_shape_i
empty_pasts                 r   r   zGpt2Tester.__init__t   s    $/!,%OA. "(, ,D 8"0"< 	O..

 	"" 	4 	4BZ0055w6aemmTYTabbJIZ]]6223333  
$8!!!r   returnc                 N    t          | j        | j        | j        | j                  S r1   )r   rf   rg   rh   rk   rS   s    r   
get_inputszGpt2Tester.get_inputs   s"    $.$*;T=PRVR[\\\r   c           	         ddl m t          j                            |dt          |          z             }t          j                            |          rt          d| d           d S t          j        |d           fd}g } ||| j	        d	           | j
        r ||| j        d
           | j        r ||| j        d           t          | j                  D ]*} ||| j        |         dt          |          z              +t#          |          D ]v\  }}	t%          t          j                            |d| d          d          5 }
|
                    |	                                           d d d            n# 1 swxY w Y   wd |                                D             }t#          |          D ]\  }}                    t/          ||         t0          j                  r||         n=||                                                                                                                   }	t%          t          j                            |d| d          d          5 }
|
                    |	                                           d d d            n# 1 swxY w Y   t          d|            d S )Nr   )numpy_helpertest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                     |                                          |                                                                                                |                     d S r1   )rY   
from_arrayclonecpunumpy)input_tensorstorch_tensorr   r   s      r   
add_tensorz-Gpt2Tester.save_test_data.<locals>.add_tensor   sR      !8!89K9K9M9M9Q9Q9S9S9Y9Y9[9[]a!b!bcccccr   rf   rg   rh   past_input_z.pbwbc                     g | ]	}|j         
S r_   )r   ).0outputs     r   
<listcomp>z-Gpt2Tester.save_test_data.<locals>.<listcomp>   s    HHHHHHr   output_zTest data saved to directory )onnxr   ospathjoinstrexistsr#   makedirsrf   ri   rg   rj   rh   rl   re   rk   	enumerateopenwriteSerializeToStringget_outputsr   
isinstancer   ndarrayr   r   )r   sessionr   save_test_data_dirtest_case_idr   r   r   itensorfoutput_names_namer   s                @r   save_test_datazGpt2Tester.save_test_data   sR   %%%%%%w||.0@3|CTCT0TUU7>>$ 	DtDDDEEEF
D4((((	d 	d 	d 	d 	d 
=$.+>>>  	IJ}d&7HHH" 	MJ}d&9;KLLLt|$$ 	F 	FAJ}dilGc!ff4DEEEE"=11 	4 	4IAvbgll4!994@@ 4A00223334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 IH'2E2E2G2GHHH!,// 	4 	4HAu!,,'q	5=AAfq		vayGXGXG\G\G^G^GdGdGfGf F bgll4)91)9)9)9::DAA 4Q00223334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 	4d4455555s$   (FF
	F
	/(J##J'	*J'	c                    t          |d         t          j                  rt          j        |d                   n=|d                                                                                                         | _        t          
                    | j                  | _        t          
                    | j        | j        | j                  | _        | j                                                                                            | j        dg                              |          | _        | j        rdt          j        | j        |z   dz
  g                              d                              | j        d                              |          | _        | j        ret          j        | j        t          j        | j        dg                              | j                  gd                              |          | _        g | _        t          |d         t>                    rtA          |d                   | _        dS tC          | j"                  D ]}t          ||dz            t          j                  rt          j        ||dz                      n.||dz                                                                            }| j        #                    |                    |                     dS )z7
        Update the inputs for next inference.
        r   r   N)$r   r   r   r<   
from_numpyr   detachr   rD   ra   predict_next_tokenrB   r   rr   rC   reshaper8   rq   rf   ri   r   rd   rO   repeatrg   rj   catrh   onestype_asrk   tuplelistrl   re   rY   )r   r   steprv   r   past_is         r   updatezGpt2Tester.update   s   
 ,6fQi+O+OuEVAY'''U[\]U^UdUdUfUfUmUmUoUoUsUsUuUu 	 '99$+FF&99$+tzSWSlmm*002299;;CCT_VWDXYY\\]cdd  	d/$6:;<<FFqIIPPQUQ`bcddgghnoo  " 	"')'J344<<T=PQQ # # bjj  	fQi'' 		4VAYDIII4<(( 4 4 "&Q-??8E$VAE]333A,,..5577 
 	  6!2!233334 4r   c                    t          d           | j        K| j        |j        z
                                                                  }|dk    rt          d|            t	          j        | j        |j        k              st          d| j        |j                   | j        r=t	          j        | j        |j        k              st          d| j        |j                   | j	        r=t	          j        | j
        |j
        k              st          d| j
        |j
                   t          | j                  t          |j                  k    sJ t          | j                  D ]\  }}|j        |j        |         j        k    sJ |                                dk    rO||j        |         z
                                                                  }|dk    rt          d	| d
|            dS )z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r#   rD   r2   r3   r<   rK   rf   ri   rg   rj   rh   r(   rk   r   rc   nelement)r   r   max_io_diffr   r   max_past_diffs         r   r6   zGpt2Tester.diff   s   
 	o;";8==??CCEEKT!!J[JJKKKy8+==>> 	P*DNH<NOOO  	9T.(2GGHH /%)   " 	9T0H4KKLL 1'+   49~~X]!3!33333"49-- 	A 	AIAv<8=#3#99999  1$$!'(-*:!: ? ? A A E E G G 4''?1????@@@	A 	Ar   r   c                     | dddf         }|dk    rt          j        |dd          }|S t          j        |dd          ddd|f         }|s|                                \  }}|S |S )z4
        Get top k topkens based on logits.
        Nr   T)
descending)r<   argmaxargsortsort)rD   r   required_orderlastTokenLogitsgeneratedTokenstopksorted_topk_s           r   r   zGpt2Tester.predict_next_token  s     !B-A::#l?AtDDO""="FFFqqq&5&yQD! #!%Q""Kr   c                    g }t          |          D ]}t          | |dz            t          j                  rt	          j        | |dz                      n
| |dz            }t          ||dz            t          j                  rt	          j        ||dz                      n
||dz            }||z
                                                                  }|                    |           t          d|            dS )zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r   zpresent_diff_max=N)
rl   r   r   r   r<   r   r2   r3   rY   r#   )onnx_outputonnx_io_outputre   present_diff_maxr   onnx_present_ionnx_io_present_irG   s           r   diff_presentzGpt2Tester.diff_present+  s   
 w 	. 	.A k!a%0%-@@( QU!3444 Q'  nQU3U]CC+ A!6777#AE* 
 ')::??AAEEGGH##H----4"24455555r   c                 F    ddl m}  ||           }ddlm} |j        |k    S )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)r   r   !onnxruntime.quantization.quantizer   producer_name)onnx_model_pathr   modelquantize_producers       r   is_quantized_onnx_modelz"Gpt2Tester.is_quantized_onnx_model@  sF    
 	_%%WWWWWW"&777r   Gpt2LMHeadModelT   r   .c                    t          d| d| d| dt          |           d|	 d           |j        j        }|j        j        }|j        j        }|j        j        }d}|t          j        k    }|r#d| 	                                d         j
        v sJ |                                                    |           t          j        d	d
d|j        |          }t          j        |||          }d}|t          j        k    rdnd}t#          |||          }t#          |||          }t#          |dz   ||          }t%          |          D ]9\  }}|	dk    r	||	k    r n&|dz  dk    rt          |            |d         }|                    dd          }|                    dd          }t)          |||||||||| 
  
        }t)          |||||||||| 
  
        } t)          |||||||d|| 
  
        }!|!j        }"|                    |"           |                    |"           t/          j                    5  t/          j        |"t.          j                  }#t7          |          D ]}$t9          |j                                                  d         }%t9          |j        d                                                   d         }&tA          j!                    }'t          j"        ||!#                                          }(|$                    |&tA          j!                    |'z
             |!%                    |(|$|           t          j&        | |#                                d          \  })}*|$                    |&|*dz             |%                    |)|$|           t          j        |"|&|%|j        |          }+t          j'        ||+           t          j(        | | #                                ||+ddd          \  },}*|$                    |&|*dz             ||k     r| )                    | |,||           |dz  }| %                    |,|$|           |
r|*                    |            t(          +                    |)|,|           t          d            t          d!|!j,                   t          d"|j,                   t          d#| j,                   |-                    |!||&|
$           |-                    |!| |&|
$           |#|!j,        |k    .                                z  }#t/          j/        |#          r nddd           n# 1 swxY w Y   |0                                 |0                                 ;|                                  |                                  |                                  dS )%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   ro             )r8   past_sequence_lengthsequence_lengthconfigmodel_class)
is_float16r   zQuantized OnnxOnnxz with IO Binding
   rf   rg   Nrh   Fr:   r      )
total_runsr    )r   T)r   return_numpyinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)rF   )1r#   r(   r   re   n_headn_embdeos_token_idr   FLOAT16r   rn   evalrq   r   get_output_shapesget_output_buffersINT8r   r   getra   r8   r?   r<   no_gradr=   r>   rl   r   rf   sizerk   timeitdefault_timerpytorch_inferencer}   r[   r   onnxruntime_inferenceauto_increase_buffer_size$onnxruntime_inference_with_binded_ior   r6   r   rB   rH   anyrK   rT   )-r   r   rv   test_inputs	precisionr   r   top_k_no_order	max_steps
max_inputsrF   r   r   re   r   r   r   test_data_savedr   init_output_shapesoutput_buffersr   r   torch_metriconnx_metriconnx_io_metricr   inputsrf   rg   rh   onnx_runneronnx_io_runnertorch_runnerr8   doner   seq_lenrE   
start_timepytorch_outputr   avg_latency_msoutput_shapesr   s-                                                r   test_generationzGpt2Tester.test_generationL  s   ( 	 cU  c  cN  c  c_h  c  cwz  |G  xH  xH  c  c  V`  c  c  c	
 	
 	
 ,&$$|0)"33
 	> 3 3 5 5a 8 ===== 	

 (9!$<#
 
 
 $67I6^hiii-6).-H-H))f!-FF FF#N5G$GX]^^";// q	' q	'IAvA~~!z//2v{{f{+I!::nd;;L#ZZ(8$??N$"" K ("" N &"" L &0J##J///&&z222 < <{:UZ@@@!),, : :D";#8#=#=#?#?@@CG#'(8(;(@(@(B(B#C#CA#FL!'!5!7!7J%/%A%I`I`IbIb%c%cN ,,\6;O;Q;QT^;^___ ''fEEE2<2R!7!7!9!9a3 3 3/K  ++L.6:QRRR&&{D&AAA$.$@"$$/% % %M 8WWW
 #G&1133&%#$%*48  && #..|^f=TUUU&77&55g~Oacrsss'1,")).$GGG U#((888"//^WUUUo...i)BCCCh(@AAA68STTT**<l\c*ddd"--lNLbi-jjj<#<#L"Q"Q"S"SSDy w< < < < < < < < < < < < < < <| !!###$$&&&&s   <L	UU	U	N)Fr	   F)r   F)r\   r]   r^   r   r   r}   r   r   r6   staticmethodr   r   r   r   FLOAT32r  r_   r   r   ra   ra   s   s#        ")9 )9 )9 )9V]J ] ] ] ]%6 %6 %6N&4 &4 &4P%A %A %AN    \$ 6 6 \6( 	8 	8 \	8  #%h h h \h h hr   ra   )loggingrW   r   r&   r   r   r<   benchmark_helperr   gpt2_helperr   r   	getLoggerr\   loggerr   ra   r_   r   r   <module>r     s      				        & & & & & & . . . . . . . .		8	$	$[2 [2 [2 [2 [2 [2 [2 [2|B B B B B B B B B Br   