
    fPi--                       d dl mZ d dlZd dlZd dlZd dlZd dlZd dlm	Z
 d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
l m!Z! d dl"Z# ej$        d          Z%d#dZ&d#dZ'd Z(	 	 d$d%dZ)d&dZ*g fd&d Z+e,d!k    r8d"Z-ej.        -                    e-            ej/        e-            e+             dS dS )'    )annotationsN)setup_logger)get_rankget_size)add_io_bindings_as_ortvaluesconvert_inputs_for_ort%get_merged_sample_with_past_kv_inputsget_sample_inputsget_sample_with_past_kv_inputsverify_ort_inputs)setup_torch_model)make_dynamic_cache)
AutoConfig)__version__)DynamicCache argsargparse.Namespaceconfigr   c                6    | j         rdnd\  }}|j        }|||fS )N)      )r   r   )use_past_kvmax_position_embeddings)r   r   past_sequence_lengthcurr_sequence_lengthmax_sequence_lengths        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/llama/llama_parity.pyget_sequence_lengthsr   %   s3    ;?;K1WQW.. 8!57JJJ    c                0   t                      }d}t          | |          \  }}}| j        r)t          || j        ||||| j        | j        d|
  
        }nA| j        r!t          || j        ||| j        d|          }nt          || j        ||d          }|S )N   T)seq_lenpast_seq_lenmax_seq_lenuse_fp16use_buffer_sharereturn_dict
world_size)r&   r(   r)   )r(   )
r   r   mergedr	   devicer&   r'   r   r   r
   )r   r   r)   
batch_sizer   sequence_lengthr   inputss           r   
get_inputsr/   +   s    JJAUVZ\bAcAc>/+>{ g6K#-+]!2!
 
 
 
	 g/K]!
 
 
 #64;
OaefffMr    c                "   t          | t          t          t          f          r| S t          | t                    rt	          d | D                       S t          | t
                    rd | D             S t          | t                    rd | D             S t          | t                    rd |                                 D             S t          | t          j
                  r|                                 S t          | d          r|                                 S t          | t                    rCt          t!          t          t#          | j        | j        d                                        S t)          dt+          |                      )	Nc              3  4   K   | ]}t          |          V  d S )Ntorch_deepcopy.0vs     r   	<genexpr>z!torch_deepcopy.<locals>.<genexpr>R   s*      661^A&&666666r    c                ,    g | ]}t          |          S  r2   r4   s     r   
<listcomp>z"torch_deepcopy.<locals>.<listcomp>T        111aq!!111r    c                ,    h | ]}t          |          S r9   r2   r4   s     r   	<setcomp>z!torch_deepcopy.<locals>.<setcomp>V   r;   r    c                4    i | ]\  }}|t          |          S r9   r2   )r5   kr6   s      r   
<dictcomp>z"torch_deepcopy.<locals>.<dictcomp>X   s&    ???A>!$$???r    cloneF)strictz(torch_deepcopy not implemented for type )
isinstanceintfloatstrtuplelistsetdictitemsnpndarraycopyhasattrrA   r   r   r3   zip	key_cachevalue_cacheNotImplementedErrortype)values    r   r3   r3   N   sq   %#uc*++ % 766666666% 21151111% 21151111% @??????%$$ zz||ug {{}}%&& o!.c%/5K\ej6k6k6k1l1l"m"mnnn VeVV
W
WWr    locationrF   use_auth_tokenboolkv_cache_ortvaluesrJ   pytorch_modelNone | torch.nn.ModuleNone | AutoConfigc                   |}|9t          | ||| j        rt          j        nt          j        | j                  \  }}t          | |          }d|v rGt          j        t                    t          j        d          k    rt          |d                   |d<   t          |          }| j        dk    rt          j                                         t          j                    }	 |di |j                                                                                                        }
| j        dk    rt          j                                         t          j                    }t(                              d||	z
   d           | j        r!|~t          j                                         t1          | |          \  }}}t3          || j        ||          }| j                                         d}|d	k    r|d
| j        if}t;          j        | j        t;          j                     |g          }tC          ||          }| j        dk    rtE          ||| j        tG          | j                  | j        |          \  }}|$                                 t          j                    }	|%                    |           |&                                 t          j                    }|'                                d         }~nDt          j                    }	|(                    d |          }t          j                    }|d         }t(                              d||	z
   d           d| j        v s	d| j        v rdnd}tS          j*        |
|||          }t(          +                    d|            |s2t(          +                    dtS          j,        |
|z
                        |S )Ntorch_dtyper+   past_key_valuesz4.45cpuzPyTorch took z s)r'   r$   r%   ExecutionProviderCUDAExecutionProvider	device_id)sess_options	providers)
ort_inputsr+   rd   r'   rY   r   zONNX Runtime took int4int8g      4@g      ?)rtolatolz,Are PyTorch and ONNX Runtime results close? z
Max diff: r9   )-r   r&   torchfloat16float32r+   r/   pvVersiontransformers_versionr   r3   execution_providercudasynchronizetimelogitsdetachra   numpyloggerinfo	small_gpuempty_cacher   r   r'   upperrankortInferenceSessiononnx_model_pathSessionOptionsr   r   rD   synchronize_inputsrun_with_iobindingsynchronize_outputscopy_outputs_to_cpurunrL   allclosewarningmax)r   rV   rW   rY   rZ   r   py_modelr.   inputs_after_deepcopy
start_time
pt_outputsend_timer   _r   ep	ort_model
io_bindingort_outputstolparitys                        r   verify_parityr   d   s    H,*.-JU];
 
 
 f%%FF""rz2F'G'G2:V\K]K]']']$6v>O7P$Q$Q ! +622%''
   J 221229@@BBFFHHNNPPJ%''
   y{{H
KK9: 5999:::~ !(.
    4Hf3U3U0!0#.)'	  F #))++	>	>	>B	$$$;	*+$'))$  I
 y&11F %'')E*$)nn!21*
 *
 *
&
& 	%%'''Y[[
$$Z000&&(((9;; 4466q9I Y[[
mmD&119;;!!n
KK>X
%:>>>??? 4///6T=Q3Q3Q##W[C[[sEEEF
NNJ&JJKKK HFBF:+C$D$DFFGGGr    argv	list[str]c                >   t          j                    }|                    dddd           |                    dddt          j                            d          d	
           |                    dddt          j                            d          d
           |                    ddddg dd           |                    dddd           |                    d           |                    dddd           |                    d           |                    dd dd!           |                    d"           |                    d#dd$           |                    d%           |                    d&d'dg d(d)*           |                    d+dt          d,d-.           |                    d/dd0           | g k    r|                                n|                    |           }|j	        d1v s|j	        d2k    r|j
        dk    rd3nd4|_	        |S )5Nz-mz--model_nameFzModel name in Hugging Face)requiredhelpz-tz--torch_model_directory.zMPath to folder containing PyTorch model and associated files if saved on disk)r   defaultr   z-oz--onnx_model_pathTzSPath to ONNX model (with external data files saved in the same folder as the model)z-epz--execution_providerra   )ra   rs   rocmz(Execution provider to verify parity with)r   r   choicesr   z-vz	--verbose
store_truezPrint verbose logs)actionr   )verbosez-pz--use_past_kvzfUse past key and past value as inputs to the model. Necessary for decoder_with_past_model.onnx models.)r   z-gz--use_buffer_sharezWUse if model has GroupQueryAttention and you want to enable past-present buffer sharing)r'   z--mergedz2Use merged model (i.e. decoder_merged_model.onnx).)r*   z-fpz--precision)rh   ri   fp16fp32zPrecision of model)r   r   r   z--cache_dirz./model_cachezQmodel cache dir to override default HF cache dir to avoid overflood the /home dir)r   rT   r   r   z--small_gpuzhLoad the llama in GPU every time for parity_check if it's running in a machine which GPU memory < 36GB. >   r   ri   rh   r   r   )argparseArgumentParseradd_argumentospathjoinset_defaultsrF   
parse_args	precisionrr   )r   parserr   s      r   get_argsr      s   $&&F
)	     !S!!\     S!!b     '''7     !	     &&&
u	     E***
f	     ///
A    
 u%%%
000!     `     w     #'"**6&2C2CD2I2ID
 >---$.F2J2JtOfjoOoOo 	 	N
 Kr    c                   t          |           }t          |j                   t                              d|            t                      }t          |d|j        dk               ||_        t          |d|j	        dk    rdnd|            t          |dt          j        |j                             |j        t          j                            d          k    }|r|j        n|j        }i }|j        st'          ||||           d S d x}}|j        s9t+          ||||j        rt          j        nt          j        |j        	          \  }}d
|_        t'          ||||||          }d|_        t'          ||||||           d S )NzArguments: r&   r   device_namera   zcuda:r+   r   r^   F)rZ   r   T)r   r   r   ry   rz   r   setattrr   r~   rr   rl   r+   r   torch_model_directoryr   r   r   
model_namer*   r   r{   r   r&   rm   rn   r   )r   r   r~   rW   rV   rY   r   llamas           r   mainr   ,  s   D>>D
KK$d$$%%%::D D*dn6777DID-$*AU*J*JP^X\P^P^___D(EL)9::;;;/27<<3D3DDN"0Ptd6PH; ndHn6HIIIII~ 	-.2mNU]]{  MFE !*(N,>e\b
 
 

  dHn6HX]flmmmmmmr    __main__r"   )r   r   r   r   )NN)r   r   rV   rF   rW   rX   rY   rJ   rZ   r[   r   r\   )r   r   )0
__future__r   r   loggingr   ru   rx   rL   packaging.versionversionro   rl   benchmark_helperr   dist_settingsr   r   llama_inputsr   r   r	   r
   r   r   llama_torchr   (models.torch_export_patches.cache_helperr   transformersr   r   rq   transformers.cache_utilsr   onnxruntimer   	getLoggerry   r   r/   r3   r   r   r   __name__seedrandommanual_seedr9   r    r   <module>r      sK   # " " " " "   				             ) ) ) ) ) ) , , , , , , , ,                * ) ) ) ) ) G G G G G G # # # # # # < < < < < < 1 1 1 1 1 1    		2		K K K K       FX X X6 -1 $a a a a aHa a a aH  $n $n $n $n $nN zDINN4EdDFFFFF	 r    