
    fPiC                         d dl Z d dlZd dlZd dlmZ d dlZej        ej	        ej
        ej        dZd Z G d d          Z	 	 	 	 	 d
d	ZdS )    N)AutoTokenizer)ztorch.int32ztorch.int64ztorch.float32ztorch.float16c                     ddl m} |                    |                                 |                                |                                |                                z  |j        j                   d S )Nr   )cudart)cudar   
cudaMemcpydata_ptrelement_sizenelementcudaMemcpyKindcudaMemcpyDeviceToDevice)dstsrcr   s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/phi2/inference_example.pycuda_memcpyr      sq    
S\\^^+6	        c                   \    e Zd Zd Zd Zd Zdej        dedefdZ		 dd
Z
ddZd Zd ZdS )ORTGeneratorc                     || _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        i | _        d S )N    P   i   r   F)	onnx_decoder_path	num_heads	head_size
num_layersmax_sequence_length	device_iduse_cuda_graphuse_traced_inputsstatic_inputs_map)selfdecoder_paths     r   __init__zORTGenerator.__init__"   sL    !-#' #!&!#r   c                 R   || j         v rd S t          j        d          }t          j        d| j                  }i }t          j        |dft          j        |          |d<   t          j        dgt          j        |          |d<   t          j        |dgz  t          j        |          |d<   t          j        dgt          j        |          |d	<   || j        | j	        | j
        f}t          | j                  D ]x}t          j        ||t          j        
          }|                    d| |                                d| |                                                                i           yt          j        |ddft          j        |          |d<   || j         |<   d S )Ncpur      )dtypedevice	input_idsr   step	seqlens_ktotal_sequence_lengthr'   r&   	past_key_past_value_   logits)r   torchr'   r   zerosint32tensorint64r   r   r   ranger   float16update
contiguousclone)r    
batch_size
cpu_devicecuda_device	static_iocache_shapeicaches           r   append_static_inputsz!ORTGenerator.append_static_inputs-   s   ///F\%((
l64>::	!&j!_EKXc!d!d!d	+!L!EKTTT	&!&jA3.>ekZe!f!f!f	+-2\1#U[Yc-d-d-d	)*!4>43KT^\t'' 	s 	sAKKu}UUUEo!oou/?/?/A/ACTQRCTCTV[VaVaVcVcVnVnVpVpqrrrr#k:q%*@^ijjj	(-6z***r   c           	      	   | j         rt          j        nt          j        | _        t          j        |d         | j        t          j                  }t          j        |d         | j        t          j                  }|j        \  }}| j	        o|| j
        v o| j        o| j         | _        | j        s't          j        dg| j        t          j                  n| j
        |         d         }| j        s*t          j        |dgz  | j        t          j                  n| j
        |         d         }t          ||                    d                              d                              t          j                             | j        s4t          j        dgt          j        d          t          j                  n| j
        |         d	         }||d<   |                                |                                d
}	| j        r|                                |	d<   | j	        r1|                                |	d<   |                                |	d	<   |	d= | j        r| j        nd}
| j        rd|| j        |
| j        fn|| j        |
| j        f}| j        st1          | j                  D ]}t          j        || j        | j                  }| j        sU|	                    d| |                                d| |                                                                i          n+|	                    d| |                                i           nt1          | j                  D ]m}|	                    d| | j
        |         d|                                          d| | j
        |         d|                                          i           nt          j        ||d| j        | j                  }d|                                i}| j        s| j        rd|| j        || j        fn|| j        || j        f}t1          | j                  D ]}t          j        || j        | j                  }| j        sC|                    d| |                                d| |                                i          n+|                    d| |                                i           |	|fS )Nr(   r,   attention_maskr   r)   r*   r%   r$   r+   )r(   rD      r-   r.   past_r/   r0   present_key_present_value_present_)use_fp16r1   r7   float32torch_dtyper4   r'   r3   shaper   r   use_buffer_share	packed_kvr   r5   r   sumsubtor9   use_stepr   r   r   r6   r   r2   r8   r:   )r    encodings_dictr(   rD   r;   sequence_lengthr)   r*   total_seq_lengthinputspast_seq_length
past_shaper@   pastr0   outputspresent_shapepresents                     r   get_initial_inputs_and_outputsz+ORTGenerator.get_initial_inputs_and_outputsD   s)   ,0ML5==u}L!<T[X]Xcddd	n5E&Ft{bgbmnnn&/o#
O  #t55#%# N"	 	 )<EL!T[DDDD'
3F; 	 )AELqc)$+U[QQQQ'
3K@ 	
 	I~11!4488;;>>u{KKLLL )MEL!U\%%8%8LLLL'
34KL 	
 . #--//,7799
 

 = 	/!__..F6N 	)"+"6"6"8"8F;.>.I.I.K.KF*+'(6:6KR$22QR ~OQ
DNOT^LLdnot~N 	 % 	4?++  {:dkIYZZZ  >IFMM?q??DOO4E4EGXUVGXGXZ^ZdZdZfZfZqZqZsZs"tuuuT__5F5F'GHH 4?++  'A)?
)KOXYOO)\)g)g)i)i)a))4+A*+MN_\]N_N_+`+k+k+m+m    Z%[_[klllV..001$ 	 >SJPP $./4>R 
 4?++  +mDKtO_```
  >PGNN+++W-?-?-A-ACWTUCWCWY`YkYkYmYmn   !A9K9K9M9M(NOO wr   modelrW   r[   c           
         |                                 }d }|                                D ]\  }}|                    ||j        j        |j        j        dk    rdn|j        j        t          t          |j                           t          |j
                  |                                           |j        }|                                D ]}|j        }	| j        rd|	v r||	                    dd                   }|                    |	|j        j        |j        j        | j        rt$          j        nt$          j        t          |j
                  |                                           ||	         }|                    |	|j        |j        dk    rdn|j        | j        rt$          j        nt$          j        t          |j
                  |                                           |S )Nr$   r   )namedevice_typer   element_typerM   
buffer_ptrr]   rZ   )
io_bindingitems
bind_inputr'   typeindexpt_to_npreprr&   tuplerM   r   get_outputsra   rN   replacebind_outputrJ   npr7   rK   )
r    r_   rW   r[   re   r'   kvoutputra   s
             r   apply_io_bindingzORTGenerator.apply_io_binding   s   %%''
LLNN 		 		DAq!!HM x}55!!18>%d17mm4AGnn::<< "    XFF'')) 	 	F;D$ d):):4<<	6::;&& !hn04"M"**2:.. zz|| '     DM&& &#);%#7#7aaV\04"M"**2:.. zz|| '     r   TFc                 <   || _         t          j                    }d|_        d|_        || _        | j         dk    rd| j         | j        dfnd}t          j        | j        ||g          | _        t          j	                    | _
        t          j                                        rt          j        d| j                   nt          j        d          | _        || _        || _        || _        || _        t'          j        d	d
          | _        d| j        _        d S )N   r   CUDAExecutionProvider)r   enable_cuda_graphCPUExecutionProvider)sess_options	providersr   r$   zmicrosoft/phi-2T)trust_remote_codez[PAD])r   ortSessionOptionslog_verbosity_levellog_severity_levelr   InferenceSessionr   sess
RunOptionsror1   r   is_availabler'   rJ   rN   rO   rS   r   from_pretrained	tokenizer	pad_token)	r    r   rJ   rN   rO   rS   r   rz   eps	            r   create_sessionzORTGenerator.create_session   s    #)+++,(*+', ~"" %DNY]Yl&m&mnn' 	
 ()?lgifjkkk	."">Cj>U>U>W>Wpel64>:::]b]ijo]p]p  0" &67H\`aaa#*   r   c                    |                      |          \  }}|d                                         }|j        \  }}	|	}
t          j        || j        t          j                  }|rg }d}|
|k     r|                     | j        ||          }|rt          j	                    }|
                                 |r}| j        r| j                            dd           | j                            || j                   | j        r1| j                            d| j        rt!          |          nd           d}n | j                            || j                   |                                 |r+t          j	                    }|                    ||z
             |d         d d dd d f         }t          j        |d	          }||z  | j        j        k    }|                    || j        j                                      |d
g          }t          j        ||gd	          }t          j        |          rn|
d
z  }
|                    t          j                  |d<   | j        r=t9          | j        |         d         |d                    | j        |         d         |d<   | j        rqt          j        |
d
z
  g| j        t          j                   |d<   | j        r=t9          | j        |         d         |d                    | j        |         d         |d<   | j        r|d         }||                     |d
          z                       t          j                  |d<   |
|d         d<   | j        rut9          | j        |         d         |d                    | j        |         d         |d<   |d         d         | j        |         d         d<   | j        |         d         |d<   nRt          j        |d         |                     |d
          gd
                              t          j                  |d<   |d         j        d
         d
k    rI|d         d d d d
d d f         !                                |d<   | j        r| j        |         d         |d<   |d         "                                 | j#        sMtI          | j%                  D ]=}| j&        s#|d|          |d| <   |d|          |d| <   ,|d|          |d| <   >|d         j        d
         }| j&        rd|| j'        || j(        fn|| j'        || j(        f}tI          | j%                  D ]}t          j        || j        | j)                  }| j&        sU|*                    d| |!                                d| |                                !                                i          n+|*                    d| |!                                i           |
|k     |rYtW          d| d|	 d||	z
              tW          dd|d         z   ddtY          j-        |d
d                    z   d           d S | j        .                    |d          }|S )Nr(   r,   Tgpu_graph_idz-1Fr0   )dimr%   r)   r*   r+   r   rD   rG   r-   rH   r.   rI   rF   rE   zBatch size: z, Sequence length: z, Token num: zPrompt letency: i  zms, Token latency: ms)skip_special_tokens)/r^   r:   rM   r1   r2   r'   boolrt   r   timesynchronize_inputsr   r   add_run_config_entryrun_with_iobindingr   strsynchronize_outputsappendargmaxr   eos_token_idmasked_fillreshapecatallrR   r3   r   r   rS   r4   r5   r9   zero_rN   r6   r   rO   r   r   rL   r8   printrp   meanbatch_decode)r    rT   
max_lengthcuda_graph_annotation	benchmarkrW   r[   all_token_idsr;   rU   current_lengthhas_eoslatency
prompt_runre   startendnext_token_logitsnext_tokenstokens_to_addprevious_seqlens_kr@   new_sequence_lengthr\   r]   textss                             r   generate_implzORTGenerator.generate_impl   s6   ==nMM{+1133&3&9#
O(+jEJOOO 	G
z))..ty&'JJJ $	))+++ B& GG00FFF	,,ZAAA& G00&dF\(f,A(B(B(Bbf   #

	,,ZAAA**,,, ,ikksU{+++ !( 1!!!R( ;,'8bAAAK +t~/JJG (33GT^=XYYaacmopbqrrM!I}m&D"MMMM y!!  aN"/"2"25;"?"?F;% VD2:>{KVT_M`aaa&*&<Z&H&U{#} P!&~/A.B4;^c^i!j!j!jv) P 6z B6 JFSYN[[[%)%;J%G%OF6N" "%+K%8"'9gX<N<Nz[\<]<]']&a&abgbm&n&n{#5C./2) r 6z B; OQWXcQdeee*.*@*L[*YF;'U[\sUtuvUwD*:67NOPQR6:6LZ6XYp6qF23+09,-/A/A*a/P/PQST, ,"U[// '(
 x &q)Q..$+H$5aaa!QQQh$?$J$J$L$L!) U(,(>z(J8(TGH%H##%%%( t// F FA> F29:L:L:L2M1/4;<PQ<P<P4Q0Q0011.5nnn.E{q{{++&,-=&>&DQ&G# ~[Q
DN4GXX$dn6I4>Z 
 t//  A#k-SWScdddG  $~T 2q 2 2G4F4F4H4H 4 4 4gmmoo6P6P6R6R   %^^^^^W=O=O=Q=Q,RSSK z))P  	zzzzozz\fix\xzz   jTGAJ%6jj4RTRYZabcbdbdZeRfRfKfjjjkkkF++Mt+TTr   c                 h    | j                             |d          }|                     |||          S )NT)padding)r   batch_encode_plusr   )r    promptr   r   rT   s        r   generatezORTGenerator.generatea  s5    99&$9OO!!.*>STTTr   c                 b   |\  }}||z   }i }t          j        dd||ft           j                                                  |d<   t          j        ||ft           j                                                  |d<   |                     |||d           |                     |||d           d S )	Nr   iX  )r&   r(   rD   F)r   T)r1   randintr3   tolistonesr   )r    prompt_shape	token_numr   r;   rU   r   rT   s           r   generate_benchmarkzORTGenerator.generate_benchmarkf  s    &2#
O$y0
&+mAuz?>[chcn&o&o&o&v&v&x&x{#+0:z?6S[`[f+g+g+g+n+n+p+p'( 	>:7LX]^^^ 	>:7LX\]]]]]r   N)TTFFF)F)__name__
__module____qualname__r"   rB   r^   r}   r   dictrt   r   r   r   r    r   r   r   r   !   s        	$ 	$ 	$7 7 7.[ [ [z&c&: &D &SW & & & &R pu+ + + +2} } } }~U U U
^ ^ ^ ^ ^r   r   FTc                 
   t          |                               |||||           fd}dg}	|s ||	           |r=d}
dD ]:}                    |           dD ]}||f}                    ||
|           9d S d S )Nc                    t          |           }r                    |                               | d|          }t          t          |                    D ].}t	          d| |                    t	          d||                    /d S )N)r;      )r   r   zPrompt: zTexts: )lenrB   r   r6   r   )r   example_batch_sizer   r@   	generatorr   s       r   
simple_runzrun_phi2.<locals>.simple_run  s     [[ 	J**6H*III""6cQc"dds5zz"" 	' 	'A*fQi((()U1X&&&&	' 	'r   zV```python
    def print_prime(n):
    """
    Print all primes between 1 and n
    """r   )r%   rE   rv      )   i   )r   )r   r   rB   r   )onnx_model_pathrN   r   rO   rJ   rS   r   run_benchmarkr   r   r   r;   rU   r   r   s         `       @r   run_phi2r   u  s    _--IY2BIxYghhh' ' ' ' ' '	F  
6  h	& 	h 	hJ**:666#, h h *O<,,\9\f,ggggh	h h	h 	hr   )FTFFF)r   numpyrp   r1   transformersr   onnxruntimer}   r3   r5   rK   r7   rj   r   r   r   r   r   r   <module>r      s          & & & & & &     88ZZ	   Q^ Q^ Q^ Q^ Q^ Q^ Q^ Q^p
 )h )h )h )h )h )hr   