
    fPiN                        d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZmZ d;d
Z	 	 d<d=dZ	 	 	 	 d>d?dZ	 	 	 	 	 d@dAdZdBd!ZdCdDd"ZdEd%Z	 	 	 dFdGd)ZdHd+ZdId.ZdJd1ZdKd4ZdLd:ZdS )M    )annotationsN)
AutoConfigAutoTokenizer)DynamicCache)InferenceSessionOrtValueattention_masktorch.Tensoruse_past_kvboolc                    |                                                      d          dz
  }|                    | dk    d           |r|d d df                             d          }|S )N   r   )longcumsummasked_fill_	unsqueeze)r	   r   position_idss      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_idsr      sq    !&&((//33a7Ln11555 9#AAArE*44R88     ptFconfigr   devicetorch.device
batch_sizeintseq_lenenginestrreturn_dictc                   t          j        d| j        ||ft           j                  }t          j        ||t           j                  }t          |d          }|dk    r|                                n|                    |          }|dk    r|                                n|                    |          }|dk    r|                                n|                    |          }|s|||fS |||d}	|	S )Nr   lowhighsizedtyper'   Fr   ort	input_idsr	   r   )torchrandint
vocab_sizeint64onesr   numpyto)
r   r   r   r   r   r!   r,   r	   r   inputss
             r   get_sample_inputsr5       s    !&*;:wBW_d_jkkkIZ
G5;GGGN#NFFFL &,u__	!!!),,v:N:NI/5^))+++NDUDUV\D]D]N+1U??<%%'''PV@W@WL 9><88 ($ F
 Mr   r   past_seq_lenuse_fp16
world_sizec                4   t          j        d| j        |dft           j                  }t          j        ||dz   t           j                  }	t          |	d          }
t          | ||||          }|dk    r|                                n|                              }|dk    r|	                                n|	                              }	|dk    r|
                                n|
                              }
|dk    rt          |          nfd	|D             }|st          |t                    sJ ||	|
|fS ||	|
d
}|dk    r-t          |t                    sJ |                    |           nt          |t                    sJ ||d<   |S )Nr   r   r#   r(   Tr)   r8   r*   c                |    g | ]8}|d                                         |d                                        f9S r   r   r3   .0kvr   s     r   
<listcomp>z2get_sample_with_past_kv_inputs.<locals>.<listcomp>X   E    @x@x@xjl"Q%((6BRBRTVWXTYT\T\]cTdTdAe@x@x@xr   r+   past_key_values)r-   r.   r/   r0   r1   r   get_past_kv_inputsr2   r3   flatten_past_kv_inputs
isinstancelistdictupdate)r   r   r   r6   r7   r   r!   r8   r,   r	   r   past_kvr4   s    `           r   get_sample_with_past_kv_inputsrK   C   s    !&*;:q/Y^YdeeeIZ
L1,<EKPPPN#NEEEL \8XbcccG &,u__	!!!),,v:N:NI/5^))+++NDUDUV\D]D]N+1U??<%%'''PV@W@WL+1U??w'''@x@x@x@xpw@x@x@x   B'4(((((><AA ($ F
 '4(((((g'4((((($+ !Mr   max_seq_lenuse_buffer_sharec                b   t          j        d| j        ||ft           j                  }t          j        |||z   t           j                  }t          ||dk              }t          | ||||
          }|dk    r|                                n|                              }|dk    r|                                n|                              }|dk    r|                                n|                              }|dk    rt          |          nfd|D             }|	st          |t                    sJ ||||fS |||d}|dk    r@t          |t                    sJ |                    |           |rt          |||          }nt          |t                    sJ ||d	<   |S )
Nr   r#   r(   r)   r:   r*   c                |    g | ]8}|d                                         |d                                        f9S r<   r=   r>   s     r   rA   z9get_merged_sample_with_past_kv_inputs.<locals>.<listcomp>   rB   r   r+   rC   )r-   r.   r/   r0   r1   r   rD   r2   r3   rE   rF   rG   rH   rI    enable_past_present_share_buffer)r   r   r   r   r6   rL   r7   rM   r   r!   r8   r,   r	   r   rJ   r4   s    `              r   %get_merged_sample_with_past_kv_inputsrQ   y   s    !&*;:wBW_d_jkkkIZ
L7,B%+VVVN#NQRARTTTL \8XbcccG &,u__	!!!),,v:N:NI/5^))+++NDUDUV\D]D]N+1U??<%%'''PV@W@WL+1U??w'''@x@x@x@xpw@x@x@x   B'4(((((><AA ($ F
 '4(((((g 	Y5flKXXF '4((((($+ !Mr   split_kvc                h   |rt           j        nt           j        }| j        | j        z  }	|st           j                            ||| j                                      |          dt          j        t          j	        |||f          d          z                      |          t           j                            || j
        || j        |	                              |          t           j                            || j
        || j        |	                              |          t          j        |t           j                  d}
nkt           j                            ||| j                                      |          t          j        t          j	        |||ft           j                  d          dz
                      t           j                  t          j        |t           j                  d}
t          | j
                  D ]}|
                    d| dt           j                            || j        ||	                              |          d	| dt           j                            || j        ||	                              |          i           |rt!          |
||          }
|
S )
Ng     r   )kr(   )x	attn_maskk_cachev_cachepos)rU   rV   rY   k__cachev_)npfloat16float32hidden_sizenum_attention_headsrandomrandastypetriur1   num_hidden_layersarrayr0   int32rangerI   rP   )r   r   r6   r   rL   r7   rM   rR   np_dtype	head_size
ort_inputsis               r   get_msft_sample_inputsrn      st    &5rzz2:H"f&@@I !a
GV5GHHOOPXYY"RWRWj+{5[-\-\`a%b%b%bbjjkstty~~F4lFD^`i fXy~~F4lFD^`i fX8L999

 



 
GV5GHHOOPXYY'"':{K*PXZX`"a"a"aefgggjkkss  8L999
 

 v/00 
	 
	A"NNNBINN"F$>i% %fX&&"NNNBINN"F$>i% %fX&&	 	 	 	  	a9*lT_``Jr   c                    | j         |z  t          | d          r| j        n| j        | j        z  |rt
          j        nt
          j        fdt          | j	                  D             }|S )Nhead_dimc           
     t    g | ]4}t          j                   t          j                   f5S )r(   )r-   rc   )r?   _r   rk   	num_headsr6   torch_dtypes     r   rA   z&get_past_kv_inputs.<locals>.<listcomp>   s]       
  Jz9lI[YYYJz9lI[YYY	
  r   )
num_key_value_headshasattrrp   r`   ra   r-   r^   r_   ri   rf   )	r   r   r6   r7   r8   rJ   rk   rs   rt   s	    ``   @@@r   rD   rD      s    *j8I#*6:#>#>tFDVZ`ZtDtI#+>%--K       
 v/00  G Nr   rC   'list[tuple[torch.Tensor, torch.Tensor]]c                ^   i }t          |           D ]\  }\  }}t          | t                    r}|                                                                                                |d| <   |                                                                                                |d| <   |                                                                                                |d| d<   |                                                                                                |d| d<   |S )Npast_key_values_key_cache_past_key_values_value_cache_past_key_values..key.value)	enumeraterF   r   detachcpur2   )rC   rJ   rm   past_kpast_vs        r   rE   rE      s   G(99 R RFFo|44 	R8>8K8K8M8M8S8S8U8UG4445:@--//:M:M:O:O:U:U:W:WG61667728--//2E2E2G2G2M2M2O2OG.q.../4:MMOO4G4G4I4I4O4O4Q4QG0q00011Nr      	pt_inputsrH   c                l   i }|                                  D ]\  }}t          |t          j                  r|||<   %|dk    r#|                    t          |                     N|                                                                                                ||<   |rt          |||          }|S )NrC   )
itemsrF   r]   ndarrayrI   rE   r   r   r2   rP   )r   rM   r6   rL   rl   rT   vs          r   convert_inputs_for_ortr      s     J!! 5 51a$$ 	5JqMM###4Q778888HHJJNN,,2244JqMM  ]5j,P[\\
r   rl   c                    |                                  D ]N\  }}d|v sd|v rA|j        \  }}}}t          j        ||||f|j                  }	||	d |d |d |d |f<   |	| |<   O| S )NcacherC   r(   )r   shaper]   zerosr'   )
rl   r6   rL   rT   r   r   rs   rr   rk   new_vs
             r   rP   rP     s      "" " "1a<<,1123'/J	1iHj)[)LTUT[\\\EHIE+:+z	z=L=*9*DE!JqMr   modelr   c                &   d |                                  D             }t          |                                          }||z
  }t          |          r!t	          d|            t          d          ||z
  }t          |          r|D ]}||= |S )Nc                    h | ]	}|j         
S  name)r?   model_inputs     r   	<setcomp>z$verify_ort_inputs.<locals>.<setcomp>%  s    KKKK$KKKr   z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)
get_inputssetkeyslenprint	Exception)r   rl   model_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputs          r   verify_ort_inputsr   #  s    KK8H8H8J8JKKKLjoo''((K!K/N
> aIIIJJJ_``` %|3
 .!3 	. 	.,--r   	device_idkv_cache_ortvaluesc                   |                                  }d |                                 D             }|                                D ]\  }}	||vr
|rwd|v sd|v ro||vr3t          j        |	||          }
|                    ||
           |
||<   K||                             |	           |                    |||                    t          j        |	||          }
|                    ||
           |                                 D ]r}|j        }|rOd|v sd|v rG|	                    dd          	                    dd          }|
                    |||                    Z|                    |||           s||fS )Nc                    h | ]	}|j         
S r   r   )r?   rm   s     r   r   z/add_io_bindings_as_ortvalues.<locals>.<setcomp>A  s    777qAF777r   r   rC   )device_typer   outpresent)
io_bindingr   r   r   ortvalue_from_numpybind_ortvalue_inputupdate_inplaceget_outputsr   replacebind_ortvalue_outputbind_output)r   rl   r   r   rM   r   r   r   rT   r   v_deviceoutputr   
input_names                 r   add_io_bindings_as_ortvaluesr   7  s    !!##J77E$4$4$6$6777L  "" 8 81 L    
	8A1Ba1G1G***#7vYbccc..q(;;;(0"1%%"1%44Q777..q2DQ2GHHHH3A6U^___H**1h7777##%% R R{ 	R$)t2C2CeW55==iIZ[[J++D2DZ2PQQQQ""4Vy"QQQQ)))r   r4   outputsc           
     X   t          | |          }d }t          j        t          j        t          j        t          j        d}|                                 }|                                D ]\  }}	|                    ||	j	        j
        |	j	        j
        dk    rdn|	j	        j        |t          |	j                           t          |	j                  |	                                           |	j	        }|                                 D ]}
|
j        }|r d|v r||                    dd                   n||         }	|                    ||j
        |j
        dk    rdn|j        |rt          j        nt          j        t          |	j                  |	                                           |S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32r   r   )r   r   r   element_typer   
buffer_ptrr   rC   )r   r]   rh   r0   r^   r_   r   r   
bind_inputr   typeindexreprr'   tupler   data_ptrr   r   r   r   )r   r4   r   r7   rM   r   pt_to_npr   rT   r   r   r   s               r   add_io_bindings_as_tensorsr   d  s    uf--FFxx	 H !!##J 	 	18=E11aaqx~!$qw--0..zz|| 	 	
 	
 	
 ##%% 
 
{  $-$5$5 4<<	+<==>> 	

 	!;%//aaV\(0@"**bj..zz|| 	 	
 	
 	
 	
 r   	tokenizerr   requested_lengthprompt	list[str]c           	     |   |j         |_        |                    |d          }|rt          j        nt          j        }	t          j        |d         |t          j                  }
t          j        |d         |t          j                  }t          |d          }|
j	        d         }||k    r.|
d d d |f         }
|d d d |f         }t          |d          }n||k     r|
d d d	f         
                    d	          j        }|d d d	f         
                    d	          j        }t          ||z
            D ].}t          j        ||
f          }
t          j        ||f          }/t          |d          }|
j	        d         }||k    sJ |d
k    r|
                                n|
|d
k    r|                                n||d
k    r|                                n|d}|d
k    rg |d<   |
j	        \  }}| j        }| j        }t#          | d          r| j        n| j        | j        z  }t          | j                  D ]}t          j        |||r|nd	|||	          }t          j        |||r|nd	|||	          }|d
k    rF|                    d| d|                                d| d|                                i           |d                             ||f           d }|d
k    rt          j        ||| j        ||	          }d|                                i}|st          | j                  D ]{}t          j        ||||||	          }t          j        ||||||	          }|                    d| d|                                d| d|                                i           |||fS )NT)paddingr,   )r   r'   r	   Fr)   r   r   r*   r+   rC   rp   r{   r|   r}   logitszpresent.)	eos_token	pad_tokenbatch_encode_plusr-   r^   r_   tensorr0   r   r   r   Tri   hstack
contiguousmax_position_embeddingsru   rv   rp   r`   ra   rf   r   rI   appendr/   )r   r   r   r   r   r7   rM   r   encodings_dictrt   r,   r	   r   tokenized_lengthinput_ids_first_colattention_mask_first_colrr   r4   r   sequence_lengthmax_sequence_lengthrs   rk   rm   past_key
past_valuer   r   present_keypresent_values                                 r   get_initial_inputs_and_outputsr     s    $-I000FFN#+>%--K
 ^K8u{[[[I\.1A"B6Y^YdeeeN#NFFFL !r****aaa!2"2!223	'+<,<+<(<='EJJJ	,	,	,'1o77::<#1!!!Q$#7#A#A!#D#D#F '*::;; 	V 	VA&99%EFFI"\+C^*TUUNN'EJJJ r*///// 06Y))+++I9?5.33555n5;u__//111, F
 $& ! #,/J 8*I#*6:#>#>tFDVZ`ZtDtI 6+,, E E;#3:
 
 
 [#3:
 
 

 U??MM.q...0C0C0E0E0q000*2G2G2I2I    $%,,h
-CDDDDGZ&:KTZbmnnnV..001 
	6344 	 	#k	?If\g   !&	?If\g! ! ! '''')?)?)A)ACWaCWCWCWYfYqYqYsYst    7?r   )r	   r
   r   r   )r   F)r   r   r   r   r   r   r   r   r   r    r!   r   )Fr   Fr   )r   r   r   r   r   r   r6   r   r7   r   r   r    r!   r   r8   r   )FFr   Fr   )r   r   r   r   r   r   r   r   r6   r   rL   r   r7   r   rM   r   r   r    r!   r   r8   r   )r   r   r   r   r6   r   r   r   rL   r   r7   r   rM   r   rR   r   )r   )
r   r   r   r   r6   r   r7   r   r8   r   )rC   rw   )Fr   r   )r   rH   rM   r   r6   r   rL   r   )rl   rH   r6   r   rL   r   )r   r   rl   rH   )r   r   rl   rH   r   r    r   r   rM   r   r   rH   )
r   r   r4   rH   r   rH   r7   r   rM   r   )r   r   r   r   r   r   r   r   r   r   r7   r   rM   r   r   r    )
__future__r   r2   r]   r-   transformersr   r   transformers.cache_utilsr   onnxruntimer   r   r   r5   rK   rQ   rn   rD   rE   r   rP   r   r   r   r   r   r   r   <module>r      s   # " " " " "      2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2   (     P ) ) ) ) )z "0 0 0 0 0h0 0 0 0j    	 	 	 	 #	    0	 	 	 	   ((* (* (* (*Z, , , ,`d d d d d dr   