
    fPiS                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dl	mZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ  e j        e          Z  G d de
j!        j"                  Z#dS )    N)chain)Path)convert_float_to_float16)RepeatedCompositeFieldContainer)
ModelProtoValueInfoProto)	OnnxModel)PastKeyValuesHelper)WhisperConfig)convert_inputs_for_ortget_model_dynamic_axesget_sample_decoder_inputsgroup_past_key_values)InferenceSessionc                       e Zd ZdZd'dedej        j        dede	f fdZ
	 	 d(d	ej        d
ej        dz  deeej                          dz  fdZ	 	 d(d	ej        d
ej        dz  deeej                          dz  fdZ	 	 d(d	ej        d
ej        dz  deeej                          dz  fdZd Zd Zd Zd'de	de	de	fdZd)dede	de	fdZd'dede	fdZdefdZdede	fdZ	 	 	 	 	 	 d*ded ed!e	d"e	de	de	d#e	d$e	fd%Zded ede	de	fd&Z xZS )+WhisperDecoderz/A Whisper decoder with optional past key valuesFconfigmodel
model_implno_beam_search_opc                 x   t                                                       || _        |j        | _        || _        || _        |dk    rd n|j        j        | _        |dk    rd n|j        | _        |dk    r|nd | _        | j        j	        | _	        | j        j
        | _        | j        j        | j        z  | _        d S Nopenai)super__init__r   devicer   r   r   decoderproj_outmax_source_positionsdecoder_attention_heads	num_headsd_model	head_size)selfr   r   r   r   	__class__s        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/whisper/whisper_decoder.pyr   zWhisperDecoder.__init__%   s    l$!2)X55tt5;;N *h 6 6EN(H44UU$
$(K$D!<,>    Ndecoder_input_idsencoder_hidden_statespast_key_valuesc                     |                      |||d          }|                     |j                  }|j        }|||fS t	          j        |          \  }}||fS )NT)r)   	input_idsr*   	use_cache)r   r   last_hidden_stater*   r
   group_by_self_and_cross)	r$   r(   r)   r*   outputslogitspresent_key_valuespresent_selfpresent_crosss	            r&   
hf_forwardzWhisperDecoder.hf_forward4   s~     ,,"7'+	  
 
 w899$4"--- ':&QRd&e&e#m |##r'   c                     i }|t          |          \  }}d |D             }d |D             }d |D             }d |D             }t           j        j        j                  D ]k\  }}|d|z           ||j        j        <   |d|z  dz            ||j        j        <   |d|z           ||j        j        <   |d|z  dz            ||j        j        <   l j        	                                \  }	}
 j                            |||          }| j        j        j        D ]}t          j        ||j        j                 |	|j        j                 gd                                          |	|j        j        <   t          j        ||j        j                 |	|j        j                 gd                                          |	|j        j        <   g g }} j        j        j        D ]}|                    |	|j        j                            |                    |	|j        j                            |J|                    |	|j        j                            |                    |	|j        j                             fd	|D             } fd
|D             }|
D ]}|                                 |,t          j        ||z   t#          |          dz            }||fS ||fS )Nc                 :    g | ]}|                     d d          S       	transpose.0past_kvs     r&   
<listcomp>z.WhisperDecoder.oai_forward.<locals>.<listcomp>Z   s(    "^"^"^w7#4#4Q#:#:"^"^"^r'   c                 \    g | ])}|                     g |j        d d         dR           *S Nr:   reshapeshaper=   s     r&   r@   z.WhisperDecoder.oai_forward.<locals>.<listcomp>[   s>    "p"p"pQX7??3KW]2A25F3K3K3K#L#L"p"p"pr'   c                 :    g | ]}|                     d d          S r8   r;   r=   s     r&   r@   z.WhisperDecoder.oai_forward.<locals>.<listcomp>\   s(    #`#`#`G$5$5a$;$;#`#`#`r'   c                 \    g | ])}|                     g |j        d d         dR           *S rB   rD   r=   s     r&   r@   z.WhisperDecoder.oai_forward.<locals>.<listcomp>]   s>    #r#r#rRYGOO4LgmBQB6G4L4L4L$M$M#r#r#rr'   r:   r9   )xxakv_cache)dimc                     g | ]D}|                     g |j        d d         dj        R                               dd          ES Nr:   rC   r9   rE   rF   r#   r<   r>   
present_kvr$   s     r&   r@   z.WhisperDecoder.oai_forward.<locals>.<listcomp>   sj     
 
 
 J!1"1"!5JrJ4>JJKKUUVWYZ[[
 
 
r'   c                     g | ]D}|                     g |j        d d         dj        R                               dd          ES rN   rO   rP   s     r&   r@   z.WhisperDecoder.oai_forward.<locals>.<listcomp>   sj     
 
 
 J!1"1"!5JrJ4>JJKKUUVWYZ[[
 
 
r'   )r   	enumerater   r   blocksattnkeyvalue
cross_attninstall_kv_cache_hookstorchcatdetachappendremover
   group_by_layerlen)r$   r(   r)   r*   past_kv_cacheself_attn_kv_cachescross_attn_kv_cachesidxblockrK   hooksr1   r3   r4   hookr2   s   `               r&   oai_forwardzWhisperDecoder.oai_forwardP   s\    &8Mo8^8^5!5"^"^J]"^"^"^"p"p\o"p"p"p#`#`K_#`#`#` #r#r]q#r#r#r '
(:(ABB Z Z
U0CAG0Lejn-2Ea#gPQk2Rej./6J1s76Se.238LQQTWWX[8Ye.455 *;;==% ##&7<Q\i#jj &+2  +09"5:>2HUZ^4LMST, , ,&(( ( .3Y"5:#34huz?O6PQWX. . .&(( )** ')"mZ'. 	G 	GE 8999)9 :;;;&$$Xe.>.B%CDDD$$Xe.>.D%EFFF
 
 
 
*
 
 

 
 
 
+
 
 
  	 	DKKMMMM"!4!C},c,.?.?1.D" " --- |##r'   c                 t    | j         dk    r|                     |||          S |                     |||          S r   )r   rh   r5   )r$   r(   r)   r*   s       r&   forwardzWhisperDecoder.forward   sC     ?h&&##$57Lo^^^02GYYYr'   c           	          | j         rddg}nFddgt          t          j        d t	          | j        j                  D                                 }|S )Nr,   r)   c              3   :   K   | ]}d | d| d| d| fV  dS )past_key_self_past_value_self_past_key_cross_past_value_cross_N r>   is     r&   	<genexpr>z-WhisperDecoder.input_names.<locals>.<genexpr>   sd       ( ( .!--/E!/E/EG\YZG\G\^urs^u^uv( ( ( ( ( (r'   
first_passlistr   from_iterableranger   decoder_layers)r$   input_namess     r&   r{   zWhisperDecoder.input_names   s}    ? 	&(?@KK '	 ' ( (!&t{'A!B!B( ( (   	K r'   c           	      *   | j         rFdgt          t          j        d t	          | j        j                  D                                 }nEdgt          t          j        d t	          | j        j                  D                                 }|S )Nr1   c              3   :   K   | ]}d | d| d| d| fV  dS )present_key_self_present_value_self_present_key_cross_present_value_cross_Nrq   rr   s     r&   rt   z.WhisperDecoder.output_names.<locals>.<genexpr>   sf       ( (  4335!554446166	( ( ( ( ( (r'   c              3   *   K   | ]}d | d| fV  dS )r~   r   Nrq   rr   s     r&   rt   z.WhisperDecoder.output_names.<locals>.<genexpr>   sJ       ( (QR0Q002K2K2KL( ( ( ( ( (r'   ru   )r$   output_namess     r&   r   zWhisperDecoder.output_names   s    ? 	' ( ( "'t{'A!B!B( ( (  
 
LL  ' ( (V[\`\g\vVwVw( ( (   L r'   c                 Z    t          | j        ||          }d|v r| j        s	|d         d= |S )Nr,   r9   )r   r   r   )r$   r{   r   dynamic_axess       r&   r   zWhisperDecoder.dynamic_axes   s;    -dk;UU,&&t/E&[)!,r'   use_fp16_inputsuse_int32_inputsreturn_dictc           	          t          | j        | j        d| j        rdnd| j        rdnd||          }|r| j        r|d= |S | j        r|d         |d         fS |d         |d         |d         fS )	Nr:   r      r9   )
batch_sizepast_sequence_lengthsequence_lengthuse_fp16	use_int32r*   r(   r)   )r   r   r   rv   )r$   r   r   r   inputss        r&   r   zWhisperDecoder.inputs   s    *KK'+"=!!A"&/8QQq$&
 
 
  	 .,-M? 	*+./ 
 &'*+$%
 	
r'   iois_cross	is_outputc                    |j         j        j        j        d         }d|j        v r |                                 | j        |_        |j         j        j        j        d         }d|j        v r.|                                 |r| j        |_        n|rdnd|_        |j         j        j        j        d         }d|j        v r |                                 | j	        |_        |S )Nr9   _dim_r:   total_sequence_lengthr      )
typetensor_typerF   rL   	dim_paramClearr!   	dim_valuer   r#   )r$   r   r   r   r!   r   r#   s          r&   fix_key_value_cache_dimsz'WhisperDecoder.fix_key_value_cache_dims   s     G'-1!4	i)))OO"&.I'-37:o///!!### m,0,E))GP,l,C,CVl)G'-1!4	i)))OO"&.I	r'   io_listc                    g }g }g }|D ]}d|j         vrd|j         vr|                    |           *d|j         v rK|                     |d|          }| j        r|                    |           h|                    |           ~|                     |d|          }| j        r|                    |           |                    |           | j        s|||z   z  }|S )Npastpresentr$   F)r   r   T)namer]   r   r   )r$   r   r   reordered_iorb   rc   r   new_ios           r&   fix_iozWhisperDecoder.fix_io  s,    ! 	8 	8BRW$$"')A)A##B''''27""66rEU^6__) 7 ''////'..v6666 66rDT]6^^) 8 ''////(//7777% 	G/2FFFLr'   c                 f   |                      |j        j        d          }t          |j        j                  dk    r;|j        j                                         t          |j        j                  dk    ;|j        j                            |           |                      |j        j        d          }t          |j        j                  dk    r;|j        j                                         t          |j        j                  dk    ;|j        j                            |           |S )NF)r   r   T)r   graphinputr`   popextendoutput)r$   r   reordered_inputsreordered_outputss       r&   fix_inputs_and_outputsz%WhisperDecoder.fix_inputs_and_outputs0  s     ;;u{'8E;JJ%+#$$q((K!!### %+#$$q((  !1222 KK(:dKKK%+$%%))K""$$$ %+$%%))!!"3444r'   c                 >    | j         dk    r|rt          |          }|S r   )r   r   )r$   r   r   s      r&   fix_layernorm_weightsz$WhisperDecoder.fix_layernorm_weights>  s(    ?h&&?& -U33Er'   Tonnx_model_pathproviderverboseuse_external_data_formatuse_encoder_hidden_statesuse_kv_cache_inputsc	                    |o| | _         | o|| _        | j         s| j        s
J d            |                     ||          }	|                                 }
|                                 }|                     |
|          }t          |          j                            dd           t          j
                    5 }t          j                            |d          }t          |          j                            dd           |r|n|}t          j                            | |	|d|
||dd|
  
         t          j        ||          }|                     |          }|                     ||          }t)          j        |||d	           d
d
d
           n# 1 swxY w Y   |                     ||||           d
S )al  Export decoder to ONNX

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): provider to use for verifying parity on ONNX model
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids. Defaults to True.
            use_encoder_hidden_states (bool, optional): use encoder_hidden_states as model input for decoder-init/decoder-without-past models. Defaults to False.
            use_kv_cache_inputs (bool, optional): use KV caches as model inputs for decoder-with-past models. Defaults to True.
        zVOnly one of `use_encoder_hidden_states` and `use_kv_cache_inputs` can be true at once.)r   r   T)parentsexist_okzdecoder.onnx   )	argsfexport_paramsr{   r   r   opset_versiondo_constant_foldingr   )load_external_data)save_as_external_dataall_tensors_to_one_fileN)rv   
later_passr   r{   r   r   r   parentmkdirtempfileTemporaryDirectoryospathjoinrZ   onnxexport
load_modelr   r   r	   saveverify_onnx)r$   r   r   r   r   r   r   r   r   r   r{   r   r   tmp_dir_nametemp_onnx_model_pathout_pathr   s                    r&   export_onnxzWhisperDecoder.export_onnxJ  s2   H 4O<O8O 87O<O 	
$/ 	
 	
d	
 	
1 _O_``&&((((**((lCC_$**4$*GGG(** 	l#%7<<n#M#M %&&-33D43PPP/G\++_HJ"')) $(     OHAYZZZE//66E..uoFFEN&>(,	   +	 	 	 	 	 	 	 	 	 	 	 	 	 	 	8 	(OEUVVVVVs   >CFF #F c                    |                      ||d          }g }| j        r | j        d
i |}|                    |d                                                                                                                    |d         D ]R}|D ]M}	|                    |	                                                                                                           NSn | j        d
i |}|                    |d                                                                                                                    |d         D ]M}
|                    |
                                                                                                           Nt          ||g          }|                    dt          ||                    }	 t          |                                           D ]u\  }}t          j        ||         ||         z
            }t                              d| d           t                              d	t          j        |                      vdS #  Y dS xY w)aw  Verify ONNX model outputs and PyTorch model outputs match

        Args:
            onnx_model_path (str): path to save ONNX model
            provider (str): execution provider for ONNX model
            use_fp16_inputs (bool, optional): use float16 inputs for the KV caches
            use_int32_inputs (bool, optional): use int32 inputs for the decoder_input_ids
        T)r   r   r   r   r9   )	providersNz
Comparing z...z
Max diff: rq   )r   rv   rj   r]   r\   cpunumpyr   runr   rS   r   npabsloggerwarningmax)r$   r   r   r   r   r   
pt_outputsoutpresent_key_value_layerpresent_key_valuepresent_self_key_valuesessort_outputsrs   output_namediffs                   r&   r   zWhisperDecoder.verify_onnx  si   8 _O_mqrr
? 
	Q$,((((Cc!fmmoo113399;;<<<+.q6 P P')@ P P%%%&7&>&>&@&@&D&D&F&F&L&L&N&NOOOOPP $,((((Cc!fmmoo113399;;<<<*-a& Q Q&!!"8"?"?"A"A"E"E"G"G"M"M"O"OPPPP  H:FFFhht%;FD%I%IJJ	"+D,=,=,?,?"@"@ < <;vjmk!n<==<K<<<===:BF4LL::;;;;< <	DDs   BI   I%)F)NN)FF)TFFTFT)__name__
__module____qualname____doc__r   rZ   nnModulestrboolr   Tensorrw   tupler5   rh   rj   r{   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)r%   s   @r&   r   r   "   s@       99? ?} ?UX_ ?RU ?jn ? ? ? ? ? ?$ 6:<@	$ $ <$  %|d2$ eEL12T9	$ $ $ $> 6:<@	X$ X$ <X$  %|d2X$ eEL12T9	X$ X$ X$ X$z 6:<@	Z Z <Z  %|d2Z eEL12T9	Z Z Z Z     6  
 
d 
d 
QU 
 
 
 
4 > T ^b    ( = $    8J    
: 
 
 
 
 
  ). %!%*/$(OW OWOW OW 	OW
 #'OW OW OW $(OW "OW OW OW OWb55 5 	5
 5 5 5 5 5 5 5 5r'   r   )$loggingr   r   	itertoolsr   pathlibr   r   r   r   rZ   float16r   #google.protobuf.internal.containersr   r   r   
onnx_modelr	   past_helperr
   transformersr   whisper_inputsr   r   r   r   onnxruntimer   	getLoggerr   r   r   r   r   rq   r'   r&   <module>r      sz    				                    , , , , , , O O O O O O + + + + + + + +             + + + + + + & & & & & &            ) ( ( ( ( (		8	$	$n n n n nUX_ n n n n nr'   