
    fPi/                         d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZmZ d d	lmZ  e j        e          Zg d
Zg dZ G d d          ZdS )    N)Path)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                      e Zd Ze	 	 d(dededededef
d            Ze	 	 	 d)ded
edej        dededede	ee
ez  f         fd            Ze	 	 	 	 d*dee
z  dej        dededededefd            Ze	 	 	 d+dedee         dz  dedefd            Ze	 	 	 	 d,dededed ed!eded"ed#ed$efd%            Zedee
z  d&edej        defd'            ZdS )-T5Helper F
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 H   |}t           j                            |          rt          |          j        d         }n|                    d          d          ||z  }|r t           j                            | |          n| }t           j                            ||dz             S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_pathzT5Helper.get_onnx_path!   s    $ (
7==+,, 	&0117;JJS!!"%%f
<FVBGLLZ888J	w||IzG';<<<    t5	cache_dirdevice
model_typestate_dict_pathencoder_decoder_initc                    |dk    rt          j        | |          }n,|dk    rt          j        | |          }nt          d          |r'|                    t          j        |                     t          |j        |j	        |j
                  }|                                                    |           t          |j        |j        |j	        |j
        d|           }|rdnd}	|	|d	|iS )
a{  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            model_type (str, optional): model type "t5" or "mt5"
            state_dict_path(str, optional): state dictionary path
            encoder_decoder_init (bool, optional): combine encoder and decoder kv cache initialization into one model.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r$   )r%   mt5z only support mode_type=t5 or mt5N)decoder_start_token_idoutput_cross_onlyr)   encoderdecoder)r   from_pretrainedr   
ValueErrorload_state_dicttorchloadr   r/   lm_headconfigevaltor	   r.   )
r   r%   r&   r'   r(   r)   modelr/   r.   encoder_names
             r!   
load_modelzT5Helper.load_model>   s   * .>?Q]fgggEE5  /?@R^ghhhEE?@@@ 	?!!%*_"="=>>>EM5=%,GG&!!!&MMML#'"66
 
 
 2FT--9gy'::r#   Tr9   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc           	          t          | t                    rt          j        | ||||||           d S t	          j        | |||||           d S )N)
isinstancer	   r
   export_onnxr   )r9   r&   r<   r=   r>   r?   r@   s          r!   rC   zT5Helper.export_onnxl   s~     e122 	&2%(      '(     r#   N
onnx_modelop_block_listforce_fp16_logitsuse_symbolic_shape_inferc                    g dd |                                  D             }t                    }|                    |          }t                              d| d|            |                                 j        d         j        }d}|                                 }	||	v sJ |	|         }
d}|
j	        dk    r|
}t                              d	|
j                    d}|
j
        D ]}|                     |          }| nt          |          }t                              d
|
j         d|            |dk     }n*t                              d|
j	         d|
j                    g }g |s||s|g}|j        gdvr|                                 }d}d}|                     d          }|rd}|D ]}
|
j        vr|                     |
|	          }|                     |
|          }fd|D             }fd|D             }t'          |          t'          |          z   dk    r!                    |
j                   |dz  }d}|t'          |          |z
  }t                              d| d|            t                              d            ||d}t                              d|            |r | j        dddi| nddlm}  || j        fddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators need to run in fp32.
            force_fp16_logits (bool, optional): force logits and last MatMul node to be in float16. Defaults to False.
            use_symbolic_shape_infer (bool, optional): use symbolic shape inference to convert float to float16. Defaults to True.
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        N)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAddc                     h | ]	}|j         
S  op_type).0nodes     r!   	<setcomp>z0T5Helper.auto_mixed_precision.<locals>.<setcomp>   s    CCCt|CCCr#   z	fp32 op: z
 fp16 op: r   FMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node rL   Tc                 .    g | ]}|j         v s|v |S rN   rO   )rQ   childnode_block_listrE   s     r!   
<listcomp>z1T5Helper.auto_mixed_precision.<locals>.<listcomp>   s8     , , ,&+5=M;Y;Y]bfu]u]uE]u]u]ur#   c                 .    g | ]}|j         v s|v |S rN   rO   )rQ   parentrW   rE   s     r!   rX   z1T5Helper.auto_mixed_precision.<locals>.<listcomp>   s8     + + +'-FNm<[<[_eix_x_xF_x_x_xr#      z#node counter of Add operator: fp32=z fp16=znode_block_list: )keep_io_typesrE   rW   force_fp16_initializersz!auto_mixed_precision parameters: rG   )convert_float_to_float16disable_shape_inferrN   )nodesset
differenceloggerinfographoutputnameoutput_name_to_noderP   inputget_initializerr   debugwarninginput_name_to_nodesget_nodes_by_op_typeget_parentsget_childrenlenappendr^   float16r9   )rD   rE   rF   rG   op_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionrh   rR   last_matmul_nodeinitializerri   max_diffr\   rm   fp32_addchanged	add_nodesparentschildrenblocked_childrenblocked_parentsfp16_add
parametersr^   rW   s    `                         @r!   auto_mixed_precisionzT5Helper.auto_mixed_precision   s   "    M DC
0@0@0B0BCCC-((!,,[99DDD{DDEEE (--//6q9> $) (<<>>!%88888"#56<8###KKIdiIIJJJK  (88??*E +
 1==HLLftyff\dffggg'/$$$NNm4<mmbfbkmmnnn( 	6/?/KUf/K/0M/45O%%","@"@"B"BHG"77>>I +% + +Dy77","8"8?R"S"S#-#:#:4AT#U#U, , , , ,/7, , ,(+ + + + +18+ + +   011C4H4HHAMM+2249===$MH&*G!  +" 9~~0HKKXhXXhXXYYY999::: +*.'?	
 

 	D
DDEEE# 	/J/\\\Q[\\\\ 988888$$  $(    r#   optimized_model_path
is_float16num_attention_headshidden_sizer   use_gpuforce_fp16_ioc	           	      
   ddl m}	 d}
|r |	d          }
| |
_        t          | d||d|
|          }|r5|rt                              ||           n|                    |           |                    ||d	           dS )
zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr$   )r'   	num_headsr   	opt_leveloptimization_optionsr   )rF   )cast_input_outputT)all_tensors_to_one_file)fusion_optionsr   enable_skip_layer_normr   r   r    convert_model_float32_to_float16save_model_to_file)r<   r   r   r   r   r>   r   r   r   r   r   ms               r!   optimize_onnxzT5Helper.optimize_onnx   s     	100000# 	S#0=#6#6 >R:R 7)#!5
 
 
  	T# T--a=-QQQQ22]2SSS	13Keijjjjjr#   ort_sessionc                     t          | t                    rt          j        | |||          S t	          j        | |||          S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)rB   r	   r
   verify_onnxr   )r9   r   r&   r@   s       r!   r   zT5Helper.verify_onnx#  sH     e122 	h-9%fVfggg*5+vGWXXXr#   )r   F)r$   r   F)TFTF)NFT)FTFF)__name__
__module____qualname__staticmethodstrboolr"   r3   r&   dictr	   r   r;   rC   r   listr   intr   r   r   rN   r#   r!   r   r       s         	= === = 	=
 
= = = \=8 
 !%*+; +;+;+; +; 	+;
 +; #+; 
c')33	4+; +; +; \+;Z 
 ).&*!& //  	
 #'  $    \:  +/"')-	n nnCy4'n  n #'	n n n \n`  */%)#%k %k%k!%k %k !	%k
 %k #'%k #%k %k %k %k %k \%kN 
Y//
Y%
Y 
Y 	
Y 
Y 
Y \
Y 
Y 
Yr#   r   )loggingr   pathlibr   r3   rs   r   rD   r   	optimizerr   
t5_decoderr   r   t5_encoder_decoder_initr	   r
   transformersr   r   onnxruntimer   	getLoggerr   rc   PRETRAINED_T5_MODELSPRETRAINED_MT5_MODELSr   rN   r#   r!   <module>r      s:    				        - - - - - -             $ $ $ $ $ $ 1 1 1 1 1 1 1 1 T T T T T T T T P P P P P P P P ( ( ( ( ( (		8	$	$MMM    NY NY NY NY NY NY NY NY NY NYr#   