
    fPiP%                         d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlmZ d dl	Z	 e j
        e          Z G d dej                  Z	 	 	 	 dded	ed
edededefdZ	 dded	efdZdS )    N)SAM2Base)compare_tensors_with_tolerancerandom_sam2_input_image)nnc            
       z     e Zd Zdeddf fdZ	 d	dej        dedeej        ej        ej        f         fdZ	 xZ
S )
SAM2ImageEncoder	sam_modelreturnNc                     t                                                       || _        |j        | _        |j        | _        d S )N)super__init__modelimage_encoderno_mem_embed)selfr	   	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/sam2/image_encoder.pyr   zSAM2ImageEncoder.__init__   s<    
&4%2    Fimageenable_nvtx_profilec                 d   d}|rddl m}  |ddg          }||                    d           |                     |          }|*|                    d           |                    d           | j        j                            |d         d                   |d         d<   | j        j                            |d         d                   |d         d<   |d         | j        j	         d         }|d         | j        j	         d         }d	 |D             }d
 |D             }	|	d         | j
        z   |	d<   d t          |	ddd         |ddd         d          D             ddd         }
|)|                    d           |                                 |
d         |
d         |
d         fS )a  
        Encodes images into features.

        Only supports H=W=1024. If you want to use different image sizes like 512x512,
        see https://github.com/facebookresearch/segment-anything-2/issues/138.

        Args:
            image (torch.Tensor): images of shape [B, 3, H, W], B is batch size, H and W are height and width.
            enable_nvtx_profile (bool): enable NVTX profiling.

        Returns:
            image_features_0: image features of shape [B, 32, H/4, W/4] - high resolution features of level 0
            image_features_1: image features of shape [B, 64, H/8, W/8] - high resolution features of level 1
            image_embeddings: image features of shape [B, 256, H/16, W/16] - 16 is the backbone_stride
        Nr   )
NvtxHelperr   post_processbackbone_fpn   vision_pos_encc                 B    g | ]}|j         d          |j         d         fS ))shape.0xs     r   
<listcomp>z,SAM2ImageEncoder.forward.<locals>.<listcomp>D   s)    LLLQqwr{AGBK0LLLr   c                 b    g | ],}|                     d                               d dd          -S )   r   r   )flattenpermuter!   s     r   r$   z,SAM2ImageEncoder.forward.<locals>.<listcomp>H   s4    LLL!		!,,Q155LLLr   r   c                 \    g | ])\  }} |                     d dd          j        d dg|R  *S )r   r&   r   r   )r(   reshape)r"   feat	feat_sizes      r   r$   z,SAM2ImageEncoder.forward.<locals>.<listcomp>L   sS     
 
 
i *DLLAq!!)!R<)<<<
 
 
r   F)strictr&   )nvtx_helperr   start_profiler   stop_profiler   sam_mask_decoderconv_s0conv_s1num_feature_levelsr   zipprint_latency)r   r   r   r.   r   backbone_outfeature_mapsvision_pos_embeds
feat_sizesvision_featsfeatss              r   forwardzSAM2ImageEncoder.forward   s   (  	H......$*o~%FGGK"%%o666))%00"$$_555%%n555 +/**E*M*Ml[iNjklNm*n*n^$Q'*.**E*M*Ml[iNjklNm*n*n^$Q' $N3TZ5R4R4T4TU()9:DJ<Y;Y;[;[\LL:KLLL
 ML|LLL'+d.??R
 
#&|DDbD'9:ddd;KTY#Z#Z#Z
 
 
 $B$
 "$$^444%%'''Qxq58++r   F)__name__
__module____qualname__r   r   torchTensorbooltupler=   __classcell__)r   s   @r   r   r      s        3( 3t 3 3 3 3 3 3 %*<, <,|<, "<, 
u|U\5<7	8	<, <, <, <, <, <, <, <,r   r   F
sam2_modelonnx_model_pathdynamic_batch_axesverbosedynamoclear_dynamo_metadatac                    t                      }t          |                                           } ||          \  }}	}
t                              d|j                   t                              d|j                   t                              d|	j                   t                              d|
j                   d }|rddiddiddiddid}t          j                    5  |s@t          j        dt          j
        j        	           t          j        dt          	           |s-t          j                            |||d
dd
dgg d|	  	         nd
t          j        j        _        t          j                            ||fddt          j        j        j        ig          }t          j                            |dddgg dd
          }|                                 |                    |dz   d           dd l}ddlm} |                    |dz   d
          }|rtd|j        j        d         j        j        j        j        d         _        t?          d          D ]3}d|j        j         |         j        j        j        j        d         _        4 ||          }|!                                 |r|"                                 dd l#}|j$        %                    |          r|&                    |           |j$        %                    |dz             r|&                    |dz              |j'        (                    |d
d
d
           d d d            n# 1 swxY w Y   tS          d|           d S )Nimage.shape: %simage_features_0.shape: %simage_features_1.shape: %simage_embeddings.shape: %sr   
batch_size)r   image_features_0image_features_1image_embeddingsignore)categoryT   r   )rS   rT   rU   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesF)argsr-   dynamic_shapes )rZ   r\   r]   rK   z.dynamo.onnx)external_data)DynamoOnnxHelper)load_external_data   z.data)use_external_data_formatall_tensors_to_one_fileconvert_attributezencoder onnx model saved to)*r   r   cpuloggerinfor    warningscatch_warningsfilterwarningsrB   jitTracerWarningUserWarningonnxexport_dynamoconfigcapture_scalar_outputsDimAUTOoptimizesave+onnxruntime.transformers.dynamo_onnx_helperrc   
load_modelgraphinputtypetensor_typedim	dim_paramrangeoutput!convert_constants_to_initializersclear_metadataospathexistsremover   save_model_to_fileprint)rG   rH   rI   rJ   rK   rL   r   sam2_encoderrS   rT   rU   r^   eponnx_programrr   rc   
onnx_modelionnx_model_helperr   s                       r   export_image_encoder_onnxr   X   s    $%%E#J//3355L;G<;N;N8&(8
KK!5;///
KK,.>.DEEE
KK,.>.DEEE
KK,.>.DEEEL 
&!"L 1!"L 1!"L 1	
 
 
	 	"	" > > 	D#Huy7NOOOO#H{CCCC 9	J" $($IYYY)  
 
 
 
 ;?EM 7$$X(-. 	 %  B !:,, $IYYY -  L !!###o>eTTTKKKTTTTTT>)I^bccJ! fT`
 &q).:@DQGQq f fAYeJ$+A.3?EI!LVV 0 0 < <??AAA$ 3!00222IIIw~~o.. +		/***w~~o788 5		/G3444#66$X\pt 7   y> > > > > > > > > > > > > > >@ 

'99999s   %I.MM#&M#c                 d   t          j        |dg          }|                                fdt          t	                              D             }t
                              d|           |                                fdt          t	                              D             }t
                              d|           |rddgndg}|D ]}t          |          }t          |           
                                }	 |	|                                          \  }
}}t
                              d	|j                   t
                              d
|
j                   t
                              d|j                   t
                              d|j                   |                    |d|                                i          }t          |          D ],\  }}t
                              d|||         j                   -|\  }}}t!          d|
t#          j        |          d          rat!          d|t#          j        |          d          r<t!          d|t#          j        |          d          rt'          d| d|            t'          d| d|            d S )NCPUExecutionProvider)	providersc                 *    g | ]}|         j         S ra   name)r"   r   model_inputss     r   r$   z+test_image_encoder_onnx.<locals>.<listcomp>   s     JJJA<?'JJJr   zinput_names: %sc                 *    g | ]}|         j         S ra   r   )r"   r   model_outputss     r   r$   z+test_image_encoder_onnx.<locals>.<listcomp>   s!    MMMaM!$)MMMr   zoutput_names: %sr   r&   rN   rO   rP   rQ   r   zoutput %s shape %srS   )mismatch_percentage_tolerancerT   rU   z,onnx model has been verified for batch_size=z: z.onnx model verification failed for batch_size=)onnxruntimeInferenceSession
get_inputsr   lenrj   rk   get_outputsr   r   ri   cloner    runnumpy	enumerater   rB   tensorr   )rG   rH   rI   ort_sessionr\   r]   batch_sizesrR   r   r   rS   rT   rU   outputsr   output_nameort_image_features_0ort_image_features_1ort_image_embeddingsr   r   s                      @@r   test_image_encoder_onnxr      s   
 .KaJbcccK))++LJJJJs<7H7H1I1IJJJK
KK!;///++--MMMMM5]9K9K3L3LMMML
KK"L111.71a&&QCK! 'd 'd
'
33'
337799?K|EKKMM?Z?Z<*,<%u{33302B2HIII02B2HIII02B2HIII//,%++--0HII'55 	M 	MNA{KK,k71:;KLLLLKRH24H +" 122./	  	d /" 122./	  	d /" 122./	  	d( `````aaaab:bbQ`bbccccO'd 'dr   )FFFFr>   )loggingrl   rB   sam2.modeling.sam2_baser   
sam2_utilsr   r   r   r   	getLoggerr?   rj   Moduler   strrD   r   r   ra   r   r   <module>r      s]  
    , , , , , , N N N N N N N N          		8	$	$C, C, C, C, C,ry C, C, C,R  %"'Z: Z:Z:Z: Z: 	Z:
 Z:  Z: Z: Z: Z:@ 7d 7d7d7d 7d 7d 7d 7d 7dr   