
    fPiE*              	           d dl Z d dlZd dlZd dlmc mZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ  e j        e          Z G d d	ej                  Z	 	 ddedededefdZ	 ddedefdZdS )    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2MaskDecoder)SAM2PromptEncoder)SAM2Base)compare_tensors_with_tolerance)nnc                        e Zd Z	 	 	 ddededededed	d
f fdZ ej                    	 ddej	        dej	        dej	        dej	        dej	        dej	        dej	        dej	        defd            Z
 xZS )SAM2ImageDecoderTF        	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturn_logitsmask_thresholdreturnNc                     t                                                       t          |          | _        t	          |||          | _        || _        || _        d S )N)super__init__r   prompt_encoderr   mask_decoderr   r   )selfr   r   r   r   r   	__class__s         /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/sam2/image_decoder.pyr   zSAM2ImageDecoder.__init__   sW     	/	::+I7GIhii*,    image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_sizeenable_nvtx_profilec
                    d}
|	rddl m}  |g d          }
|
|
                    dd           |                     ||||          \  }}}|
,|
                    d           |
                    dd	           |                     ||||||          \  }}|
,|
                    d           |
                    d
d           t          j        ||d         |d         fdd          }t          j	        |dd          }| j
        s|| j        k    }|
)|
                    d
           |
                                 |||fS )a  
        Decode masks from image features and prompts. Batched images are not supported. H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
            original_image_size(torch.Tensor): [2]. original image size H_o, W_o.
            enable_nvtx_profile (bool): enable NVTX profiling.

        Returns:
            masks (torch.Tensor): [1, M, H_o, W_o] where M=3 or 1. Masks of original image size.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
        Nr   )
NvtxHelper)r   r   post_processr   blue)colorr   redr'   green   bilinearF)modealign_cornersg      @g      @@)nvtx_helperr&   start_profiler   stop_profiler   Finterpolatetorchclampr   r   print_latency)r   r   r   r   r   r    r!   r"   r#   r$   r0   r&   sparse_embeddingsdense_embeddingsimage_pelow_res_masksiou_predictionsmaskss                     r   forwardzSAM2ImageDecoder.forward#   s   H  	Y......$*%W%W%WXXK"%%&6f%EEE8<8K8K,_9
 9
5+X "$$%5666%%nE%BBB)-):):.0@(L]_o*
 *
& "$$^444%%nG%DDD  #%8%;<	
 
 
 M5$??! 	0D//E"$$^444%%'''o}44r   )TFr   F)__name__
__module____qualname__r   boolfloatr   r5   no_gradTensorr>   __classcell__)r   s   @r   r   r      s3       
 15# #- -- - *.	-
 - - 
- - - - - - U]__ %*L5 L5,L5  ,L5  ,	L5
 lL5 lL5 \L5 L5 #\L5 "L5 L5 L5 _L5 L5 L5 L5 L5r   r   F
sam2_modelonnx_model_pathr   verbosec                    d}t          |          }t          |                                           } ||          \  }}}	t                              d|j                   t                              d|j                   t                              d|	j                   t          | |d                                          }
d}d}t          j        d	d
||dft          j	                  }t          j        d	d||ft          j
                  }t          j        |dddt          j	                  }t          j        dt          j	                  }t          j        ddgt          j
                  }|||	|||||f}t                              d|j                   t                              d|j                   t                              d|j                   t                              d|j                   t                              d|j                   |ri |
| \  }}}t                              d|j                   t                              d|j                   t                              d|j                   g d}g d}ddddddd	did	diddddd	did	did }t          j                    5  |s@t          j        d!t          j        j        "           t          j        d!t&          "           t          j                            |
||dd#d|||$	  	         d d d            n# 1 swxY w Y   t                              d%|           d S )&Nr,   zimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %sTr   r         r      lowhighsizedtype   rT   i  i  zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szoriginal_image_size.shape: %szmasks.shape: %sziou_predictions.shape: %szlow_res_masks.shape: %s)r   r   r   r   r    r!   r"   r#   )r=   r<   r;   
num_labels
num_points)r   r,   original_image_heightoriginal_image_width)r   rM   rN   )r   r    r!   r"   r=   r;   r<   ignore)category   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axeszdecoder onnx model saved to %s)r   r   cpuloggerinfoshaper   r5   randintrD   int32zerosonestensorwarningscatch_warningsfilterwarningsjitTracerWarningUserWarningonnxexport)rH   rI   r   rJ   
batch_sizeimagesam2_encoderr   r   r   sam2_decoderrW   rX   r   r    r!   r"   r#   example_inputsr=   r<   r;   ra   rb   rc   s                            r   export_decoder_onnxrz   s   s    J#J//E#J//3355L;G<;N;N8&(8
KK,.>.DEEE
KK,.>.DEEE
KK,.>.DEEE#)(,   
cee	  JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL+j!S#U[IIIKj%+666O,d|5;GGG 		N KK(,*<===
KK(,*<===
KK'):;;;
KK+_-BCCC
KK/1D1JKKK D0<n0M-%u{333/1FGGG-}/BCCC	 	 	K A@@L )\::(\::<(|,!&=BXYY\*|, L 
	 	"	" 
 
 	D#Huy7NOOOO#H{CCCC
 $#%% 	 
	
 
	
 
	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
" KK0/BBBBBs   A+L;;L?L?c                 X   ! d}t          |          }t          |                                           } ||          \  }}}t          | |d                                          }	d}
d}t	          j        dd|
|dft          j                  }t	          j        dd|
|ft          j                  }t	          j        |
dd	d	t          j        
          }t	          j        dt          j        
          }t	          j	        ddgt          j        
          }||||||||f |	 \  }}}dd l
}|                    |dg          }|                                  fdt          t                               D             }t                              d|           |                                !!fdt          t          !                    D             }t                              d|            fdt          t                               D             }|                    ||          }t'          |          D ].\  }}t                              | d||         j                   /|\  }}}t+          d|                                t	          j	        |                                                    rXt+          d|t	          j	        |                    r5t+          d|t	          j	        |                    rt-          d|           d S t-          d|           d S )Nr,   TrL      r   rO   rM   rP   rU   rV   i  CPUExecutionProvider)	providersc                 *    g | ]}|         j         S  name).0imodel_inputss     r   
<listcomp>z%test_decoder_onnx.<locals>.<listcomp>   s     JJJA<?'JJJr   zinput_names: %sc                 *    g | ]}|         j         S r   r   )r   r   model_outputss     r   r   z%test_decoder_onnx.<locals>.<listcomp>   s!    MMMaM!$)MMMr   zoutput_names: %sc                 \    i | ](}|         j         |                                         )S r   )r   numpy)r   r   ry   r   s     r   
<dictcomp>z%test_decoder_onnx.<locals>.<dictcomp>  s3    ```!l1o"N1$5$;$;$=$=```r   z
.shape: %sr=   r<   r;   zonnx model has been verified:zonnx model verification failed:)r   r   rd   r   r5   rh   rD   ri   rj   rl   onnxruntimeInferenceSession
get_inputsrangelenre   rf   get_outputsrun	enumeraterg   r   print)"rH   rI   r   ru   rv   rw   r   r   r   sam2_image_decoderrW   rX   r   r    r!   r"   r#   r=   r<   r;   r   ort_sessionra   rb   inputsoutputsr   output_name	ort_masksort_iou_predictionsort_low_res_masksry   r   r   s"                                  @@@r   test_decoder_onnxr      sI   
 J#J//E#J//3355L;G<;N;N8&(8))(,   
cee	  JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL+j!S#U[IIIKk!5;777O,d|5;GGG 		N -?,>,O)E?M..KaJb.ccK))++LJJJJs<7H7H1I1IJJJK
KK!;///++--MMMMM5]9K9K3L3LMMML
KK"L111`````uSQ]M^M^G_G_```FoolF33G#L11 B B;{...
0@AAAA8?5I"$5&wu|I?V?V?\?\?^?^__B*+<ou|\oOpOpqqB +?M5<XiKjKjkkB
 	-?????/AAAAAr   )FFr?   )loggingrm   r5   torch.nn.functionalr	   
functionalr3   image_encoderr   r   r   r   r   r   sam2.modeling.sam2_baser   
sam2_utilsr   	getLoggerr@   re   Moduler   strrC   rz   r   r   r   r   <module>r      s  
             C C C C C C C C ( ( ( ( ( ( , , , , , , , , , , , , 5 5 5 5 5 5      		8	$	$\5 \5 \5 \5 \5ry \5 \5 \5D #	\C \C\C\C \C 	\C \C \C \CD >B >B>B>B >B >B >B >B >Br   