
    fPi"              	           d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dlm
Z
  e j        e          Z G d de
j                  Z	 	 dd
e	dededefdZd
e	dededefdZdS )    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2PromptEncoder)SAM2Base)nnc                        e Zd Z	 ddedededdf fdZ ej                    dej        d	ej        d
ej        dej        dej        dej        fd            Z	 xZ
S )SAM2MaskDecoderT	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturnNc                     t                                                       |j        | _        |j        | _        || _        || _        || _        d S )N)	super__init__sam_mask_decodermask_decodersam_prompt_encoderprompt_encodermodelr   r   )selfr
   r   r   	__class__s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/sam2/mask_decoder.pyr   zSAM2MaskDecoder.__init__   sN     	%6':
 0/N,,,    image_features_0image_features_1image_embeddingsimage_pesparse_embeddingsdense_embeddingsc           	      Z   | j                             |||||j        d         dk    ||g          \  }}}	}	| j        r#|ddddddddf         }|ddddf         }nH| j        r| j                             ||          \  }}n"|ddddddddf         }|ddddf         }||fS )a  
        Decode masks from image and prompt embeddings. Only support H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            image_pe (torch.Tensor): [1, 256, H/16, W/16]. image positional encoding.
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, H/16, W/16]. embedding for input masks.

        Returns:
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
        r      )r   r   sparse_prompt_embeddingsdense_prompt_embeddingsrepeat_imagehigh_res_featuresN)r   predict_masksshaper   r    _dynamic_multimask_via_stability)
r   r   r   r   r   r   r   low_res_masksiou_predictions_s
             r   forwardzSAM2MaskDecoder.forward   s   2 04/@/N/N-%6$4*03a7/1AB 0O 0
 0
,1   	6)!!!QRRAAA+6M-aaae4OO1 		6 .2->-_-_. .*M?? *!!!QqS!!!QQQ,7M-aaa1f5Oo--r   )T)__name__
__module____qualname__r   boolr   torchno_gradTensorr,   __classcell__)r   s   @r   r	   r	      s        
 15	O OO O *.	O
 
O O O O O O U]__/.,/.  ,/.  ,	/.
 ,/. !</.  ,/. /. /. _/. /. /. /. /.r   r	   TF
sam2_modelonnx_model_pathr   r   c                    t          |                                           }t                      }t          |                                           } ||          \  }}	}
t                              d|j                   t                              d|	j                   t                              d|
j                   d}d}t          j        dd||dft          j	                  }t          j        dd	||ft          j	                  }t          j
        |d	d
d
t          j	                  }t          j        d	t          j	                  } |||||          \  }}}t                              d|j                   t                              d|j                   t                              d|j                   t          | ||          }||	|
|||f} || \  }}t                              d|j                   t                              d|j                   t          j                    5  |s@t          j        dt          j        j                   t          j        dt$                     t          j                            |||dddg dddgdddddiddiddid	  	         d d d            n# 1 swxY w Y   t+          d|           d S )Nzimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %s      r      lowhighsizedtyper!      r?   zsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %szlow_res_masks.shape: %sziou_predictions.shape: %signore)categoryT   r   r   r   r   r   r   r)   r*   
num_labelsznum_points+1)r   r!   )r   r   r)   r*   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz mask decoder onnx model saved to)r   cpur   r   loggerinfor'   r1   randintfloatzerosonesr	   warningscatch_warningsfilterwarningsjitTracerWarningUserWarningonnxexportprint)r5   r6   r   r   verbosesam2_prompt_encoderimagesam2_encoderr   r   r   rF   
num_pointspoint_coordspoint_labelsinput_maskshas_input_masksr   r   r   sam2_mask_decoderinputsr)   r*   s                           r   export_mask_decoder_onnxrh   R   s3    ,J77;;==#%%E#J//3355L;G<;N;N8&(8
KK,.>.DEEE
KK,.>.DEEE
KK,.>.DEEE JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL+j!S#U[IIIKj%+666O4G4GlK5 51' KK-/@/FGGG
KK,.>.DEEE
KK$hn555'
4DFeff 02BHN_aqrF%6%6%?"M?
KK)=+>???
KK+_-BCCC		 	"	" 
 
 	D#Huy7NOOOO#H{CCCC
 $   *+<=)5.%I%I%&$5"#\!2$%|#4	 ! 	 	
 	
 	
	
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8 

,o>>>>>s   6A=J??KKc           
      n   ! t          |                                           }t                      }t          |                                           } ||          \  }}}	d}
d}t	          j        dd|
|dft          j                  }t	          j        dd|
|ft          j                  }t	          j        |
dddt          j                  }t	          j        dt          j                  } |||||          \  }}}t          | ||          }|||	|||f} || \  }}dd l
}|                    |d	g
          }|                                  fdt          t                               D             }t                              d|           |                                !!fdt          t          !                    D             }t                              d|           |                    ||                                |                                |	                                |                                |                                |                                d          }t)          |          D ],\  }}t                              d|||         j                   -|\  }}t          j                            |t	          j        |          dd           t          j                            |t	          j        |          dd           t3          d|            d S )Nr!      r   r:   r8   r;   r@   rA   CPUExecutionProvider)	providersc                 *    g | ]}|         j         S  name).0imodel_inputss     r   
<listcomp>z*test_mask_decoder_onnx.<locals>.<listcomp>   s     JJJA<?'JJJr   zinput_names: %sc                 *    g | ]}|         j         S rn   ro   )rq   rr   model_outputss     r   rt   z*test_mask_decoder_onnx.<locals>.<listcomp>   s!    MMMaM!$)MMMr   zoutput_names: %srE   zoutput %s shape: %sg{Gzt?g-C6?)atolrtolzonnx model has been verified: )r   rM   r   r   r1   rP   rQ   randrS   r	   onnxruntimeInferenceSession
get_inputsrangelenrN   rO   get_outputsrunnumpy	enumerater'   testingassert_closetensorr\   )"r5   r6   r   r   r^   r_   r`   r   r   r   rF   ra   rb   rc   rd   re   r   r   r   rf   rg   r)   r*   rz   ort_sessionrJ   rK   outputsrr   output_nameort_low_res_masksort_iou_predictionsrs   rv   s"                                   @@r   test_mask_decoder_onnxr      s?    ,J77;;==#%%E#J//3355L;G<;N;N8&(8JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL*ZCEKHHHKj%+666O4G4GlK5 51' (
4DFeff 02BHN_aqrF%6%6%?"M?..KaJb.ccK))++LJJJJs<7H7H1I1IJJJK
KK!;///++--MMMMM5]9K9K3L3LMMML
KK"L111oo 0 6 6 8 8 0 6 6 8 8 0 6 6 8 8 ((!2!8!8!:!: 0 6 6 8 8	
 	

 
G $L11 J J;);
8HIIII-4**	M}el;L.M.MTX_cddd	M=P0Q0QX\cghhh	
<?
<
<=====r   )TF)loggingrT   r1   image_encoderr   r   r   r   sam2.modeling.sam2_baser   r   	getLoggerr-   rN   Moduler	   strr0   rh   r   rn   r   r   <module>r      s@  
    C C C C C C C C , , , , , , , , , , , ,      		8	$	$>. >. >. >. >.bi >. >. >.J -1B? B?B?B? B? &*	B? B? B? B?J9>9>9> 9> &*	9> 9> 9> 9> 9> 9>r   