
    fPi                          d dl Z d dlZd dlmZ d dlmZ d dlmZ  e j        e          Z	 G d dej
                  Zdedefd	Zdedefd
ZdS )    N)SAM2Base)compare_tensors_with_tolerance)nnc            	            e Zd Zdef fdZ ej                    dej        dej        dej        dej        fd            Zdej        dej        dej        fd	Z	dej        dej        dej        fd
Z
 xZS )SAM2PromptEncoder	sam_modelc                 n    t                                                       |j        | _        || _        d S )N)super__init__sam_prompt_encoderprompt_encodermodel)selfr   	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/sam2/prompt_encoder.pyr   zSAM2PromptEncoder.__init__   s/    ':


    point_coordspoint_labelsinput_maskshas_input_masksc                     |                      ||          }|                     ||          }| j                                        }|||fS )aj  Encode prompts.

           Args:
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
        Returns:
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, 64, 64]. embedding for input masks.
            image_pe (torch.Tensor, optional): [1, 256, 64, 64]. image positional encoding.
        )_embed_points_embed_masksr   get_dense_pe)r   r   r   r   r   sparse_embeddingsdense_embeddingsimage_pes           r   forwardzSAM2PromptEncoder.forward   sR    0 !..|\JJ,,[/JJ&3355 "2H<<r   returnc                    |dz   }t          j        |j        d         ddf|j                  }t          j        |j        d         df|j                   }t          j        ||gd          }t          j        ||gd          }|d d d d df         | j        j        z  |d d d d df<   |d d d d df         | j        j        z  |d d d d df<   | j        j	        
                    |          }|                    d                              |          }||dk    z  }|| j        j        j        |dk    z  z   }t          | j        j                  D ]#}|| j        j        |         j        ||k    z  z   }$|S )Ng      ?r         )device)dim)torchzerosshaper#   onescatr   
image_sizer   pe_layer_pe_encoding	unsqueeze	expand_asnot_a_point_embedweightrangenum_point_embeddingspoint_embeddings)r   r   r   padding_pointpadding_labelpoint_embeddingis          r   r   zSAM2PromptEncoder._embed_points3   s   #c)\%7%:Aq$A,J]^^^\%7%:A$>|GZ[[[[y,!>AFFFy,!>AFFF !-QQQ1W 5
8M MQQQ1W ,QQQ1W 5
8M MQQQ1W-6CCLQQ#--b11;;OLL)\R-?@)D,?,Q,X\hln\n,oot*?@@ 	u 	uA-0C0TUV0W0^bnrsbs0ttOOr   c                 $   | j                             |          }| j         j        j                            dddd          }t
                              d|j                   ||z  d|z
  |z  z   }t
                              d|j                   |S )Nr!   r%   zno_mask_embedding.shape: %sg      ?zmask_embedding.shape: %s)r   mask_downscalingno_mask_embedr1   reshapeloggerinfor(   )r   r   r   mask_embeddingno_mask_embeddings        r   r   zSAM2PromptEncoder._embed_masksJ   s    ,==kJJ /=DLLQPRTUWXYY13D3JKKK(>9S?=RVg<gg.0DEEEr   )__name__
__module____qualname__r   r   r&   no_gradTensorr   r   r   __classcell__)r   s   @r   r   r      s        (      
 U]__=l= l= \	=
 = = = _=:%, el W\Wc    . u| X]Xd        r   r   
sam2_modelonnx_model_pathc                    t          |                                           }d}d}t          j        dd||dft          j                  }t          j        dd||ft          j                  }t          j        |dddt          j                  }t          j        dt          j                  } |||||          \  }	}
}t          	                    d	|j
                   t          	                    d
|j
                   t          	                    d|j
                   t          	                    d|j
                   t          	                    d|	j
                   t          	                    d|
j
                   t          	                    d|j
                   t          j                            |||||f|dddg dg dddddddddidddddid	  	         t          d|           d S )Nr"      r      lowhighsizedtyper!      rP   zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %sT   r   r   r   r   )r   r   r   
num_labels
num_points)r   r!   znum_points+1)r   r   r   r   r   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz#prompt encoder onnx model saved to )r   cpur&   randintfloatint32r'   r)   r=   r>   r(   onnxexportprint)rG   rH   sam2_prompt_encoderrU   rV   r   r   r   r   r   r   r   s               r   export_prompt_encoder_onnxre   S   s    ,J77;;==JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL+j!S#U[IIIKj%+666O4G4GlK5 51' KK(,*<===
KK(,*<===
KK'):;;;
KK+_-BCCC
KK-/@/FGGG
KK,.>.DEEE
KK$hn555	J	|[/B VVVJJJ ,>> ,>>|,%1n!E!E!"L 1
 
    $ 

/AAAAAr   c                    t          |                                           }d}d}t          j        dd||dft          j                  }t          j        dd||ft          j                  }t          j        |dddt          j                  }t          j        dt          j                  } |||||          \  }	}
}dd l}|	                    |d	g
          }|
                                fdt          t                              D             }t                              d|           |                                fdt          t                              D             }t                              d|           |                    ||                                |                                |                                |                                d          }t%          |          D ],\  }}t                              d|||         j                   -|\  }}}t)          d|	t          j        |          d          r^t)          d|
t          j        |          d          r9t)          d|t          j        |          d          rt-          d|            d S t-          d|            d S )Nr!      r   rK   r"   rL   rQ   rR   CPUExecutionProvider)	providersc                 *    g | ]}|         j         S  name).0r8   model_inputss     r   
<listcomp>z,test_prompt_encoder_onnx.<locals>.<listcomp>   s     JJJA<?'JJJr   zinput_names: %sc                 *    g | ]}|         j         S rk   rl   )rn   r8   model_outputss     r   rp   z,test_prompt_encoder_onnx.<locals>.<listcomp>   s!    MMMaM!$)MMMr   zoutput_names: %srT   zoutput %s shape: %sr   g?)mismatch_percentage_tolerancer   r   zonnx model has been verified: z onnx model verification failed: )r   r]   r&   r^   r_   r`   randr)   onnxruntimeInferenceSession
get_inputsr2   lenr=   r>   get_outputsrunnumpy	enumerater(   r   tensorrc   )rG   rH   rd   rU   rV   r   r   r   r   r   r   r   ru   ort_sessionrZ   r[   outputsr8   output_nameort_sparse_embeddingsort_dense_embeddingsort_image_pero   rr   s                         @@r   test_prompt_encoder_onnxr      s     ,J77;;==JJ=QTZQR8S[`[fgggL=QQj*5MUZU`aaaL*ZCEKHHHKj%+666O4G4GlK5 51' ..KaJb.ccK))++LJJJJs<7H7H1I1IJJJK
KK!;///++--MMMMM5]9K9K3L3LMMML
KK"L111oo(..00(..00&,,...4466		
 	
 G $L11 J J;);
8HIIII@G=/&L.//*-		
 	
 	
D + 0%,?S2T2Ttw
 
 
D +%,|"<"<\_
 
 
D 	@@@AAAAABBBCCCCCr   )loggingr&   sam2.modeling.sam2_baser   
sam2_utilsr   r   	getLoggerrA   r=   Moduler   strre   r   rk   r   r   <module>r      s   
   , , , , , , 5 5 5 5 5 5      		8	$	$A A A A A	 A A AH,B,B,B ,B ,B ,B^;D;D;D ;D ;D ;D ;D ;Dr   