
     `iV/                        d Z ddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZmZmZ ddlmZmZmZmZ dd	lmZmZ dd
lmZ  e            rddlZ e            rddlZ G d de          Z G d ded          Z G d de          ZdgZdS )z
Processor class for SAM.
    )deepcopy)OptionalUnionN   )
ImageInput)ImagesKwargsProcessingKwargsProcessorMixin)
AudioInputBatchEncodingPreTokenizedInput	TextInput)is_tf_availableis_torch_available)
VideoInputc                       e Zd ZU ee         ed<   eeee                           ed<   eeee                           ed<   eeeee                                    ed<   ee         ed<   dS )SamImagesKwargssegmentation_mapsinput_pointsinput_labelsinput_boxespoint_pad_valueN)	__name__
__module____qualname__r   r   __annotations__listfloatint     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/sam/processing_sam.pyr   r   &   s         
++++4U,----4S	?++++$tDK012222c]"""""r!   r   c                   &    e Zd ZU eed<   dddiiZdS )SamProcessorKwargsimages_kwargsr   N)r   r   r   r   r   	_defaultsr    r!   r"   r$   r$   .   s0         """"s
IIIr!   r$   F)totalc                       e Zd ZdZdgZdZ fdZ	 	 	 	 ddee         dee	e
eee
         ee         f                  dee         d	ee         d
ef
dZ	 	 	 	 	 ddZd Z	 ddedej        d
ej        fdZ	 	 	 ddZed             Zd Z xZS )SamProcessora  
    Constructs a SAM processor which wraps a SAM image processor and an 2D points & Bounding boxes processor into a
    single processor.

    [`SamProcessor`] offers all the functionalities of [`SamImageProcessor`]. See the docstring of
    [`~SamImageProcessor.__call__`] for more information.

    Args:
        image_processor (`SamImageProcessor`):
            An instance of [`SamImageProcessor`]. The image processor is a required input.
    image_processorSamImageProcessorc                 x    t                                          |           | j        j        d         | _        d S )Nlongest_edge)super__init__r+   sizetarget_size)selfr+   	__class__s     r"   r0   zSamProcessor.__init__G   s4    )))/4^Dr!   Nimagestextaudiovideoreturnc           
      2    | j         t          fdi i|}|d                             dd          }|d                             dd          }|d                             dd          }	|d                             dd          }
 | j        |fi |d         }|d         }t	          |d	          r|                                }|                     |||	
          \  }}}	|                     |||||	|d                             d          |
          }|S )z
        This method uses [`SamImageProcessor.__call__`] method to prepare image(s) for the model. It also prepares 2D
        points and bounding boxes for the model if they are provided.
        tokenizer_init_kwargsr%   r   Nr   r   r   original_sizesnumpy)r   r   r   common_kwargsreturn_tensors)r   r   r   r?   r   )	_merge_kwargsr$   popr+   hasattrr=   _check_and_preprocess_points_normalize_and_convertget)r3   r5   r6   r7   r8   kwargsoutput_kwargsr   r   r   r   encoding_image_processorr<   s                r"   __call__zSamProcessor.__call__K   sq    +*
 
"$
 
 

 %_599.$OO$_599.$OO#O488MM'8<<=NPTUU#74#7$
 $
O,$
 $
  22BC>7++ 	4+1133N262S2S%%# 3T 3
 3
/lK $(#>#>$%%#(9==>NOO+ $? $
 $
  ('r!   ptr&   c                 6    t                    t                    k    r fdD             n fdt                    D             t          fdD                       s|                     ||          \  }t	          j                  |t	          j        |          }|`t                    t          |          k    r fd|D             }n fdt          |          D             }t	          j        |          }||dk    rDt          j        |          }t          |j                  dk    r|	                    d          n|}nI|d	k    rCt          j        |          }t          |j                  dk    rt          j        |d          n|}|                    d
|i           |dk    rDt          j                  t          j                  dk    r	                    d          nnI|d	k    rCt          j                  t          j                  dk    rt          j        d          n|                    di           ||dk    rDt          j        |          }t          |j                  dk    r|	                    d          n|}nI|d	k    rCt          j        |          }t          |j                  dk    rt          j        |d          n|}|                    d|i           |S )Nc                 T    g | ]$}                     j        |d                    %S )r   _normalize_coordinatesr2   ).0pointr<   r3   s     r"   
<listcomp>z7SamProcessor._normalize_and_convert.<locals>.<listcomp>   sA          `eD//0@%XYIZ[[     r!   c                 N    g | ]!\  }}                     j        ||          "S r    rM   )rO   rP   original_sizer3   s      r"   rQ   z7SamProcessor._normalize_and_convert.<locals>.<listcomp>   sB          ,} //0@%WW     r!   c              3   D   K   | ]}|j         d          j         k    V  dS r   Nshape)rO   rP   r   s     r"   	<genexpr>z6SamProcessor._normalize_and_convert.<locals>.<genexpr>   s1      VVu{l1o&;;VVVVVVr!   c                 X    g | ]&}                     j        |d          d          'S )r   Tis_bounding_boxrM   )rO   boxr<   r3   s     r"   rQ   z7SamProcessor._normalize_and_convert.<locals>.<listcomp>   sJ        //0@#~VWGXjn/oo  r!   c                 R    g | ]#\  }}                     j        ||d           $S )TrZ   rM   )rO   r\   rS   r3   s      r"   rQ   z7SamProcessor._normalize_and_convert.<locals>.<listcomp>   sH       *] //0@#}fj/kk  r!   rJ   r      tfr      r   r   )lenzipall_pad_points_and_labelsnparraytorch
from_numpyrW   	unsqueezer_   convert_to_tensorexpand_dimsupdate)r3   rH   r<   r   r   r   r?   r   s   ` ``    r"   rD   z#SamProcessor._normalize_and_convert~   s    #>""c,&7&777         iu            03L.0Q0Q     
 VVVVVVVVV +151L1L$lO2 2.L, 8L11L#8L11L">""c+&6&666    *  
   .1+~.N.N   (;//K"%%#.{;;:=k>O:P:PTU:U:Uk33A666[f4'' 2;??@CKDU@V@VZ[@[@[bn[!<<<al$++]K,HIII#%%$/==<?@R<S<SWX<X<X|55a888^j4''!3LAABElFXBYBY]^B^B^r~lA>>>dp$++^\,JKKK#%%$/==<?@R<S<SWX<X<X|55a888^j4''!3LAABElFXBYBY]^B^B^r~lA>>>dp$++^\,JKKK''r!   c           	      r   t          d |D                       }g }t          |          D ]\  }}|j        d         |k    r\t          j        |t          j        ||j        d         z
  df          |z   gd          }t          j        ||         |g          ||<   |                    |           |}||fS )zh
        The method pads the 2D points and labels to the maximum number of points in the batch.
        c              3   0   K   | ]}|j         d          V  dS rU   rV   )rO   rP   s     r"   rX   z6SamProcessor._pad_points_and_labels.<locals>.<genexpr>   s(       J JEQ J J J J J Jr!   r      )axis)max	enumeraterW   re   concatenatezerosappend)r3   r   r   r   expected_nb_pointsprocessed_input_pointsirP   s           r"   rd   z#SamProcessor._pad_points_and_labels   s     ! J J\ J J JJJ!#!,// 	1 	1HAu{1~!333BH&85;q>&I1%MNNQ``ahi   #%)LOo=N"O"OQ"))%0000-\))r!   Fr2   coordsc                 B   |\  }}| j                             ||          \  }}t          |                              t                    }|r|                    ddd          }|d         ||z  z  |d<   |d         ||z  z  |d<   |r|                    dd          }|S )z~
        Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.
        )r.   ro   ).r   ).r^   r`   )r+   _get_preprocess_shaper   astyper   reshape)	r3   r2   ry   rS   r[   old_hold_wnew_hnew_ws	            r"   rN   z#SamProcessor._normalize_coordinates   s     %u+AA-^iAjju&!!((// 	.^^B1--F55=9v55=9v 	+^^B**Fr!   c                 r   |t          |d          r&|                                                                }t          |t                    rt          |d         t                    st          d          d |D             }nd}|t          |d          r&|                                                                }t          |t                    rt          |d         t                    st          d          d |D             }nd}|t          |d          r&|                                                                }t          |t                    r<t          |d         t                    r!t          |d         d         t                    st          d          d	 |D             }nd}|||fS )
a8  
        Check and preprocesses the 2D points, labels and bounding boxes. It checks if the input is valid and if they
        are, it converts the coordinates of the points and bounding boxes. If a user passes directly a `torch.Tensor`,
        it is converted to a `numpy.ndarray` and then to a `list`.
        Nr=   r   z7Input points must be a list of list of floating points.c                 6    g | ]}t          j        |          S r    re   rf   )rO   input_points     r"   rQ   z=SamProcessor._check_and_preprocess_points.<locals>.<listcomp>  s"    RRRkBH[11RRRr!   z-Input labels must be a list of list integers.c                 6    g | ]}t          j        |          S r    r   )rO   labels     r"   rQ   z=SamProcessor._check_and_preprocess_points.<locals>.<listcomp>  s     FFFBHUOOFFFr!   z>Input boxes must be a list of list of list of floating points.c                 p    g | ]3}t          j        |                              t           j                  4S r    )re   rf   r}   float32)rO   r\   s     r"   rQ   z=SamProcessor._check_and_preprocess_points.<locals>.<listcomp>  s0    SSS28C==//
;;SSSr!   )rB   r=   tolist
isinstancer   
ValueError)r3   r   r   r   s       r"   rC   z)SamProcessor._check_and_preprocess_points   s    #|W-- =+1133::<<lD11 \LQROUY9Z9Z \ !Z[[[RR\RRRLLL#|W-- =+1133::<<lD11 RLQROUY9Z9Z R !PQQQFFFFFLLL"{G,, ;)//1188:: {D11c!+a.$77c "+a."3T::c
 !!abbbSS{SSSKKK\;66r!   c                 B    | j         j        }t          |ddgz             S )Nr<   reshaped_input_sizes)r+   model_input_namesr   )r3   image_processor_input_namess     r"   r   zSamProcessor.model_input_names   s)    &*&:&L#/3CE[2\\]]]r!   c                 &     | j         j        |i |S )N)r+   post_process_masks)r3   argsrF   s      r"   r   zSamProcessor.post_process_masks%  s    6t#6GGGGr!   )NNNN)NNNrJ   r&   )F)NNN)r   r   r   __doc__
attributesimage_processor_classr0   r   r   r   r   r   r   r   r   r   rI   rD   rd   r   re   ndarrayrN   rC   propertyr   r   __classcell__)r4   s   @r"   r*   r*   7   s       
 
 $$J/E E E E E (,hl&*&*1( 1($1( uY(94	?DQbLccde1( 
#	1(
 
#1( 
1( 1( 1( 1(n L( L( L( L(\* * *" TY (*
	   . 	-7 -7 -7 -7^ ^ ^ X^H H H H H H Hr!   r*   )r   copyr   typingr   r   r=   re   image_utilsr   processing_utilsr   r	   r
   tokenization_utils_baser   r   r   r   utilsr   r   video_utilsr   rg   
tensorflowr_   r   r$   r*   __all__r    r!   r"   <module>r      s          " " " " " " " "     % % % % % % N N N N N N N N N N ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 8 8 8 8 8 8 8 8 % % % % % %  LLL? # # # # #l # # #    )    oH oH oH oH oH> oH oH oHd 
r!   