
     `iY                         d Z ddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ  ej        e          Z e            rddlZ ed           G d de                      ZdgZdS )z
Processor class for SAM2.
    deepcopy)OptionalUnionN   )
ImageInput)ProcessorMixin)BatchEncoding)
TensorTypeis_torch_availablelogging)requires)torch)backendsc                       e Zd ZdZdgZdZd&dee         def fdZ	 	 	 	 	 	 	 d'd	ee	         d
ee	         dee
eeeee                                    ej        f                  dee
eeee                           ej        f                  dee
eeee                           ej        f                  dee
eee                  ej        f                  dee
eef                  defdZ	 d(deddddfdZd)dZd*dZd+dZd Zd Z	 d*de
ej        ej        ef         dededed ee         defd!Zd,d"Z	 	 	 	 	 d-d%Z xZS ).Sam2Processora  
    Constructs a SAM2 processor which wraps a SAM2 image processor and an 2D points & Bounding boxes processor into a
    single processor.

    [`Sam2Processor`] offers all the functionalities of [`Sam2ImageProcessorFast`] and [`Sam2VideoProcessor`]. See the docstring of
    [`~Sam2ImageProcessorFast.__call__`] and [`~Sam2VideoProcessor.__call__`] for more information.

    Args:
        image_processor (`Sam2ImageProcessorFast`):
            An instance of [`Sam2ImageProcessorFast`].
        target_size (`int`, *optional*):
            The target size (target_size, target_size) to which the image will be resized.
        point_pad_value (`int`, *optional*, defaults to -10):
            The value used for padding input points.
    image_processorSam2ImageProcessorFastNtarget_sizepoint_pad_valuec                      t                      j        |fi | || _        ||n| j        j        d         | _        d S )Nheight)super__init__r   r   sizer   )selfr   r   r   kwargs	__class__s        |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/sam2/processing_sam2.pyr   zSam2Processor.__init__:   sM    33F333.*5*A;;tG[G`aiGj    imagessegmentation_mapsinput_pointsinput_labelsinput_boxesoriginal_sizesreturn_tensorsreturnc                    | | j         |f||d|}	ne|Tt          |t          j                  r&|                                                                }t          d|i|          }	nt          d          |	d         }|Bt          |          dk    r/t          |          t          |          k    rt          d          |||| 	                    |dd	d
d          }
| 	                    |ddd          }| 	                    |dddd          }|
| 
                    |
          dd         }|| 
                    |          dd         }|| 
                    |          dd         |
|||k    rt          d          |=t          |          dk    r*t          fd|D                       rt          d          |
i|                     |
|dgz             }t          j        |t          j                  }|                     ||d           |	                    d|i           |M|                     ||          }t          j        |t          j                  }|	                    d|i           |Ot          j        |t          j                  }|                     ||d           |	                    d|i           |	S )a  
        This method uses [`Sam2ImageProcessorFast.__call__`] method to prepare image(s) for the model. It also prepares 2D
        points and bounding boxes for the model if they are provided.

        Args:
            images (`ImageInput`, *optional*):
                The image(s) to process.
            segmentation_maps (`ImageInput`, *optional*):
                The segmentation maps to process.
            input_points (`list[list[list[list[float]]]]`, `torch.Tensor`, *optional*):
                The points to add to the frame.
            input_labels (`list[list[list[int]]]`, `torch.Tensor`, *optional*):
                The labels for the points.
            input_boxes (`list[list[list[float]]]`, `torch.Tensor`, *optional*):
                The bounding boxes to add to the frame.
            original_sizes (`list[list[float]]`, `torch.Tensor`, *optional*):
                The original sizes of the images.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return.
            **kwargs:
                Additional keyword arguments to pass to the image processor.

        Returns:
            A [`BatchEncoding`] with the following fields:
            - `pixel_values` (`torch.Tensor`): The processed image(s).
            - `original_sizes` (`list[list[float]]`): The original sizes of the images.
            - `reshaped_input_sizes` (`torch.Tensor`): The reshaped input sizes of the images.
            - `labels` (`torch.Tensor`): The processed segmentation maps (if provided).
            - `input_points` (`torch.Tensor`): The processed points.
            - `input_labels` (`torch.Tensor`): The processed labels.
            - `input_boxes` (`torch.Tensor`): The processed bounding boxes.
        N)r#   r(   r'   )tensor_typez0Either images or original_sizes must be provided   z{original_sizes must be of length 1 or len(images). If you are passing a single image, you must pass a single original_size.   pointsz;[image level, object level, point level, point coordinates]   )expected_depth
input_nameexpected_formatexpected_coord_sizer   labelsz([image level, object level, point level])r0   r1   r2   boxesz)[image level, box level, box coordinates]zbInput points and labels have inconsistent dimensions. Please ensure they have the same dimensions.c              3   J   K   | ]}t          |          d          k     V  dS r,   N)len).0	img_boxesboxes_max_dimss     r    	<genexpr>z)Sam2Processor.__call__.<locals>.<genexpr>   s3      [[is9~~q(99[[[[[[r!   zInput boxes have inconsistent dimensions that would require padding, but boxes cannot be padded due to model limitations. Please ensure all images have the same number of boxes.)dtypeT)preserve_paddingr$   r%   is_bounding_boxr&   )r   
isinstancer   Tensorcputolistr
   
ValueErrorr8   _validate_single_input_get_nested_dimensionsany_pad_nested_listtensorfloat32_normalize_tensor_coordinatesupdateint64)r   r"   r#   r$   r%   r&   r'   r(   r   encoding_image_processorprocessed_pointsprocessed_labelsprocessed_boxespoints_max_dimslabels_max_dimspadded_pointsfinal_pointspadded_labelsfinal_labelsfinal_boxesr;   s                       @r    __call__zSam2Processor.__call__?   s   V ';t';("3-( ( 	( ($$ '.%,77 ?!/!3!3!5!5!<!<!>!>'46F5Wes't't't$$OPPP 22BC#n"5"5":":s>?R?RVYZ`VaVa?a?a N  
 #|'?;CZ#:: # ]$%  ;      $:: # J	  ;     #99 " K$% :  O  +"&"="=>N"O"OPRQRPR"S+"&"="=>N"O"OPRQRPR"S*!%!<!<_!M!Mbqb!Q  +0@0L"o55$|  
 *s?/C/Cq/H/H[[[[?[[[[[ $R    + $ 5 56F[\Z]H] ^ ^$|MOOO22<bf2ggg(//0NOOO+ $ 5 56F X X$|MMMM(//0NOOO*#l?%-PPP22;`d2eee(//0LMMM''r!   Fcoordsztorch.Tensorc                     |\  }}||}}t          |                                          }|r|                    ddd          }|d         ||z  z  |d<   |d         ||z  z  |d<   |r|                    dd          }|S )a  
        Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.

        Args:
            target_size (`int`):
                The target size of the image.
            coords (`torch.Tensor`):
                The coordinates to be normalized.
            original_size (`tuple`):
                The original size of the image.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether the coordinates are bounding boxes.
        r/   ).r   ).r,   r-   )r   floatreshape)	r   r   r[   original_sizer@   old_hold_wnew_hnew_ws	            r    _normalize_coordinatesz$Sam2Processor._normalize_coordinates   s      %u"Ku&!!'')) 	.^^B1--F55=9v55=9v 	+^^B**Fr!   r   c                     |dS t          |t          j                  rWdz
  k    st          |j                  dk    r&|                                                                S  fd|D             S t          |t          j                  rEdz
  k    st          |j                  dk    r|                                S  fd|D             S t          |t                    rk    r|S  fd|D             S t          |t          t          f          r|S t          dt          |                     )aS  
        Recursively convert various input formats (tensors, numpy arrays, lists) to nested lists.

        Args:
            data: Input data in any format
            expected_depth: Expected nesting depth
            current_depth: Current depth in recursion

        Returns:
            Nested list representation of the data
        Nr/   c                 D    g | ]}                     |d z             S r,   _convert_to_nested_listr9   itemcurrent_depthr0   r   s     r    
<listcomp>z9Sam2Processor._convert_to_nested_list.<locals>.<listcomp>   2    ooobf44T>=[\K\]]ooor!   c                 D    g | ]}                     |d z             S rh   ri   rk   s     r    rn   z9Sam2Processor._convert_to_nested_list.<locals>.<listcomp>   ro   r!   c                 D    g | ]}                     |d z             S rh   ri   rk   s     r    rn   z9Sam2Processor._convert_to_nested_list.<locals>.<listcomp>  ro   r!   zUnsupported data type: )rA   r   rB   r8   shapenumpyrD   npndarraylistintr^   rE   type)r   datar0   rm   s   ` ``r    rj   z%Sam2Processor._convert_to_nested_list   sk    <4 dEL)) 	E 222c$*oo6J6Jzz||**,,,oooooojnoooobj)) 	E 222c$*oo6J6J{{}}$oooooojnooood## 
	E.. pooooojnoooosEl++ 	EKCtDzzCCDDDr!   c                 8   |g }t          |t                    s|S t          |          dk    r#|                    t          |                     n&t	          |d         t          |                    |d<   t          |          dk    r|D ]}t          |t                    ru|                     |          }t          |          D ]P\  }}|dz   t          |          k    r|                    |           1t	          ||dz            |          ||dz   <   Q|S )a`  
        Get the maximum dimensions at each level of nesting.

        Args:
            nested_list (`list`):
                Nested list structure.
            max_dims (`list`, *optional*):
                Current maximum dimensions (for recursion).

        Returns:
            `list`: A list of maximum dimensions for each nesting level.
        Nr   r,   )rA   rv   r8   appendmaxrG   	enumerate)r   nested_listmax_dimsrl   sub_dimsidims          r    rG   z$Sam2Processor._get_nested_dimensions  s+    H+t,, 	Ox==AOOC,,----hqk3{+;+;<<HQK{a# H HdD)) H#::4@@H"+H"5"5 H H3q5CMM11$OOC0000.1(1q5/3.G.GHQUOOr!   c                   	 || j         }|t          |          k    r|S t          |t                    s|g}t          |          }||         }|t          |          dz
  k    r|                    |g||z
  z             n|dk    r{|t          |          dz
  k     r$||dz   d         }|                     ||          	n|g||dz            z  	|                    	fdt          ||z
            D                        nQ||dz   d         }|                     ||          	|                    	fdt          |          D                        |t          |          dz
  k     r^t          t          |                    D ]A}t          ||         t                    r$|                     ||         ||dz   |          ||<   B|S )a  
        Recursively pad a nested list to match target dimensions.

        Args:
            nested_list (`list`):
                Nested list to pad.
            target_dims (`list`):
                Target dimensions for each level.
            current_level (`int`, *optional*, defaults to 0):
                Current nesting level.
            pad_value (`int`, *optional*):
                Value to use for padding.

        Returns:
            `list`: The padded nested list.
        Nr,   r   r/   c                 .    g | ]}t                    S  r   r9   _templates     r    rn   z2Sam2Processor._pad_nested_list.<locals>.<listcomp>[  s!    #b#b#b1HX$6$6#b#b#br!   c                 .    g | ]}t                    S r   r   r   s     r    rn   z2Sam2Processor._pad_nested_list.<locals>.<listcomp>`  s!    #S#S#S1HX$6$6#S#S#Sr!   )r   r8   rA   rv   extend_create_empty_nested_structurerangerI   )
r   r~   target_dimscurrent_level	pad_valuecurrent_sizer   template_dimsr   r   s
            @r    rI   zSam2Processor._pad_nested_list,  s    " ,IC,,,, +t,, 	(&-K ;''!-0 C,,q000	{kL.HIJJJJ a 3{#3#3a#777$/0A0C0C$DM#BB=R[\\HH !*{[9J-KKH""#b#b#b#bkT`F`@a@a#b#b#bcccc !,MA,=,?,? @>>}iXX""#S#S#S#Sk@R@R#S#S#STTT 3{++a///3{++,, v vk!nd33 v%)%:%:;q>;XehiXikt%u%uKNr!   c                      t                    dk    rgd         z  S  fdt          d                   D             S )a  
        Create an empty nested structure with given dimensions filled with pad_value.

        Args:
            dims (`list`):
                The dimensions of the nested structure.
            pad_value (`int`):
                The value to fill the structure with.
        r,   r   c                 L    g | ] }                     d d                   !S r7   )r   )r9   r   dimsr   r   s     r    rn   z@Sam2Processor._create_empty_nested_structure.<locals>.<listcomp>w  s2    eeeQRD77QRR)LLeeer!   )r8   r   )r   r   r   s   ```r    r   z,Sam2Processor._create_empty_nested_structurej  sW     t99>>;a((eeeeeeV[\`ab\cVdVdeeeer!   c                 
   t          |t                    r3t          |          dk    rdS d|                     |d                   z   S t          |t          j        t          j        f          rt          |j                  S dS )z
        Get the nesting level of a list structure.

        Args:
            input_list (`list`):
                The list to get the nesting level of.
        r   r,   )	rA   rv   r8   _get_nesting_levelrt   ru   r   rB   rr   )r   
input_lists     r    r   z Sam2Processor._get_nesting_levely  sz     j$'' 	):!##qt..z!}====
RZ$>?? 	)z'(((qr!   ry   r0   r1   r2   r3   c                    |dS t          |t          j        t          j        f          ry|j        |k    r!t          d| d| d| d|j         d	          |5|j        d         |k    r$t          d| d| d|j        d          d	          |                     ||          S t          |t                    rM| 
                    |          }||k    rt          d| d
| d| d| d	          |                     ||          S dS )a  
                Validate a single input by ensuring proper nesting and raising an error if the input is not valid.

                Args:
                    data (`torch.Tensor`, `np.ndarray`, or `list`):
                        Input data to process.
                    expected_depth (`int`):
                        Expected nesting depth.
                    input_name (`str`):
                        Name of the input for error messages.
                    expected_format (`str`):
                        The expected format of the input.
                    expected_coord_size (`int`, *optional*):
                        Expected coordinate size (2 for points, 4 for boxes, None for labels).
        .
        NzInput z must be a tensor/array with z, dimensions. The expected nesting format is z. Got z dimensions.r]   z as the last dimension, got .z must be a nested list with z( levels. The expected nesting format is z levels.)rA   r   rB   rt   ru   ndimrE   rr   rj   rv   r   )r   ry   r0   r1   r2   r3   rm   s          r    rF   z$Sam2Processor._validate_single_input  s   0 <4 dU\2:677 	FyN**  qZ  q  qn  q  q  CR  q  q  Z^  Zc  q  q  q   %0:b>%888$ M  M  MJ]  M  M{  |F  GI  |J  M  M  M   //nEEE dD!! 	F 33D99M..  lZ  l  l^  l  l  ~M  l  l  Ub  l  l  l   //nEEE	F 	Fr!   c                    |r"|| j         k    }|                    dd          }t          t          |                    D ]}||j        d         k     r|t          |          k     r||         n|d         }|                     | j        ||         ||          }	|rA||         }
t          j        |
	                    ||                   |	||                   ||<   |	||<   dS )a  
        Helper method to normalize coordinates in a tensor across multiple images.

        Args:
            tensor (`torch.Tensor`):
                Input tensor with coordinates.
            original_sizes (`list`):
                Original image sizes.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether coordinates are bounding boxes.
            preserve_padding (`bool`, *optional*, defaults to `False`):
                Whether to preserve padding values (for points).
        r]   T)r   keepdimr   r?   N)
r   allr   r8   rr   re   r   r   where	expand_as)r   rJ   r'   r@   r>   mask
coord_maskimg_idxr`   normalized_coordsimg_masks              r    rL   z+Sam2Processor._normalize_tensor_coordinates  s     	8T11Db$77JS0011 	8 	8Ga((;BSEXEX;X;Xw 7 7^lmn^o$($?$?$fWo}Ve %@ % %! $ 8)'2H&+k **6'?;;=NPVW^P_' 'F7OO '8F7O	8 	8r!           Tc           	      4     | j         j        |||||||fi |S )a-  
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`Union[List[torch.Tensor], List[np.ndarray]]`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
                The original sizes of each image before it was resized to the model's expected input shape, in (height,
                width) format.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                Threshold for binarization and post-processing operations.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            max_hole_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a hole to fill.
            max_sprinkle_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a sprinkle to fill.
            apply_non_overlapping_constraints (`bool`, *optional*, defaults to `False`):
                Whether to apply non-overlapping constraints to the masks.

        Returns:
            (`torch.Tensor`): Batched masks in batch_size, num_channels, height, width) format, where (height, width)
            is given by original_size.
        )r   post_process_masks)	r   masksr'   mask_thresholdbinarizemax_hole_areamax_sprinkle_area!apply_non_overlapping_constraintsr   s	            r    r   z Sam2Processor.post_process_masks  sC    F 7t#6-	
 	
 	
 	
 		
r!   )Nr   )NNNNNNN)F)r   )N)r   N)FF)r   Tr   r   F)__name__
__module____qualname____doc__
attributesimage_processor_classr   rw   r   r   r   rv   r^   r   rB   strr   r
   rZ   re   rj   rG   rI   r   r   rt   ru   rF   rL   r   __classcell__)r   s   @r    r   r   %   s          $$J4k kXc] k\_ k k k k k k (,26UYMQNRKO;?B( B($B( $J/B( uT$tDK/@*A%BEL%PQR	B(
 uT$tCy/%:EL%HIJB( eDd5k):$;U\$IJKB( !tDK'8%,'F!GHB( !sJ!78B( 
B( B( B( B(J X] (6	   <$E $E $E $EL# # # #J< < < <|f f f  . .20F 0FEL"*d230F 0F 	0F
 0F &c]0F 
0F 0F 0F 0Fd!8 !8 !8 !8N */,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
r!   r   )r   copyr   typingr   r   rs   rt   image_utilsr   processing_utilsr	   tokenization_utils_baser
   utilsr   r   r   utils.import_utilsr   
get_loggerr   loggerr   r   __all__r   r!   r    <module>r      s6          " " " " " " " "     % % % % % % . . . . . . 4 4 4 4 4 4 < < < < < < < < < < * * * * * * 
	H	%	% LLL 
:e
 e
 e
 e
 e
N e
 e
 e
P 
r!   