
     `ia                        d dl mZ d dlmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZ  ej         e!          Z"d	ej#        d
e$de%ej#                 fdZ& G d de          Z'dgZ(dS )    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_patch_output_sizeselect_best_resolution)PaddingModeconvert_to_rgbpadresizeto_channel_dimension_format)
ChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypeloggingimage
patch_sizereturnc                 $   g }t          | |          \  }}t          d||          D ]g}t          d||          D ]S}|t          j        k    r| |||z   |||z   f         }n| dd|||z   |||z   f         }|                    |           Th|S )a  
    Divides an image into patches of a specified size.

    Args:
        image (`np.ndarray`):
            The input image.
        patch_size (`int`):
            The size of each patch.
        input_data_format (`ChannelDimension` or `str`):
            The channel dimension format of the input image.

    Returns:
        list: A list of np.ndarray representing the patches.
    channel_dimr   N)r   ranger   LASTappend)	r   r   input_data_formatpatchesheightwidthijpatchs	            /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/aria/image_processing_aria.pydivide_to_patchesr-   .   s     G"56GHHHMFE1fj)) " "q%,, 	" 	"A $4$999a!j.0!a*n2DDEaaaQ^!3QZ5GGHNN5!!!!	" N    c                    4    e Zd ZdZg dZddddddddddej        fd	eee	                  d
eee	                  de
de
deeee
e
f                           dee         dee         dedee
e	f         dee         def fdZdddddddddddej        dfdeeee         f         d	eee	ee	         f                  d
eee	ee	         f                  dee
         dee
         dee         dee         dee         dee	         dee         dee         deeeef                  dee         deeeef                  fdZdej        dededej        fdZdedefd Zdej        dededej        fd!Zej        d"ddfdej        d#ee
ee
e
f         eee
e
f                  f         d$ed%ee	ee	         f         deeeef                  deeeef                  dej        fd&Zdej        d'eee
e
f                  d(e
dedededeej                 fd)Zd-d*e
d+e
fd,Z xZ S ).AriaImageProcessoraG  
    A vision processor for the Aria model that handles image preprocessing.
    Initialize the AriaImageProcessor.

    Args:
        image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
            Mean values for normalization.
        image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
            Standard deviation values for normalization.
        max_image_size (`int`, *optional*, defaults to 980):
            Maximum image size.
        min_image_size (`int`, *optional*, defaults to 336):
            Minimum image size.
        split_resolutions (`list`, *optional*, defaults to a list of optimal,resolutions as tuples):
            The optimal resolutions for splitting the image.
        split_image (`bool`, *optional*, defaults to `False`):
            Whether to split the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
            The resampling filter to use if resizing the image.
    pixel_values
pixel_mask	num_cropsN  iP  FTgp?
image_mean	image_stdmax_image_sizemin_image_sizesplit_resolutionssplit_imagedo_convert_rgb
do_rescalerescale_factordo_normalizeresamplec                     t                      j        di | |g d}|g d}|| _        || _        || _        || _        || _        |g d}d |D             }|| _        || _        || _	        |	| _
        |
| _        || _        d S )N)      ?rB   rB   ))      )rC   r   )rC      )rC      )rC      )rC      )rC      )rD   rE   )rD   r   )rD   rD   )rD   rC   )r   rC   )r   rD   )rE   rC   )rE   rD   )rF   rC   )rG   rC   )rH   rC   )rI   rC   c                 :    g | ]}|d          dz  |d         dz  fS )r     rC    ).0els     r,   
<listcomp>z/AriaImageProcessor.__init__.<locals>.<listcomp>   s-     X X X"Q%#+r!us{!; X X Xr.   rL   )super__init__r8   r9   r6   r7   r;   r:   r<   r=   r>   r?   r@   )selfr6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   kwargs	__class__s                r,   rQ   zAriaImageProcessor.__init__l   s     	""6"""(J'I,,$"&$ !y  !y  !y X XFW X X X!2,$,( r.   ptimagesreturn_tensorsdata_formatr%   c           	      x   ||n| j         }||n| j        }||n| j        }||n| j        }||n| j        }||n| j        }||n| j        }|	|	n| j        }	|
|
n| j        }
||n| j	        }|dvrt          d          |                     |          }t          |          }t          |          st          d          t          |
|||||	           |rd |D             }d |D             }|r/t          |d                   rt                               d	           |t%          |d                   }g }g }d}|D ]}|r!|                     || j        ||||
          }n|g}|t+          |          |k    rt+          |          }|D ]i}t-          |          \  }}|t/          ||          z  }||k    r#t/          t1          ||z            |          |f}n"|t/          t1          ||z            |          f}t3          |||||          }||d         z
  ||d         z
  }}t5          |d|fd|ff||
          }t7          j        ||ft:                    }d|d|d         d|d         f<   |                    |           |r|                     ||	|          }|
r9|                      || j         | j        ||
          }|tC          |||          n|}|                    |           ktE          t7          j#        |d          t7          j#        |d          |d|          S )aI  
        Process a list of images.

        Args:
            images (ImageInput or list of ImageInput):
                The input image or a list of images.
            image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
                Mean values for normalization.
            image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
                Standard deviation values for normalization.
            max_image_size (`int`, *optional*, defaults to `self.max_image_size` (980)):
                Maximum image size.
            min_image_size (`int`, *optional*, defaults to `self.min_image_size` (336)):
                Minimum image size.
            split_image (`bool`, *optional*, defaults to `self.split_image` (False)):
                Whether to split the image.
            do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
                Whether to convert the image to RGB.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
                Whether to normalize the image.
            resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
                The resampling filter to use if resizing the image.
            return_tensors (`str` or `TensorType`, *optional*, defaults to "pt"):
                The type of tensor to return.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`:
                        image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`:
                        image in (height, width, num_channels) format.
                If unset, will use same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`:
                        image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`:
                        image in (height, width, num_channels) format.
                If unset, will use the inferred format of the input image.

        Returns:
            BatchFeature:
                A BatchFeature object containing:
                - 'pixel_values':
                    Tensor of processed image pixel values.
                - 'pixel_mask':
                    Boolean pixel mask. This mask is a 2D tensor of shape (max_image_size, max_image_size) where:
                    - True (1) values indicate pixels that belong to the original resized image.
                    - False (0) values indicate pixels that are part of the padding.
                  The mask helps distinguish between actual image content and padded areas in subsequent processing steps.
                - 'num_crops':
                    The maximum number of crops across all images.
        N)rK   r5   z(max_image_size must be either 490 or 980zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r?   r6   r7   r@   r=   r>   c                 ,    g | ]}t          |          S rL   )r   rM   r   s     r,   rO   z1AriaImageProcessor.preprocess.<locals>.<listcomp>   s     @@@nU++@@@r.   c                 ,    g | ]}t          |          S rL   )r   r[   s     r,   rO   z1AriaImageProcessor.preprocess.<locals>.<listcomp>   s     <<<E.''<<<r.   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)rX   r%   )r@   rX   r%   rC   )dtype)r   scaler%   )axisr1   )datatensor_type)$r6   r7   r8   r9   r;   r<   r=   r>   r?   r@   
ValueErrorfetch_imagesr   r   r   r   loggerwarning_oncer   get_image_patchesr:   lenr   maxintr   r   npzerosboolr$   rescale	normalizer   r   stack)rR   rV   r6   r7   r8   r9   r;   r<   r=   r>   r?   r@   rW   rX   r%   r2   pixel_masksr4   r   crop_images
crop_imagehwr^   new_sizecrop_image_resizedpadding_bottompadding_rightcrop_image_paddedr3   s                                 r,   
preprocesszAriaImageProcessor.preprocess   st   R $.#9ZZt
!*!6IIDN	+9+E4K^+9+E4K^%0%<kk$BR+9+E4K^#-#9ZZt
+9+E4K^'3'?||TEV'388++GHHH""6**)&11F## 	:  
 	&%!!)	
 	
 	
 	
  	A@@@@@F =<V<<< 	/&)44 	s  
 $ >vay I I	 @	7 @	7E 
&"44*" 1&7 5    %g C$4$4y$@$@,,	) 17 17
%j111&Q266 #CE	NNN C C^THH .CE	NNN0S0STH%+% 1&7& & &" 1?!0Ln_ghi_jNj$'&(1m*<= 1&7	% % %!  X~~&FdSSS
;<
=Xa[=-HQK-78"":... (,/~Yj )5 ) )%   (,)$5*; )7 ) )% '2 44E{Tefff. & ##$56666c17d  "A > > > h{;;;& 
 '
 
 
 	
r.   r   target_resolutionr   c                 X    t          |||          \  }}t          |||f||          }|S )aG  
        Resizes an image to a target resolution while maintaining aspect ratio.

        Args:
            image (np.ndarray):
                The input image.
            target_resolution (tuple):
                The target resolution (height, width) of the image.
            resample (`PILImageResampling`):
                Resampling filter to use if resizing the image.
            input_data_format (`ChannelDimension` or `str`):
                The channel dimension format of the input image.

        Returns:
            np.ndarray: The resized and padded image.
        r@   r%   )r	   r   )rR   r   r{   r@   r%   
new_height	new_widthresized_images           r,   _resize_for_patchingz'AriaImageProcessor._resize_for_patchingW  s@    & !6e=NPa b b
I uz9&=duvvvr.   original_resolutionc                     |\  }}|\  }}t          ||z
  d          \  }}t          ||z
  d          \  }	}
|	|	|
z   f|||z   ffS )NrD   )divmod)rR   r   r{   original_heightoriginal_widthtarget_heighttarget_widthpaste_xr_xpaste_yr_ys              r,   _get_padding_sizez$AriaImageProcessor._get_padding_sizeq  sc    *='&7#|l^;Q??mo=qAA3''7S=)AAAr.   c                     t          |||          }|                     ||          }|                     ||          }|S )zU
        Pad an image to a target resolution while maintaining aspect ratio.
        )padding)r	   r   r   )rR   r   r{   r%   new_resolutionr   padded_images          r,   _pad_for_patchingz$AriaImageProcessor._pad_for_patchingx  sH     /u6GIZ[[((9JKKxxwx77r.   g        r   modeconstant_valuesc                 j   t          |t                    st          |          dk    rt          ||||||          S |t	          |          }t
          j        dt
          j        dt
          j        dt
          j	        di}t          j        ||||         |          }|t          |||          n|}|S )a	  
        Pads the `image` with the specified `padding` and `mode`. Padding can be in the (`height`, `width`)
        dimension of in the (`num_patches`) dimension. In the second case an iterable if tuples is expected
        as input.

        Args:
            image (`np.ndarray`):
                The image to pad.
            padding (`int` or `tuple[int, int]` or `Iterable[tuple[int, int]]`):
                Padding to apply to the edges of the height, width axes. Can be one of three formats:
                - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
                - `((before, after),)` yields same before and after pad for height and width.
                - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
            mode (`PaddingMode`):
                The padding mode to use. Can be one of:
                    - `"constant"`: pads with a constant value.
                    - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
                    vector along each axis.
                    - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
                    - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
            constant_values (`float` or `Iterable[float]`, *optional*):
                The value to use for the padding if `mode` is `"constant"`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                If unset, will use same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                If unset, will use the inferred format of the input image.

        Returns:
            `np.ndarray`: The padded image.

        rE   Nconstantreflectedge	symmetric)r   r   )
isinstanceri   rg   r   r   r   CONSTANTREFLECT	REPLICATE	SYMMETRICrj   r   )rR   r   r   r   r   rX   r%   padding_mode_mappings           r,   r   zAriaImageProcessor.pad  s    ` gs## 	^s7||q'8'8ugt_kK\]]]$ >u E E  *!6!;	 
 ug,@,FXghhhR]Ri'{<MNNNot 	 r.   grid_pinpointsr   c                 <   t          |t                    st          d          |}t          |          }t	          ||          }	|                     ||	|          }
|                     |
|	          }t          ||          }fd|D             }|S )a]  
        Process an image with variable resolutions by dividing it into patches.

        Args:
            image (`np.ndarray`):
                The input image to be processed.
            grid_pinpoints (list[tuple[int, int]]):
                A list of possible resolutions as tuples.
            patch_size (`int`):
                Size of the patches to divide the image into.
            resample (`PILImageResampling`):
                Resampling filter to use if resizing the image.
            data_format (`ChannelDimension` or `str`):
                The channel dimension format for the output image.
            input_data_format (`ChannelDimension` or `str`):
                The channel dimension format of the input image.

        Returns:
            `list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
        z6grid_pinpoints must be a list of possible resolutions.r    r}   )r%   )r   r%   c                 4    g | ]}t          |           S ))r!   input_channel_dim)r   )rM   r+   rX   r%   s     r,   rO   z8AriaImageProcessor.get_image_patches.<locals>.<listcomp>  s9     
 
 
 (;Zklll
 
 
r.   )r   list	TypeErrorr   r
   r   r   r-   )rR   r   r   r   r@   rX   r%   possible_resolutions
image_sizebest_resolutionr   r   r&   s        ``      r,   rf   z$AriaImageProcessor.get_image_patches  s    : .$// 	VTUUU-#E7HIII
0=QRR11?XIZ 2 
 
 --m_`q-rr#LZ[lmmm
 
 
 
 
 
 
 
 r.   r'   r(   c                     |                     d| j                  }|                     d| j                  }t          ||f| j                  \  }}|sdn
||z  |z  |z  }|S )a  
        A utility that returns number of image patches for a given image size.

        Args:
            height (`int`):
                Height of the input image.
            width (`int`):
                Width of the input image.
            images_kwargs (`dict`, *optional*)
                Any kwargs to override defaults of the image processor.
        Returns:
            `int`: Number of patches per image.
        r;   r8   rC   )getr;   r8   r
   r:   )	rR   r'   r(   images_kwargsr;   r8   resized_heightresized_widthnum_patchess	            r,   get_number_of_image_patchesz.AriaImageProcessor.get_number_of_image_patches  sv     $''t7GHH&**+;T=PQQ(>PTPf(g(g%*raa.0PS`0`dr0rr.   )N)!__name__
__module____qualname____doc__model_input_namesr   BICUBICr   r   floatri   tuplerl   r   rQ   r   FIRSTr   strr   rz   rj   ndarrayr   r   r   r   r   r   r   rf   r   __classcell__)rT   s   @r,   r0   r0   J   s        > DCC -1+/!!=A&+)-,3'+'9'A"! "!T%[)"! DK("! 	"!
 "! $DsCx$9:"! d^"! !"! "! c5j)"! tn"! %"! "! "! "! "! "!N ;?9=(,(,&*)-%)*.'+15;?2B2HDHE
 E
j$z"223E
 U5$u+#567E
 E%e"456	E

 !E
 !E
 d^E
 !E
 TNE
 !E
 tnE
 -.E
 !sJ!78E
 ./E
 $E#/?*?$@AE
 E
 E
 E
NZ49Xh	   4BU Bu B B B BZ49N^	   " (09<>BDH@ @z@ sE#s(OXeCHo-FFG@ 	@
 uhuo56@ eC)9$9:;@ $E#/?*?$@A@ 
@ @ @ @D0z0 U38_-0 	0
 %0 &0 ,0 
bj	0 0 0 0d # c        r.   r0   ))collections.abcr   typingr   r   numpyrj   image_processing_utilsr   r   r	   r
   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   
get_loggerr   rd   r   ri   r   r-   r0   __all__rL   r.   r,   <module>r      s  * % $ $ $ $ $ " " " " " " " "     u u u u u u u u u u u u e e e e e e e e e e e e e e                        ) ( ( ( ( ( ( ( 
	H	%	%RZ S PTUWU_P`    8B B B B B+ B B BJ  
 r.   