
     `i{                     ~   d Z ddlZddlmZmZ ddlZddlmZm	Z	m
Z
 ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZm Z m!Z!  e            rddl"Z" e j#        e$          Z%d	ee&e&e                  e&e         ef         d
e&e&e                  fdZ' G d de	          Z( G d de          Z)dgZ*dS )zImage processor class for Fuyu.    N)OptionalUnion   )BaseImageProcessorBatchFeatureget_size_dict)padresizeto_channel_dimension_format)
ChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imageis_valid_imagemake_list_of_imagesto_numpy_arrayvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_torch_availableis_torch_deviceis_torch_dtypeloggingrequires_backendsimagesreturnc                     t          |           r| ggS t          | t                    rt          d | D                       r| S t          | t                    rd | D             S t	          d          )Nc              3   @   K   | ]}t          |t                    V  d S N)
isinstancelist.0images     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/fuyu/image_processing_fuyu.py	<genexpr>z.make_list_of_list_of_images.<locals>.<genexpr>@   s,      'T'TE
5$(?(?'T'T'T'T'T'T    c                 ,    g | ]}t          |          S  )r   r$   s     r'   
<listcomp>z/make_list_of_list_of_images.<locals>.<listcomp>D   s!    ???u#E**???r)   zHimages must be a list of list of images or a list of images or an image.)r   r"   r#   all
ValueError)r   s    r'   make_list_of_list_of_imagesr/   :   s     f z&$ C'T'TV'T'T'T$T$T &$ @??????
_
`
``r)   c                   D    e Zd ZdZddeeeef                  fdZd	dZ	dS )
FuyuBatchFeaturez
    BatchFeature class for Fuyu image processor and processor.

    The outputs dictionary from the processors contains a mix of tensors and lists of tensors.
    Ntensor_typec                    || S |                      |          \  fdfd|                                 D ]|\  }t          |t                    r-t          |d         t                    rfd|D             | <   Gt          |t                    rfd|D             | <   n |          | <   }| S )a5  
        Convert the inner content to tensors.

        Args:
            tensor_type (`str` or [`~utils.TensorType`], *optional*):
                The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                `None`, no modification is done.
        N)r2   c                 4     |           r| S  |           S r!   r+   )elem	as_tensor	is_tensors    r'   _convert_tensorz<FuyuBatchFeature.convert_to_tensors.<locals>._convert_tensor^   s&    y 9T??"r)   c                 n    	  |           S #  dk    rt          d          t          d          xY w)Noverflowing_valueszKUnable to create tensor returning overflowing values of different lengths. zUnable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.)r.   )r5   r8   keys    r'   _safe_convert_tensorzAFuyuBatchFeature.convert_to_tensors.<locals>._safe_convert_tensorc   sT    &t,,,...$%rsss X  s   
 &4r   c                 ,    g | ]}fd |D             S )c                 &    g | ]} |          S r+   r+   r%   r5   r<   s     r'   r,   zBFuyuBatchFeature.convert_to_tensors.<locals>.<listcomp>.<listcomp>r   s%    KKKT22488KKKr)   r+   )r%   elemsr<   s     r'   r,   z7FuyuBatchFeature.convert_to_tensors.<locals>.<listcomp>r   s/    ___PUKKKKUKKK___r)   c                 &    g | ]} |          S r+   r+   r?   s     r'   r,   z7FuyuBatchFeature.convert_to_tensors.<locals>.<listcomp>u   s%    JJJD11$77JJJr)   )_get_is_as_tensor_fnsitemsr"   r#   )selfr2   valuer8   r<   r6   r7   r;   s      @@@@@r'   convert_to_tensorsz#FuyuBatchFeature.convert_to_tensorsP   s    K#99k9RR	9	# 	# 	# 	# 	# 	#
		 		 		 		 		 		 **,, 		8 		8JC%&& 8:eAh+E+E 8____Y^___S		E4(( 8JJJJEJJJS		 1077S		r)   r   r   c                   	
 t          | dg           ddli }                    d          

t                    dk    rtd         }t	          |          rn\t          |t                    s$t          |          st          |t                    r|
n t          dt          |           d          
fd	| 
                                D ]\  }}t          |t                    rIt          |d         t                    r.g }|D ]#}|                    	fd|D                        $|||<   ct          |t                    r	fd	|D             ||<    	|          ||<   || _        | S )
a  
        Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
        different `dtypes` and sending the `BatchFeature` to a different `device`.

        Args:
            args (`Tuple`):
                Will be passed to the `to(...)` function of the tensors.
            kwargs (`Dict`, *optional*):
                Will be passed to the `to(...)` function of the tensors.

        Returns:
            [`BatchFeature`]: The same instance after modification.
        torchr   Ndevicez*Attempting to cast a BatchFeature to type z. This is not supported.c                 r     j         |           r | j        i S |                               S | S )N)rI   )is_floating_pointto)r5   argsrI   kwargsrH   s    r'   _toz FuyuBatchFeature.to.<locals>._to   sN    &u&t,, 0tw////!wwfw---Kr)   c                 &    g | ]} |          S r+   r+   r%   r5   rO   s     r'   r,   z'FuyuBatchFeature.to.<locals>.<listcomp>   s!    !>!>!>##d))!>!>!>r)   c                 &    g | ]} |          S r+   r+   rQ   s     r'   r,   z'FuyuBatchFeature.to.<locals>.<listcomp>   s!    777Tss4yy777r)   )r   rH   getlenr   r"   strr   intr.   rC   r#   appenddata)rD   rM   rN   new_dataargkvnew_vr@   rO   rI   rH   s    ``      @@@r'   rL   zFuyuBatchFeature.to{   s    	$	***H%%>c$ii!mmq'Cc"" rC%% r)=)= rCQTAUAU r !!pcRUhh!p!p!pqqq	 	 	 	 	 	 	 	 JJLL 	% 	%DAq!T"" 
%z!A$'='= 
% @ @ELL!>!>!>!>!>!>!>????#At$$ %7777Q777!c!ff	r)   r!   )r   r   )
__name__
__module____qualname____doc__r   r   rU   r   rF   rL   r+   r)   r'   r1   r1   I   sa         ) )huS*_7M.N ) ) ) )V8 8 8 8 8 8r)   r1   c            !       >    e Zd ZdZg dZddej        dddddddddfd	ed
ee	e
ef                  dededede
dedeeee         f         deeee         f         dededee	e
ef                  f fdZej        ddfdej        d
e	e
ef         dedeee
ef                  deee
ef                  dej        fdZ	 	 	 	 d-dej        d
e	e
ef         de
dedeee
ef                  deee
ef                  dej        fdZ e            ddddddddddddej        ddfd	ee         d
ee	e
ef                  dee         dee         dee         dee
         dee         dee         dee         dee         dee         dee	e
ef                  deee
ef                  deee
ef                  dee         fd            Zd.d ed!edee	e
ef                  defd"Zd.dd#dee	e
ef                  dd#fd$Z	 d.d%d#d&d#d'd#d(d#d)ed*ed+edee	e
ef                  defd,Z xZS )/FuyuImageProcessora	  
    This class should handle the image processing part before the main FuyuForCausalLM. In particular, it should
    handle:

    - Processing Images:
        Taking a batch of images as input. If the images are variable-sized, it resizes them based on the desired patch
        dimensions. The image output is always img_h, img_w of (1080, 1920)

        Then, it patches up these images using the patchify_image function.

    - Creating Image Input IDs:
        For each patch, a placeholder ID is given to identify where these patches belong in a token sequence. For
        variable-sized images, each line of patches is terminated with a newline ID.

    - Image Patch Indices:
        For each image patch, the code maintains an index where these patches should be inserted in a token stream.


    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image to `size`.
        size (`dict[str, int]`, *optional*, defaults to `{"height": 1080, "width": 1920}`):
            Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
            `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to `size`.
        padding_value (`float`, *optional*, defaults to 1.0):
            The value to pad the image with.
        padding_mode (`str`, *optional*, defaults to `"constant"`):
            The padding mode to use when padding the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float`, *optional*, defaults to 0.5):
            The mean to use when normalizing the image.
        image_std (`float`, *optional*, defaults to 0.5):
            The standard deviation to use when normalizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image.
        rescale_factor (`float`, *optional*, defaults to `1 / 255`):
            The factor to use when rescaling the image.
        patch_size (`dict[str, int]`, *optional*, defaults to `{"height": 30, "width": 30}`):
            Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
    r   image_input_idsimage_patchesimage_patch_indices_per_batch#image_patch_indices_per_subsequenceTN      ?constantg      ?gp?	do_resizesizeresampledo_padpadding_valuepadding_modedo_normalize
image_mean	image_std
do_rescalerescale_factor
patch_sizec                      t                      j        di | || _        ||nddd| _        || _        || _        || _        || _        || _        || _	        |	| _
        |
| _        || _        ||nddd| _        d S )Ni8  i  )heightwidth   r+   )super__init__rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   )rD   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rN   	__class__s                 r'   r|   zFuyuImageProcessor.__init__   s      	""6"""" ,DDTD2Q2Q	 *(($"$,(2(>**r\^D_D_r)   r&   data_formatinput_data_formatr   c           	         t          ||          \  }}|d         |d         }
}	||
k    r||	k    r|S |	|z  }|
|z  }t          ||          }t          ||z            }t          ||z            }t          d|||f|||d|}|S )a  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            `np.ndarray`: The resized image.
        rx   ry   )r&   rl   rm   r~   r   r+   )r   minrV   r
   )rD   r&   rl   rm   r~   r   rN   image_heightimage_widthtarget_heighttarget_widthheight_scale_factorwidth_scale_factoroptimal_scale_factor
new_height	new_widthscaled_images                    r'   r
   zFuyuImageProcessor.resize
  s    F %35:K$L$L!k&*8nd7m|,&&<=+H+HL+l:)K7"#68JKK(<<==
&::;;	 
i(#/
 
 
 
 r)   modeconstant_valuesc                     t          ||          \  }}|d         |d         }
}	d}d}|	|z
  }|
|z
  }t          |||f||ff||||          }|S )a  
        Pad an image to `(size["height"], size["width"])`.

        Args:
            image (`np.ndarray`):
                Image to pad.
            size (`dict[str, int]`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            data_format (`ChannelDimension` or `str`, *optional*):
                The data format of the output image. If unset, the same format as the input image is used.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        rx   ry   r   )paddingr   r   r~   r   )r   r	   )rD   r&   rl   r   r   r~   r   r   r   r   r   padding_toppadding_leftpadding_bottompadding_rightpadded_images                   r'   	pad_imagezFuyuImageProcessor.pad_imageD  s    , %35:K$L$L!k&*8nd7m|&5${2!>2\=4QR+#/
 
 
 r)   return_tensorsc           
      p   	
 ||n j         }n j        ||n j        }||n j        }||n j        }n j        ||n j        }		n j        	

n j        
n j	        n j
        ||n j        }n j        ||n j        }t          |t                    r(t          d |D                       rt          d          t!          |          }t#          ||	
||           d |D             }d |D             d         d         }|r)t%          |          rt&                              d           t+          |          fd	|D             }t-                    |r fd
|D             }fd|D             }d |D             }d |D             }d t/          ||          D             }|r fd|D             }|r fd|D             }|r	
 fd|D             }fd|D             }||||d}t1          ||          S )a  

        Utility function to preprocess the images and extract necessary information about original formats.

        Args:
            images (`ImageInput`):
                Images to preprocess. Expects a single image, a list or images or a list of lists of images. Pixel
                values range from 0 to 255, or between 0 and 1 if `do_rescale` is `False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image to `size`.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            do_pad (`bool`, *optional*, defaults to `self.do_pad`):
                Whether to pad the image to `size`.
            padding_value (`float`, *optional*, defaults to `self.padding_value`):
                The value to pad the image with.
            padding_mode (`str`, *optional*, defaults to `self.padding_mode`):
                The padding mode to use when padding the image.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float`, *optional*, defaults to `self.image_mean`):
                The mean to use when normalizing the image.
            image_std (`float`, *optional*, defaults to `self.image_std`):
                The standard deviation to use when normalizing the image.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                The factor to use when rescaling the image.
            patch_size (`dict[str, int]`, *optional*, defaults to `self.patch_size`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                - Unset: Return a list of `np.ndarray`.
                - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format of the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        Nc              3   f   K   | ],}t          |t                    ot          |          d k    V  -dS )   N)r"   r#   rT   )r%   r5   s     r'   r(   z0FuyuImageProcessor.preprocess.<locals>.<genexpr>  s=      +i+iZ^JtT,B,B,Us4yyTU~+i+i+i+i+i+ir)   z:Multiple images for a single sample are not yet supported.)rt   ru   rq   rr   rs   rk   rl   rm   c                 &    g | ]}d  |D             S )c                 ,    g | ]}t          |          S r+   )r   r$   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  s     CCC5..CCCr)   r+   r%   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s'    ___CCFCCC___r)   c                     g | ]}||S r+   r+   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s    KKK&FKvKKKr)   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.c                 B    g | ]}|t          |d                    S r   )channel_dimr   r%   r   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  sA      
  
  
IOci 
6!92CDDD 
  
  
r)   c                 0    g | ]}fd |D             S )c                 @    g | ]}                     |           S ))rl   r   )r
   )r%   r&   r   rD   rl   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  s-    hhhX]UARSShhhr)   r+   )r%   r   r   rD   rl   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  sE        ihhhhhaghhh  r)   c                 B    g | ]}|t          |d                    S r   r   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s2    uuuTZntu~fQi=NOOOuuur)   c                      g | ]}|d          gS r   r+   r%   
image_sizes     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s    !P!P!Pj:a=/!P!P!Pr)   c                      g | ]}|d          gS )   r+   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s     O O OZ*Q- O O Or)   c                 8    g | ]\  }}|d          |d          z  gS r   r+   )r%   original_sizeresized_sizes      r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  s=     
 
 
+| !_}Q//0
 
 
r)   c                 4    g | ]}fd |D             S )c           	      D    g | ]}                     |           S ))rl   r   r   r   )r   )r%   r&   r   rp   ro   rD   rl   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  sM     	 	 	  NN!)(5*; #  	 	 	r)   r+   )r%   r   r   rp   ro   rD   rl   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  sb        	 	 	 	 	 	 	 	 "(	 	 	  r)   c                 0    g | ]}fd |D             S )c                 @    g | ]}                     |           S ))scaler   )rescale)r%   r&   r   ru   rD   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  s-    tttdie>M^__tttr)   r+   )r%   r   r   ru   rD   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  sE        utttttmsttt  r)   c                 2    g | ]}fd |D             S )c                 B    g | ]}                     |           S ))meanstdr   )	normalize)r%   r&   rr   rs   r   rD   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  s>        NN5zy\mNnn  r)   r+   )r%   r   rr   rs   r   rD   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>
  s[       
 	      !'    r)   c                 .    g | ]}fd |D             S )c                 2    g | ]}t          |          S r+   )r   )r%   r&   r~   r   s     r'   r,   z<FuyuImageProcessor.preprocess.<locals>.<listcomp>.<listcomp>  s(    hhhX],UKARSShhhr)   r+   )r%   r   r~   r   s     r'   r,   z1FuyuImageProcessor.preprocess.<locals>.<listcomp>  sB        ihhhhaghhh  r)   )r   image_unpadded_heightsimage_unpadded_widthsimage_scale_factors)rX   r2   )rk   rl   rm   rn   rt   ru   rq   rr   rs   ro   rp   rv   r"   r#   anyr.   r/   r   r   loggerwarning_oncer   r   zipr1   )rD   r   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   r~   r   r   batch_imagesfirst_image_in_listoriginal_image_sizesimage_sizesr   r   r   rX   s   `  `  `` `` ` ``         r'   
preprocesszFuyuImageProcessor.preprocessj  s   L "+!6IIDN	'ttTY'388!-4;#-#9ZZt
+9+E4K^'3'?||TEV#-#9ZZt
!*!6IIDN	)6)BHZ'3'?||TEV#-#9ZZt
+9+E4K^#-#9ZZt
fd## 	[+i+ibh+i+i+i(i(i 	[YZZZ26::%!)%!		
 		
 		
 		
 `_R^___ LKLKKKANqQ 	/*=>> 	s  
 $ >?R S S 
  
  
  
S_ 
  
  
 T"" 	     *  L
 vuuu^juuu!P!PK!P!P!P O O; O O O
 
/23G/U/U
 
 

  	        +  L  	     *  L
  	      
 +  L "    *  L #&<%:#6	
 
  T~FFFFr)   r   r   c                     ||n| j         }| j         d         | j         d         }}||z  dk    rt          d|d|           ||z  dk    rt          d|d|           ||z  }||z  }||z  }|S )a  
        Calculate number of patches required to encode an image.

        Args:
            image_height (`int`):
                Height of the image.
            image_width (`int`):
                Width of the image.
            patch_size (`dict[str, int]`, *optional*, defaults to `self.patch_size`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
        Nrx   ry   r   zimage_height=z must be divisible by zimage_width=)rv   r.   )	rD   r   r   rv   patch_heightpatch_widthnum_patches_per_dim_hnum_patches_per_dim_wnum_patchess	            r'   get_num_patchesz"FuyuImageProcessor.get_num_patches   s     $.#9ZZt
$(OH$=tw?Wk,&!++SSS\SSTTT$))QQQKQQRRR , < +{ :+.CCr)   ztorch.Tensorc                    t          | dg           ||n| j        }|d         |d         }}|j        \  }}}}|                    d||          }|                    d||          }	|	                                }	|	                    ||d||          }	|	                    dddd	d
          }	|	                    |d||z  |z            }	|	S )a|  
        Convert an image into a tensor of patches.

        Args:
            image (`torch.Tensor`):
                Image to convert. Shape: [batch, channels, height, width]
            patch_size (`dict[str, int]`, *optional*, defaults to `self.patch_size`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
        rH   Nrx   ry   r   r   r      r   )r   rv   shapeunfold
contiguousviewpermutereshape)
rD   r&   rv   r   r   
batch_sizechannels_unfolded_along_heightpatchess
             r'   patchify_imagez!FuyuImageProcessor.patchify_image9  s     	$	***#-#9ZZt
$.x$8*W:Mk
 &+["
Ha %Ql K K'..q+{KK$$&&,,z8R{SS//!Q1a00//*b(\2IK2WXXr)   image_inputimage_presentimage_unpadded_himage_unpadded_wimage_placeholder_idimage_newline_idvariable_sizedc	           
         t          | dg           ||n| j        }|d         |d         }
}	g }g }g }t          |j        d                   D ]{}g }g }t          |j        d                   D ].}|||f         r|||f         }|j        d         |j        d         }}|rut	          |t          j        |||f         |	z            |	z            }t	          |t          j        |||f         |
z            |
z            }|ddd|d|f         }||}}|                     ||          }t          j	        |g|t          j
        |j        	          }|                     |                    d          
                              d          }||j        d         k    sJ |rz|                    d||
z            }t          j	        |j        d         dg|t          j
        |j        	          }t          j        ||gd          }|                    d          }|                    |g           |                    |           |                    |           |                    t          j        g t          j
        |j        	                     0|                    |           |                    |           }g }g }|D ]}d}g }g } |D ]}!|!|k    }"t          j        |"          }t          j        |t          j        |!j        	                              |!          }#t          j        |!d          }$t          j        |!d          }%t          j        |"d          d         }&|#|z   |$|&<   |#|%|&<   |                    |$           |                     |%           ||z  }|                    |           |                    |            t3          |||||d          S )a  Process images for model input. In particular, variable-sized images are handled here.

        Args:
            image_input (`torch.Tensor` of shape [batch_size, subsequence_size, num_channels, height, width]):
                Tensor of images padded to model input size.
            image_present (`torch.Tensor` of shape [batch_size, subsequence_size, num_images]):
                Tensor of 1s and 0s indicating whether an image is present.
            image_unpadded_h (`torch.Tensor` of shape [batch_size, subsequence_size]):
                Tensor of unpadded image heights.
            image_unpadded_w (`torch.Tensor` of shape [batch_size, subsequence_size]):
                Tensor of unpadded image widths.
            image_placeholder_id (int):
                The id of the image placeholder token. Comes from an associated tokenizer.
            image_newline_id (int):
                The id of the image newline token. Comes from an associated tokenizer.
            variable_sized (bool):
                Whether to process images as variable-sized.
            patch_size (`dict[str, int]`, *optional*, defaults to `self.patch_size`):
                Size of the patches.
        rH   Nrx   ry   r   r   r   )r   r   )dtyperI   )r&   r   )dimT)as_tuplerd   )rX   )r   rv   ranger   r   mathceilr   rH   fullint32rI   r   	unsqueezesqueezer   catrW   tensorcount_nonzeroarangeint64type_as	full_likenonzeror1   )'rD   r   r   r   r   r   r   r   rv   r   r   r   batch_image_patchesbatch_image_input_idsbatch_indexre   rf   subseq_indexr&   r   r   new_hnew_wr   tensor_of_image_idsr   newline_idsrg   rh   sample_image_input_idsindex_offsetper_batch_indicesper_subsequence_indicessubseq_image_input_idspatches_maskindicesindices_in_stream_per_batch!indices_in_stream_per_subsequencepatches_indss'                                          r'   preprocess_with_tokenizer_infoz1FuyuImageProcessor.preprocess_with_tokenizer_infoS  s   > 	$	***#-#9ZZt
$.x$8*W:Mk ,.8::< !21!566 /	6 /	6K OM %k&7&: ; ; )k )k l!:; (k'\(ABE05AA+L% A !$( I&6{L7P&QT`&`aadpp! ! !$' I&6{L7P&QT_&_``cnn! ! !&aaa%%&7 8495k"&"6"6L^i"6"j"jK*/*$';5;WbWi+ + +' #118J8J1KKSSTUVVG&'-*:::::% 
N.A.I.I"k]hNh.i.i+&+j06q91=,"'+#.#5	' ' ' /4i9Lk8Z`a.b.b.b+.A.I.I".M.M+MM5'***#**+>???!((1111#**5<%+VaVh+i+i+ijjjj!((999&&}5555 CE%HJ+&; 	P 	P"L "&(#*@ , ,&59MM#1,??,{%+NdNkllltt* 
 /4o>TVX.Y.Y+49ODZ\^4_4_1$}\DIII!L<Cl<R+L9BI1,?!(()DEEE'../PQQQ+)001BCCC/667NOOOO #8!41N7Z 
 
 
 	
r)   )rj   ri   NNr!   )r^   r_   r`   ra   model_input_namesr   BILINEARboolr   dictrU   rV   floatr   r#   r|   npndarrayr   r
   r   r   FIRSTr   r   r   r   r1   r
  __classcell__)r}   s   @r'   rc   rc      s       + +Z   )-'9'B"&!03/2 '/3` `` tCH~&` %	`
 ` ` ` ` %e,-` U+,` ` ` T#s(^,` ` ` ` ` `D (:'B>BDH8 8z8 38n8 %	8
 eC)9$9:;8 $E#/?*?$@A8 
8 8 8 8| !$>BDH$ $z$ 38n$ 	$
 $ eC)9$9:;$ $E#/?*?$@A$ 
$ $ $ $L %$&& %))-15!%)-&*'+&*%)%)*./3>N>TDH/3#sG sG D>sG tCH~&	sG
 -.sG sG  sG smsG tnsG UOsG E?sG TNsG !sG T#s(^,sG eC)9$9:;sG  $E#/?*?$@A!sG" !,#sG sG sG '&sGj C c xX\]`be]eXfOg sv    2 N cSVh@X dr    F 04B
 B
#B
 &B
 )	B

 )B
 "B
 B
 B
 T#s(^,B
 
B
 B
 B
 B
 B
 B
 B
 B
r)   rc   )+ra   r   typingr   r   numpyr  image_processing_utilsr   r   r   image_transformsr	   r
   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   r   rH   
get_loggerr^   r   r#   r/   r1   rc   __all__r+   r)   r'   <module>r     sP   & %  " " " " " " " "     U U U U U U U U U U         
                                          LLL 
	H	%	%a$tJ'($z*:JFGa	$z
a a a aj j j j j| j j jZ_
 _
 _
 _
 _
+ _
 _
 _
D  
 r)   