
     `i;$                         d Z ddlmZmZmZ ddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZmZmZmZmZ ddlmZmZ g d	Zg d
Zd Z G d de          ZdgZdS )z"Image processor class for Idefics.    )CallableOptionalUnion)Image   )BaseImageProcessorBatchFeature)resizeto_channel_dimension_format)ChannelDimension
ImageInputPILImageResamplingmake_flat_list_of_imagesto_numpy_arrayvalid_images)
TensorTypeis_torch_available)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                     | j         dk    r| S |                     d          }t          j        d|j        d          }t          j        ||          }|                    d          }|S )NRGBRGBA)   r   r   )modeconvertr   newsizealpha_composite)image
image_rgba
backgroundr   s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/idefics/image_processing_idefics.pyconvert_to_rgbr!   &   sg     zUv&&J6:?ODDJ+J
CCO%--e44O    c                       e Zd ZdZdgZ	 	 	 	 	 	 dded	eeee	e         f                  d
eeee	e         f                  dee         de
deeef         ddf fdZdddddddej        fdedee         deeeef                  d	eeee	e         f                  d
eeee	e         f                  dee         dee
         dee         deeeef                  defdZ xZS )IdeficsImageProcessora  
    Constructs a Idefics image processor.

    Args:
        image_size (`int`, *optional*, defaults to 224):
            Resize to image size
        image_mean (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be
            overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
            Can be overridden by the `image_std` parameter in the `preprocess` method.
        image_num_channels (`int`, *optional*, defaults to 3):
            Number of image channels.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
    pixel_values   Nr   Tp?
image_size
image_mean	image_stdimage_num_channels
do_rescalerescale_factorreturnc                      t                      j        di | || _        || _        ||nt          | _        ||nt          | _        || _        || _	        d S )N )
super__init__r(   r+   IDEFICS_STANDARD_MEANr)   IDEFICS_STANDARD_STDr*   r,   r-   )	selfr(   r)   r*   r+   r,   r-   kwargs	__class__s	           r    r2   zIdeficsImageProcessor.__init__N   sh     	""6"""$"4(2(>**DY&/&;AU$,r"   images	transformreturn_tensorsc
                     ||n j         }||n j        }n j        n j        ||n j        }n j        ||ft          |t                    rt          |          dk    rg S  	                    |          }t          |          }t          |          st          d          Dt                      st          d          ddl}fd|D             }|                    |          S d |D             }d |D             }fd|D             } fd	|D             } fd
|D             }d |D             }t#          d|i|	          d         }|S )a|  
        Preprocess a batch of images.

        Args:
            images (`ImageInput`):
                A list of images to preprocess.
            image_size (`int`, *optional*, defaults to `self.image_size`):
                Resize to image size
            image_num_channels (`int`, *optional*, defaults to `self.image_num_channels`):
                Number of image channels.
            image_mean (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
                Mean to use if normalizing the image. This is a float or list of floats the length of the number of
                channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can
                be overridden by the `image_mean` parameter in the `preprocess` method.
            image_std (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
                Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
                number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess`
                method. Can be overridden by the `image_std` parameter in the `preprocess` method.
            transform (`Callable`, *optional*, defaults to `None`):
                A custom transform function that accepts a single image can be passed for training. For example,
                `torchvision.Compose` can be used to compose multiple transforms. If `None` - an inference mode is
                assumed - and then a preset of inference-specific transforms will be applied to the images
            do_rescale (`bool`, *optional*, defaults to `True`):
                Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
                the `preprocess` method.
            rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
                Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
                method.

        Returns:
            a PyTorch tensor of the processed images

        Nr   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.z.To pass in `transform` torch must be installedc                 &    g | ]} |          S r0   r0   ).0xr9   s     r    
<listcomp>z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   s!    333qiill333r"   c                 ,    g | ]}t          |          S r0   )r!   r=   r>   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>        444.##444r"   c                 ,    g | ]}t          |          S r0   )r   rA   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   rB   r"   c                 H    g | ]}t          |t          j                   S ))resample)r
   r   BICUBIC)r=   r>   r   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   s,    WWW1&D+=+EFFFWWWr"   c                 >    g | ]}                     |           S ))r   scale)rescale)r=   r   r-   r5   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   s)    VVVe$,,U.,AAVVVr"   c                 @    g | ]}                     |           S ))meanstd)	normalize)r=   r>   r)   r*   r5   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   s+    TTT$...CCTTTr"   c                 B    g | ]}t          |t          j                  S r0   )r   r   FIRSTrA   s     r    r?   z4IdeficsImageProcessor.preprocess.<locals>.<listcomp>   s'    YYYQ-a1A1GHHYYYr"   r%   )datatensor_type)r(   r+   r)   r*   r,   r-   
isinstancelistlenfetch_imagesr   r   
ValueErrorr   ImportErrortorchstackr	   )r5   r8   r+   r(   r)   r*   r9   r,   r-   r:   r6   rX   r   s   `   ``` `   @r    
preprocessz IdeficsImageProcessor.preprocessa   s   \ $.#9ZZt
3E3Q//W[Wn#-#9ZZt
!*!6IIDN	#-#9ZZt
+9+E4K^J'fd## 	Fq(8(8I""6**)&11F## 	:    %'' T!"RSSSLLL3333F333F;;v&&& 54V44444V444WWWWPVWWWVVVVVvVVVTTTTTTVTTTYYRXYYYNF#;XXXYghr"   )r&   NNr   Tr'   )__name__
__module____qualname____doc__model_input_namesintr   r   floatrS   boolr2   r   PYTORCHr   dictstrr   rZ   __classcell__)r7   s   @r    r$   r$   3   s        0 (( :>9=,-,3- -- U5$u+#567- E%e"456	-
 %SM- - c5j)- 
- - - - - -, -./3:>9=(,%)*.;E;M\ \\ %SM\ T#s(^,	\
 U5$u+#567\ E%e"456\ H%\ TN\ !\ !sJ!78\ 
\ \ \ \ \ \ \ \r"   r$   N)r^   typingr   r   r   PILr   image_processing_utilsr   r	   image_transformsr
   r   image_utilsr   r   r   r   r   r   utilsr   r   r3   r4   r!   r$   __all__r0   r"   r    <module>rn      sB   ) ( , , , , , , , , , ,       F F F F F F F F C C C C C C C C                4 3 3 3 3 3 3 3 <;; ;;; 
 
 
J J J J J. J J JZ #
#r"   