
    *`i              	       :   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZmZ d dlmZmZ d dlmZmZ  ej        e          Z e            rd dlZe G d	 d
                      Ze G d d                      Zdeez  defdZdZdZ G d dee          Ze G d d                      Z dej        dej        fdZ!de	j"        de#e$e$e$f         de#e$e$e$f         de	j"        fdZ%dej        de#e&e&f         de	j"        fdZ' G d d          Z(dS )     N)	dataclass)Enum)BytesIO)Image)SerializableImagedownload_image)assert_opencv_installedis_opencv_installed)
ImageChunkImageURLChunkc                   >    e Zd ZU dZee         ed<   ej        ed<   dS )ImageEncodingzA tokenized image.

    Attributes:
        tokens: The token ids.
        image: The image as a numpy array.

    Examples:
        >>> import numpy as np
        >>> image_encoding = ImageEncoding(tokens=[1, 2, 3], image=np.array([[0., 0.5, 1.]]))
    tokensimageN)	__name__
__module____qualname____doc__listint__annotations__npndarray     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/image.pyr   r      s<         	 	 I:r   r   c                   2    e Zd ZU dZeed<   eed<   eed<   dS )SpecialImageIDsa  Special image tokens ids.

    Attributes:
        img: The image token id.
        img_break: The image break token id.
        img_end: The image end token id.

    Examples:
        >>> special_image_ids = SpecialImageIDs(img=1, img_break=2, img_end=3)
    img	img_breakimg_endN)r   r   r   r   r   r   r   r   r   r   r   &   s7         	 	 
HHHNNNLLLLLr   r   chunkreturnc                    t          | t                    r| j        S |                                                     d          rb|                                                     d          d         }t          j        |          }t          j	        t          |                    S |                                                     d          rHt          j	        t          |                                                     dd          d                    S |                                                     d          r!t          |                                           S t          d	|                                            )
zGet a serializable image from a chunk.

    Args:
        chunk: The chunk to get the image from.

    Returns:
        The image as a PIL Image object.
    z
data:image,   filezfile:// rbhttpzUnsupported image url scheme )
isinstancer   r   get_url
startswithsplitbase64	b64decoder   openr   replacer   RuntimeError)r"   data
image_datas      r   image_from_chunkr6   8   s(    %$$ {}}!!,// /}}$$S))!,%d++
z'*--...}}!!&)) Nz$u}}66y"EEtLLMMM}}!!&)) /emmoo...
Hu}}HH
I
IIr   )g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                   .    e Zd ZdZdZedd            ZdS )MultiModalVersionzVersion of the image tokenizer.m1r#   ImageConfigc                 b    | j         dk    rt          dd          S t          | j                    )Nr9      i   )namer:   NotImplementedErrorselfs    r   configzMultiModalVersion.configY   s1    9r4(((!TY.111r   N)r#   r:   )r   r   r   r   r9   propertyrA   r   r   r   r8   r8   T   s=        **	B2 2 2 X2 2 2r   r8   c                   6    e Zd ZU dZeed<   eed<   dZeed<   dS )r:   z'Configuration for the image tokenizers.image_patch_sizemax_image_sizer&   spatial_merge_sizeN)r   r   r   r   r   r   rF   r   r   r   r:   r:   a   sA         22r   r:   r   c                     | j         dk    r| S | j         dk    r|                     d          } t          j        d| j        d          }|                    | d|            |                    d          S )zUConvert a PIL image to RGB.

    We ensure transparent background becomes white.
    RGBRGBAWHITE)r   r   )modeconvertr   newsizepaste)r   white_bgs     r   _convert_to_rgbrQ   j   ss    
 zUzVf%%!Ifej'BBHNN5&%(((E"""r   np_imagemeanstdc                 6   | dz  } t          | j                  dk    sJ d| j                    | j        d         t          |          cxk    rt          |          k    sn J d| j        d|d|            | |z
  |z  } |                     ddd          S )	a  Normalize a tensor image with mean and standard deviation.

    Args:
        np_image: Image to be normalized.
        mean: Mean for each channel.
        std: Standard deviation for each channel.

    Returns:
        Normalized image with shape (C, H, W).
    g     o@   znp_image.shape=   z, mean=z, std=r   r&   )lenshape	transpose)rR   rS   rT   s      r   	normalizer[   x   s     %Hx~!###%9%9%9###>!D		5555SXX555557\(.7\7\T7\7\VY7\7\55543&HaA&&&r   new_sizec                     t                       t          j        t          j        t          |           t          j                  |t          j                  }t          |t          t                    S )zTransform an image to a numpy array with the given size.

    Args:
        image: Image to be transformed.
        new_size: New size of the image.

    Returns:
        Transformed image with shape (C, H, W).
    )dtype)interpolation)r	   cv2resizer   arrayrQ   float32INTER_CUBICr[   DATASET_MEANDATASET_STD)r   r\   rR   s      r   transform_imagerg      sU     z"(?5#9#9LLLhfifuvvvHX|[999r   c                       e Zd ZdZdededdfdZedefd            Zde	j	        de
eef         fd	Zd
eez  defdZedefd            ZdS )ImageEncoderz&Image encoder for the image tokenizer.image_configspecial_idsr#   Nc                 "    || _         || _        dS )zInitialize the image encoder.

        Args:
            image_config: Configuration for the image tokenizer.
            special_ids: Special image tokens ids.
        N)rj   rk   )r@   rj   rk   s      r   __init__zImageEncoder.__init__   s     )&r   c                     | j         S N)rj   r?   s    r   	mm_configzImageEncoder.mm_config   s    
   r   r   c                 N   |j         \  }}t          || j        j        z  || j        j        z            }|dk    r$t	          ||z            }t	          ||z            }|dz
  | j        j        | j        j        z  z  dz   }|dz
  | j        j        | j        j        z  z  dz   }||fS )Nr&   )rN   maxrj   rE   roundrD   rF   )r@   r   whratiowidth_tokensheight_tokenss          r   _image_to_num_tokensz!ImageEncoder._image_to_num_tokens   s     x1A)88!d>O>^:^__199a%i  Aa%i  AA4#4#EHYHl#lmpqqQD$5$FIZIm$mnqrr]**r   contentc                    t          |          }|                     |          \  }}|dk    sJ |dk    sJ | j        j        g|z  | j        j        gz   |z  }| j        j        |d<   || j        j        z  | j        j        z  || j        j        z  | j        j        z  f}t          ||          }t          ||          S )zConverts an image chunk to an image encoding.

        Args:
            content: image chunk to be converted.

        Returns:
            Image encoding.
        r   )r   r   )r6   ry   rk   r   r    r!   rj   rD   rF   rg   r   )r@   rz   r   rt   ru   image_tokensnew_image_sizeprocessed_images           r   __call__zImageEncoder.__call__   s     !))((//11uuuu1uuuu)-.2d6F6P5QQUVV+3R!22T5F5YY!22T5F5YY
 *%@@LHHHHr   c                     | j         j        S ro   )rk   r   r?   s    r   image_tokenzImageEncoder.image_token   s    ##r   )r   r   r   r   r:   r   rm   rB   rp   r   tupler   ry   r   r   r   r   r   r   r   r   ri   ri      s        11'[ ' 'SW ' ' ' ' !; ! ! ! X!+ +c3h + + + +I
] : I} I I I I, $S $ $ $ X$ $ $r   ri   ))r/   loggingdataclassesr   enumr   ior   numpyr   PILr   mistral_common.imager   r   mistral_common.importsr	   r
   &mistral_common.protocol.instruct.chunkr   r   	getLoggerr   loggerr`   r   r   r6   re   rf   strr8   r:   rQ   r   r   floatr[   r   rg   ri   r   r   r   <module>r      s     ! ! ! ! ! !                       B B B B B B B B O O O O O O O O L L L L L L L L		8	$	$  JJJ                 "JMJ6 J;L J J J J. 32
2 
2 
2 
2 
2T 
2 
2 
2                #5; #5; # # # #'j'
ue#
$' 
ueU"	#' Z	' ' ' '2:5; :%S/ :bj : : : : ;$ ;$ ;$ ;$ ;$ ;$ ;$ ;$ ;$ ;$r   