
    Pi,B                        U d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlZd dlZddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZm Z  er
d dl!Z"ddl#m$Z$ da%ee&e'                  e(d<   ej)        dk    rdndZ* ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d           ej+        d          gZ,e G d  d!                      Z-d"e&e'         fd#Z.d$d%d"e/fd&Z0d$d%d"e1fd'Z2d(ej3        d"e1fd)Z4d*ee&e'         e&e1         e&ej3                 e&d%         f         d"e&e1         fd+Z5dS ),    N)	dataclassfield)BytesIO)Path)TYPE_CHECKINGAnyClassVarOptionalUnion   )config)DownloadConfig)
array_cast)is_local_pathxopen)first_non_null_valueno_op_if_value_is_nullstring_to_dict   )FeatureType_IMAGE_COMPRESSION_FORMATSlittle<>z|b1|u1z<u2z>u2z<i2z>i2z<u4z>u4z<i4z>i4z<f4z>f4z<f8z>f8c                      e Zd ZU dZdZee         ed<   dZe	ed<    e
dd          Zee         ed<   d	Zee         ed
<    ej         ej                     ej                    d          Zee         ed<    e
d dd          Zeed<   d Zdeeeeeej        d	f         defdZddedd	fdZdedeedf         f         fdZdeej        ej         ej!        f         dej         fdZ"ddej         dej         fdZ#dS )Imagea=  Image [`Feature`] to read image data from an image file.

    Input: The Image feature accepts as input:
    - A `str`: Absolute path to the image file (i.e. random access is allowed).
    - A `pathlib.Path`: path to the image file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the image file to the archive file.
        - `bytes`: Bytes of the image file.

      This is useful for parquet or webdataset files which embed image files.

    - An `np.ndarray`: NumPy array representing an image.
    - A `PIL.Image.Image`: PIL image object.

    Output: The Image features output data as `PIL.Image.Image` objects.

    Args:
        mode (`str`, *optional*):
            The mode to convert the image to. If `None`, the native mode of the image is used.
        decode (`bool`, defaults to `True`):
            Whether to decode the image data. If `False`,
            returns the underlying dictionary in the format `{"path": image_path, "bytes": image_bytes}`.

    Examples:

    ```py
    >>> from datasets import load_dataset, Image
    >>> ds = load_dataset("AI-Lab-Makerere/beans", split="train")
    >>> ds.features["image"]
    Image(decode=True, id=None)
    >>> ds[0]["image"]
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x500 at 0x15E52E7F0>
    >>> ds = ds.cast_column('image', Image(decode=False))
    {'bytes': None,
     'path': '/root/.cache/huggingface/datasets/downloads/extracted/b0a21163f78769a2cf11f58dfc767fb458fc7cea5c05dccc0144a2c0f0bc1292/train/healthy/healthy_train.85.jpg'}
    ```
    NmodeTdecodeF)defaultrepridPIL.Image.Imagedtypebytespathpa_type)r    initr!   _typec                     | j         S N)r(   )selfs    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/features/image.py__call__zImage.__call___   s
    |    valuereturnc                    t           j        rddl}nt          d          t	          |t
                    rt          j        |          }t	          |t                    r|ddS t	          |t                    r$t          |
                                          ddS t	          |t          t          f          rd|dS t	          |t          j                  rt          |          S t	          ||j        j                  rt!          |          S |                    d          =t$          j                            |d                   rd|                    d          dS |                    d          |                    d          +|                    d          |                    d          dS t+          d| d	          )
a   Encode example into a format for Arrow.

        Args:
            value (`str`, `np.ndarray`, `PIL.Image.Image` or `dict`):
                Data passed as input to Image feature.

        Returns:
            `dict` with "path" and "bytes" fields
        r   N4To support encoding images, please install 'Pillow'.r'   r&   r'   r%   r&   zUAn image sample should have one of 'path' or 'bytes' but they are missing or None in .)r   PIL_AVAILABLE	PIL.ImageImportError
isinstancelistnparraystrr   absoluter&   	bytearrayndarrayencode_np_arrayr   encode_pil_imagegetosr'   isfile
ValueError)r-   r1   PILs      r.   encode_examplezImage.encode_exampleb   s     	VTUUUeT"" 	$HUOOEeS!! 	!D111t$$ 	 0 011DAAAy122 	 5111rz** 	"5)))sy// 	#E***YYv*rw~~eFm/L/L*!599V+<+<===YYw+uyy/@/@/L"YYw//69J9JKKKphmppp  r0   c                    | j         st          d          t          j        r	ddl}ddl}nt          d          |i }|d         |d         }}|7|t          d| d          t          |          r|j	        
                    |          }n|                    d	          d
         }|                    t          j                  rt          j        nt          j        }t!          ||          }	|	|                    |	d                   nd}
t%          |
          }t'          |d|          5 }t)          |                                          }ddd           n# 1 swxY w Y   |j	        
                    |          }n'|j	        
                    t)          |                    }|                                 |                                                    |j	        j        j        j                  |j                            |          }| j        r*| j        |j        k    r|                    | j                  }|S )aq  Decode example image file into image data.

        Args:
            value (`str` or `dict`):
                A string with the absolute image file path, a dictionary with
                keys:

                - `path`: String with absolute or relative image file path.
                - `bytes`: The bytes of the image file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                image files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`).

        Returns:
            `PIL.Image.Image`
        zMDecoding is disabled for this feature. Please use Image(decode=True) instead.r   Nz4To support decoding images, please install 'Pillow'.r'   r&   zCAn image should have one of 'path' or 'bytes' but both are None in r6   ::repo_idtokenrbdownload_config)r   RuntimeErrorr   r7   r8   PIL.ImageOpsr9   rG   r   r   opensplit
startswithHF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   rD   r   r   r   readloadgetexifExifTagsBaseOrientationImageOpsexif_transposer   convert)r-   r1   token_per_repo_idrH   r'   bytes_image
source_urlpatternsource_url_fieldsrO   rR   fs                r.   decode_examplezImage.decode_example   sn   $ { 	pnooo 	VTUUU$ "V}eGnf>| !ogl!o!o!oppp && 3INN400EE!%D!1!1"!5J &001CDD://#9 
 )7z7(K(K%O`Ol)--.?	.JKKKrv  '55&A&A&AOtT?KKK 3q!(!2!23 3 3 3 3 3 3 3 3 3 3 3 3 3 3INN622EEINN76??33E

==??sy16BCCOL//66E9 	-ej00MM$),,Es   5"E##E'*E'r   c                 N    ddl m} | j        r| n |d           |d          dS )zfIf in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary.r   )Valuebinarystringr%   )featuresrm   r   )r-   rm   s     r.   flattenzImage.flatten   sK    ###### {DD xh 	
r0   storagec                    t           j                            |j                  rR	 |                    t          j                              }n*# t           j        $ r}t          d|           |d}~ww xY wt           j                            |j                  rrt          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  rrt          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  r|j                            d          dk    r|                    d          }n8t          j	        dgt          |          z  t          j                              }|j                            d          dk    r|                    d          }n8t          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  rt          j	        d |                                D             t          j                              }t          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }t+          || j                  S )	a  Cast an Arrow array to the Image arrow storage type.
        The Arrow types that can be converted to the Image pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.large_string()` - it must contain the "path" data (will be cast to string if possible)
        - `pa.binary()` - it must contain the image bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter
        - `pa.list(*)` - it must contain the image array data

        Args:
            storage (`Union[pa.StringArray, pa.StructArray, pa.ListArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        zvFailed to cast large_string to string for Image feature. This can happen if string values exceed 2GB. Original error: Ntyper&   r'   maskr   c                 d    g | ]-}|'t          t          j        |                    d         nd .S )Nr&   )rB   r<   r=   ).0arrs     r.   
<listcomp>z&Image.cast_storage.<locals>.<listcomp>  s8    uuuZ]CO#//88QUuuur0   )patypesis_large_stringru   castro   ArrowInvalidrG   	is_stringr=   lenrn   StructArrayfrom_arraysis_null	is_binary	is_structget_field_indexr   is_list	to_pylistr   r(   )r-   rr   ebytes_array
path_arrays        r.   cast_storagezImage.cast_storage   s=   ( 8##GL11 	!,,ry{{33?    +'(+ +  	 8gl++ 	(D6CLL#8ry{{KKKKn00+w1G'SYIZahapaparar0ssGGX-- 	4&3w<<"7bikkJJJJn00':1FRXHY`g`o`o`q`q0rrGGX-- 	|++G4499%mmG44 hvG'<29;;OOO|++F33q88$]]622

Xtfs7||&;")++NNN
n00+z1JWV\L]dkdsdsdudu0vvGGXgl++ 	(uuahararatatuuuY[[  K 4&3w<<"7bikkJJJJn00j)GV+<;CVCVCXCX 1  G '4<000s   &A A4A//A4c                    i t           fd            t          j        fd|                                D             t          j                              }t          j        d |                    d                                          D             t          j                              }t          j                            ||gddg|	                                          }t          || j                  S )	a8  Embed image files into the Arrow array.

        Args:
            storage (`pa.StructArray`):
                PyArrow array to embed.

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        Nc                    |                      d          d         }|                    t          j                  rt          j        nt          j        }t          ||          }|                    |d                   nd }t          |          }t          | d|          5 }|
                                cd d d            S # 1 swxY w Y   d S )NrK   rL   rM   rN   rP   rQ   )rV   rW   r   rX   rY   rZ   r   rD   r   r   r[   )r'   rg   rh   ri   rO   rR   rj   rd   s          r.   path_to_bytesz*Image.embed_storage.<locals>.path_to_bytes  s   D))"-J+5+@+@AS+T+Tv''Z`Zv  !/z7 C CK\Kh%))*;I*FGGGnrE,5999OtT?CCC  qvvxx                                   s   %CC
C
c                 Z    g | ]'}|!|d          |d                   n	|d         nd (S )Nr&   r'    )ry   xr   s     r.   r{   z'Image.embed_storage.<locals>.<listcomp>(  sU        UVTaQwZ-?qy)))QwZZgk  r0   rt   c                 T    g | ]%}|t           j                            |          nd &S r,   )rE   r'   basename)ry   r'   s     r.   r{   z'Image.embed_storage.<locals>.<listcomp>/  s2    pppdt'7RWd###Tpppr0   r'   r&   rv   )r   r|   r=   r   rn   r   ro   r   r   r   r   r(   )r-   rr   rd   r   r   r   s     `  @r.   embed_storagezImage.embed_storage  s    $ "			  		  		  		  
 			  h    **,,   
 
 
 Xppgmm\bNcNcNmNmNoNoppp
 
 

 .,,k:-FRXHY`k`s`s`u`u,vv'4<000r0   r,   )$__name__
__module____qualname____doc__r   r
   r>   __annotations__r   boolr   r"   r$   r	   r|   structrn   ro   r(   r   r*   r/   r   r&   r@   dictr<   rA   rI   rk   rq   StringArrayr   	ListArrayr   r   r   r0   r.   r   r   .   s        % %N D(3-FDd777B777,E8C=,,,&RYibikk'R'RSSGXc]SSSwU???E3???  'E#uirzSd*d$e 'jn ' ' ' 'R; ;D ;EV ; ; ; ;z
}d33E.FFG 
 
 
 
61E".".",*V$W 61\^\j 61 61 61 61p&1 &1R^ &1PRP^ &1 &1 &1 &1 &1 &1r0   r   r2   c                  X   t           j        rdd l} nt          d          t          }| j                                         t          t          | j        j	        
                                          t          | j        j        
                                          z            at          S )Nr   r4   )r   r7   r8   r9   r   r   r)   r;   setOPENkeysSAVE)rH   s    r.   list_image_compression_formatsr   6  s     RPQQQ ")	%)#cin.A.A.C.C*D*Ds39>K^K^K`K`GaGa*a%b%b"%%r0   rf   r#   c                     t                      }| j        t                      v r| j        }n| j        dv rdnd}|                     ||           |                                S )zmConvert a PIL Image object to bytes using native compression if possible, otherwise use PNG/TIFF compression.)1LLARGBRGBAPNGTIFF)format)r   r   r   r   savegetvalue)rf   bufferr   s      r.   image_to_bytesr   C  sd    YYF|57777*(GGGV	JJvfJ%%%??r0   c                 p    t          | d          r| j        dk    r
| j        d dS d t          |           dS )Nfilename r5   )hasattrr   r   )rf   s    r.   rC   rC   N  sF    uj!! >en&:&:666~e'<'<===r0   r=   c                    t           j        rdd l}nt          d          | j        }|j        dk    r|j        nt          }|j        }|j        }d }| j	        dd          rP|dvrt          d| d| d          t          j        d	          }||k    rt          j        d
| d| d           n|t          v r|}n|dk    rk||z   t          |          z   }t          j        |          t          v r0t          j        |          }t          j        d
| d| d           n|dz  }|dk    k|t          d| dt                     |j                            |                     |                    }d t'          |          dS )Nr   r4   =r   )uizUnsupported array dtype z for image encoding. Only z' is supported for multi-channel arrays.r   zDowncasting array dtype z to z to be compatible with 'Pillow'r   zCannot downcast dtype z- to a valid image dtype. Valid image dtypes: r5   )r   r7   r8   r9   r$   	byteorder_NATIVE_BYTEORDERkinditemsizeshape	TypeErrorr<   warningswarn_VALID_IMAGE_ARRAY_DTPYESr>   r   	fromarrayastyper   )	r=   rH   r$   dtype_byteorder
dtype_kinddtype_itemsize
dest_dtype	dtype_strrf   s	            r.   rB   rB   U  s    RPQQQKE).C)?)?eooEVOJ^NJ {122 Z''5J   Xe__
JMkUkk
kkklll	+	+	+

!!'*4s>7J7JJIx	""&???Xi00
oooJoooppp1$ !! xxx]vxx   IZ 8 899E>%#8#8999r0   objsc                    t           j        rddl}nt          d          | rt	          |           \  }}t          |t                    rd | D             S t          |t          j                  r"t          t                    fd| D             S t          ||j        j                  r"t          t                    fd| D             S | S | S )zmEncode a list of objects into a format suitable for creating an extension array of type `ImageExtensionType`.r   Nr4   c                      g | ]}||d dnd S )Nr5   r   )ry   objs     r.   r{   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  s*    ^^^RUCOS4000^^^r0   c                 &    g | ]} |          S r   r   ry   r   obj_to_image_dict_funcs     r.   r{   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  %    @@@C**3//@@@r0   c                 &    g | ]} |          S r   r   r   s     r.   r{   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  r   r0   )r   r7   r8   r9   r   r:   r>   r<   rA   r   rB   r   rC   )r   rH   _r   r   s       @r.   objects_to_list_of_image_dictsr     s      RPQQQ %d++3c3 	_^^Y]^^^^c2:&& 	%;O%L%L"@@@@4@@@@SY_-- 	%;<L%M%M"@@@@4@@@@Kr0   )6rE   sysr   dataclassesr   r   ior   pathlibr   typingr   r   r	   r
   r   numpyr<   pyarrowr|   r   r   download.download_configr   tabler   utils.file_utilsr   r   utils.py_utilsr   r   r   r8   rH   rp   r   r   r;   r>   r   r   r   r$   r   r   r   r&   r   r   rC   rA   rB   r   r   r0   r.   <module>r      sB   					 



  ( ( ( ( ( ( ( (             @ @ @ @ @ @ @ @ @ @ @ @ @ @               5 5 5 5 5 5       3 3 3 3 3 3 3 3 Y Y Y Y Y Y Y Y Y Y  &%%%%%% 37 HT#Y/ 6 6 6=H44CC#  BHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOO $ D1 D1 D1 D1 D1 D1 D1 D1N
&S	 
& 
& 
& 
&+     >- >$ > > > >(:2: (:$ (: (: (: (:V
S	4:tBJ'7>O9PP
Q	$Z     r0   