
     `ic                         d dl mZmZ ddlmZmZ ddlmZmZm	Z	  e            rd dl
mZ ddlmZ  e e	d	          d
           G d de                      ZdS )    )AnyUnion   )add_end_docstringsis_vision_available   )GenericTensorPipelinebuild_pipeline_init_args)Image)
load_imageT)has_image_processora  
        image_processor_kwargs (`dict`, *optional*):
                Additional dictionary of keyword arguments passed along to the image processor e.g.
                {"size": {"height": 100, "width": 100}}
        pool (`bool`, *optional*, defaults to `False`):
            Whether or not to return the pooled output. If `False`, the model will return the raw hidden states.
    c                        e Zd ZdZdZdZdZdZddZdde	e
ef         fdZd Zdd	Zd
ee
ded         ee
         f         dedee         f fdZ xZS )ImageFeatureExtractionPipelinea,  
    Image feature extraction pipeline uses no model head. This pipeline extracts the hidden states from the base
    transformer, which can be used as features in downstream tasks.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> extractor = pipeline(model="google/vit-base-patch16-224", task="image-feature-extraction")
    >>> result = extractor("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png", return_tensors=True)
    >>> result.shape  # This is a tensor of shape [1, sequence_length, hidden_dimension] representing the input image.
    torch.Size([1, 197, 768])
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image feature extraction pipeline can currently be loaded from [`pipeline`] using the task identifier:
    `"image-feature-extraction"`.

    All vision models may be used for this pipeline. See a list of all models, including community-contributed models on
    [huggingface.co/models](https://huggingface.co/models).
    FTNc                 V    |i n|}i }|||d<   |||d<   d|v r|d         |d<   |i |fS )Npoolreturn_tensorstimeout )selfimage_processor_kwargsr   r   kwargspreprocess_paramspostprocess_paramss          /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/pipelines/image_feature_extraction.py_sanitize_parametersz3ImageFeatureExtractionPipeline._sanitize_parameters5   sd    "8"@BBF\)-v&%3A/0+1)+<i( "&888    returnc                     t          ||          } | j        |fd| j        i|}| j        dk    r|                    | j                  }|S )N)r   r   pt)r   image_processor	frameworktodtype)r   imager   r   model_inputss        r   
preprocessz)ImageFeatureExtractionPipeline.preprocessC   s\    5'222+t+Ekk$.kTjkk>T!!'??4:66Lr   c                       | j         di |}|S )Nr   )model)r   r&   model_outputss      r   _forwardz'ImageFeatureExtractionPipeline._forwardJ   s    "
22\22r   c                    ||nd}|rd|vrt          d          |d         }n|d         }|r|S | j        dk    r|                                S | j        dk    r&|                                                                S d S )NFpooler_outputzeNo pooled output was returned. Make sure the model has a `pooler` layer when using the `pool` option.r   r    tf)
ValueErrorr"   tolistnumpy)r   r*   r   r   outputss        r   postprocessz*ImageFeatureExtractionPipeline.postprocessN   s    'ttU 	'm33 {   $O4GG $A&G 	N>T!!>>###^t##==??))+++ $#r   argszImage.Imager   c                 6     t                      j        |i |S )a  
        Extract the features of the input(s).

        Args:
            images (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images, which must then be passed as a string.
                Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL
                images.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is used and
                the call may block forever.
        Return:
            A nested list of `float`: The features computed by the model.
        )super__call__)r   r4   r   	__class__s      r   r7   z'ImageFeatureExtractionPipeline.__call__b   s!    *  uww0000r   )NNN)N)NF)__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   dictstrr	   r'   r+   r3   r   listr   r7   __classcell__)r8   s   @r   r   r      s         0 O #O9 9 9 9 4PSUbPbKc      , , , ,(1eC]8KTRUY$VW 1cf 1kopskt 1 1 1 1 1 1 1 1 1 1r   r   N)typingr   r   utilsr   r   baser	   r
   r   PILr   image_utilsr   r   r   r   r   <module>rJ      s            ; ; ; ; ; ; ; ; C C C C C C C C C C  )(((((( 666	 	`1 `1 `1 `1 `1X `1 `1	 	`1 `1 `1r   