
     `i!                     P   d dl Z d dlmZ d dlmZmZmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ  e            rd dlmZ dd	lmZ  e
            r
d dlZdd
lmZ  e	            rddlmZ ddlmZ  ej        e          Z e ed                     G d de                      ZdS )    N)UserDict)AnyUnionoverload   )add_end_docstringsis_tf_availableis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)Image)
load_image)6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)9TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)stable_softmaxT)has_image_processorc                       e Zd ZdZdZdZdZdZ fdZe	de
edf         dee         ded	eeeef                  fd
            Ze	de
ee         ed         f         dee         ded	eeeeef                           fd            Zde
eee         ded         f         dee         ded	e
eeeef                  eeeeef                           f         f fdZddZ	 	 	 	 ddZd Zd Z xZS )#ZeroShotImageClassificationPipelineaL  
    Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
    provide an image and a set of `candidate_labels`.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    [{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]

    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["black and white", "photorealist", "painting"],
    ... )
    [{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-image-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
    FTc                      t                      j        di | t          | d           |                     | j        dk    rt
          nt                     d S )Nvisiontf )super__init__r   check_model_type	frameworkr   r   )selfkwargs	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/pipelines/zero_shot_image_classification.pyr   z,ZeroShotImageClassificationPipeline.__init__H   sk    ""6"""$)))~%% FEG	
 	
 	
 	
 	
    imagezImage.Imagecandidate_labelsr"   returnc                     d S Nr   r!   r&   r'   r"   s       r$   __call__z,ZeroShotImageClassificationPipeline.__call__R   s	      #sr%   c                     d S r*   r   r+   s       r$   r,   z,ZeroShotImageClassificationPipeline.__call__W   s	     &)Sr%   c                     d|v r|                     d          }|t          d           t                      j        |fd|i|S )a  
        Assign labels to the image(s) passed as inputs.

        Args:
            image (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

            candidate_labels (`list[str]`):
                The candidate labels for this image. They will be formatted using *hypothesis_template*.

            hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`):
                The format used in conjunction with *candidate_labels* to attempt the image classification by
                replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are
                already formatted.

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries containing one entry per proposed label. Each dictionary contains the
            following keys:
            - **label** (`str`) -- One of the suggested *candidate_labels*.
            - **score** (`float`) -- The score attributed by the model to that label. It is a value between
                0 and 1, computed as the `softmax` of `logits_per_image`.
        imagesNzSCannot call the zero-shot-image-classification pipeline without an images argument!r'   )pop
ValueErrorr   r,   )r!   r&   r'   r"   r#   s       r$   r,   z,ZeroShotImageClassificationPipeline.__call__\   s[    J vJJx((E=rsssuwwSS8HSFSSSr%   Nc                     i }d|v r|d         |d<   d|v r|d         |d<   d|v r|d         |d<   |t          j        dt                     ||d<   |i i fS )Nr'   timeouthypothesis_templatez^The `tokenizer_kwargs` argument is deprecated and will be removed in version 5 of Transformerstokenizer_kwargs)warningswarnFutureWarning)r!   r5   r"   preprocess_paramss       r$   _sanitize_parametersz8ZeroShotImageClassificationPipeline._sanitize_parameters   s    ''4:;M4N01+1)+<i( F**7=>S7T34'Mp   5E01 "b((r%   This is a photo of {}.c                    |i }t          ||          }|                     |g| j                  }| j        dk    r|                    | j                  }||d<   fd|D             }ddi}d| j        j        j        v r|                    d	d
d           |                    |            | j	        |fd| j        i|}	|	g|d<   |S )N)r3   )r/   return_tensorsptr'   c                 :    g | ]}                     |          S r   )format).0xr4   s     r$   
<listcomp>zBZeroShotImageClassificationPipeline.preprocess.<locals>.<listcomp>   s(    MMMq(//22MMMr%   paddingTsiglip
max_length@   )rD   rF   
truncationr=   text_inputs)
r   image_processorr    todtypemodelconfig
model_typeupdate	tokenizer)
r!   r&   r'   r4   r3   r5   inputs	sequencestokenizer_default_kwargsrI   s
      `      r$   
preprocessz.ZeroShotImageClassificationPipeline.preprocess   s    #!5'222%%eWT^%TT>T!!YYtz**F%5!"MMMM<LMMM	$-t#4 tz(333$++LR\`+aaa ''(8999$dnYjjt~jQijj!,}r%   c                     |                     d          }|                     d          }t          |d         t                    r	|d         }n|d         d         } | j        di ||}||j        d}|S )Nr'   rI   r   )r'   logitsr   )r0   
isinstancer   rM   logits_per_image)r!   model_inputsr'   rI   outputsmodel_outputss         r$   _forwardz,ZeroShotImageClassificationPipeline._forward   s    '++,>??"&&}55k!nh// 	,%a.KK &a.+K$*;;{;l;; !1.
 
 r%   c                    |                     d          }|d         d         }| j        dk    rgd| j        j        j        v rTt          j        |                              d          }|                                }t          |t                    s|g}n| j        dk    rV|                    d                              d          }|                                }t          |t                    s|g}nZ| j        dk    r8t          |d	          }|                                                                }nt          d
| j                   d t          t!          ||          d           D             }|S )Nr'   rW   r   r>   rE   )dimr   )axiszUnsupported framework: c                     g | ]
\  }}||d S ))scorelabelr   )rA   rc   candidate_labels      r$   rC   zCZeroShotImageClassificationPipeline.postprocess.<locals>.<listcomp>   s4     
 
 
& o66
 
 
r%   c                     | d          S )Nr   r   )rB   s    r$   <lambda>zAZeroShotImageClassificationPipeline.postprocess.<locals>.<lambda>   s    _`ab_c^c r%   )key)r0   r    rM   rN   rO   torchsigmoidsqueezetolistrX   listsoftmaxr   numpyr1   sortedzip)r!   r\   r'   rW   probsscoresresults          r$   postprocessz/ZeroShotImageClassificationPipeline.postprocess   sr   (,,-?@@x(+>T!!h$*2C2N&N&NM&))11"55E\\^^Ffd++ " ^t##NNrN**22266E\\^^Ffd++ " ^t##"6333E[[]]))++FFGt~GGHHH
 
*0V=M1N1NTcTc*d*d*d
 
 
 r%   r*   )Nr;   NN)__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r   r   strrm   r   dictr,   r:   rU   r]   ru   __classcell__)r#   s   @r$   r   r   !   s        @ O #O
 
 
 
 
 #3-.#BFs)#WZ#	d38n	# # # X# )49d=&99:)NRSVi)cf)	d4S>"	#) ) ) X))TS$s)]D4GGH)T s))T 	)T
 
tDcN#T$tCH~*>%??	@)T )T )T )T )T )TV) ) ) )( 4   0  "      r%   r   ) r6   collectionsr   typingr   r   r   utilsr   r	   r
   r   r   r   baser   r   PILr   image_utilsr   ri   models.auto.modeling_autor   models.auto.modeling_tf_autor   tf_utilsr   
get_loggerrv   loggerr   r   r%   r$   <module>r      s                ' ' ' ' ' ' ' ' ' '                5 4 4 4 4 4 4 4  )(((((( cLLLbbbbbb? *hhhhhh))))))		H	%	% ,,FFFGGv v v v v( v v HGv v vr%   