
     `i!                        d dl mZmZmZmZ ddlmZmZmZm	Z	m
Z
 ddlmZmZ  e            rddlmZ  e            rd dlZddlmZmZ erd d	lmZ  e	j        e          Z e ed
                     G d de                      ZdS )    )TYPE_CHECKINGAnyUnionoverload   )add_end_docstringsis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)
load_imageN)(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES)ImageT)has_image_processorc                       e Zd ZdZdZdZdZdZ fdZd Z	e
deedf         d	ed
edeeeef                  fd            Ze
deee         ed         f         d	ed
edeeeeef                           fd            Zdeeeeef                  eeeeef                           f         f fdZddZd ZddZdddeeef         fdZ xZS )ObjectDetectionPipelinea  
    Object detection pipeline using any `AutoModelForObjectDetection`. This pipeline predicts bounding boxes of objects
    and their classes.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> detector = pipeline(model="facebook/detr-resnet-50")
    >>> detector("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    [{'score': 0.997, 'label': 'bird', 'box': {'xmin': 69, 'ymin': 171, 'xmax': 396, 'ymax': 507}}, {'score': 0.999, 'label': 'bird', 'box': {'xmin': 398, 'ymin': 105, 'xmax': 767, 'ymax': 507}}]

    >>> # x, y  are expressed relative to the top left hand corner.
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"object-detection"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
    FTNc                 $    t                      j        |i | | j        dk    rt          d| j         d          t          | d           t          j                    }|                    t                     | 
                    |           d S )NtfzThe z is only available in PyTorch.vision)super__init__	framework
ValueError	__class__r   r   copyupdater   check_model_type)selfargskwargsmappingr   s       {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/pipelines/object_detection.pyr   z ObjectDetectionPipeline.__init__8   s    $)&)))>T!!RDNRRRSSS$))):?AACDDDg&&&&&    c                 P    i }d|v r|d         |d<   i }d|v r|d         |d<   |i |fS )Ntimeout	threshold )r"   r$   preprocess_paramspostprocess_kwargss       r&   _sanitize_parametersz,ObjectDetectionPipeline._sanitize_parametersC   sR    +1)+<i(&  .4[.A{+ "&888r'   imagezImage.Imager#   r$   returnc                     d S Nr+   r"   r/   r#   r$   s       r&   __call__z ObjectDetectionPipeline.__call__L   s    mpmpr'   c                     d S r2   r+   r3   s       r&   r4   z ObjectDetectionPipeline.__call__O   s	     &)Sr'   c                 v    d|v rd|vr|                     d          |d<    t                      j        |i |S )ai  
        Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

        Args:
            inputs (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            threshold (`float`, *optional*, defaults to 0.5):
                The probability necessary to make a prediction.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries or a list of list of dictionaries containing the result. If the input is a single
            image, will return a list of dictionaries, if the input is a list of several images, will return a list of
            list of dictionaries corresponding to each image.

            The dictionaries contain the following keys:

            - **label** (`str`) -- The class label identified by the model.
            - **score** (`float`) -- The score attributed by the model for that label.
            - **box** (`list[dict[str, int]]`) -- The bounding box of detected object in image's original size.
        imagesinputs)popr   r4   )r"   r#   r$   r   s      r&   r4   z ObjectDetectionPipeline.__call__T   sM    @ v(&"8"8%zz(33F8uww0000r'   c                 D   t          ||          }t          j        |j        |j        gg          }|                     |gd          }| j        dk    r|                    | j                  }| j	        $| 	                    |d         |d         d          }||d<   |S )N)r)   pt)r7   return_tensorswordsboxes)textr>   r<   target_size)
r   torch	IntTensorheightwidthimage_processorr   todtype	tokenizer)r"   r/   r)   r@   r8   s        r&   
preprocessz"ObjectDetectionPipeline.preprocessx   s    5'222oek'B&CDD%%eWT%JJ>T!!YYtz**F>%^^w`d^eeF +}r'   c                     |                     d          } | j        di |}|                    d|i|          }| j        |d         |d<   |S )Nr@   bboxr+   )r9   modelr   rH   )r"   model_inputsr@   outputsmodel_outputss        r&   _forwardz ObjectDetectionPipeline._forward   sf    "&&}55$*,,|,,))=+*Q*QRR>%$0$8M&!r'         ?c                 L    |d         } j         |d                                         \   fd|d                             d                              d                              d          \  }} fd|                                D             }fd|d	                             d          D             }g d
fdt          |                                ||          D             }n j                            ||          }	|	d         }
|
d         }|
d         }|
d         }|                                |
d<    fd|D             |
d<    fd|D             |
d<   g d
fdt          |
d         |
d         |
d                   D             }|S )Nr@   r   c           
                               t          j        | d         z  dz  | d         z  dz  | d         z  dz  | d         z  dz  g                    S )Nr   i  r   r      )_get_bounding_boxrA   Tensor)rK   rC   r"   rD   s    r&   unnormalizez8ObjectDetectionPipeline.postprocess.<locals>.unnormalize   sq    --L"T!W_t3#d1g-4"T!W_t3#d1g-4	 	 	 	r'   logits)dimc                 >    g | ]}j         j        j        |         S r+   )rL   configid2label).0
predictionr"   s     r&   
<listcomp>z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s&    ```dj'0<```r'   c                 &    g | ]} |          S r+   r+   )r^   rK   rW   s     r&   r`   z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s#    TTT4[[&&TTTr'   rK   )scorelabelboxc                 b    g | ]+}|d          k    t          t          |                    ,S )r   dictzip)r^   valskeysr*   s     r&   r`   z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s;    wwwDcghicjmvcvcv$s4//cvcvcvr'   scoreslabelsr>   c                 b    g | ]+}j         j        j        |                                         ,S r+   )rL   r\   r]   item)r^   rc   r"   s     r&   r`   z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s/    'e'e'eUZ
(9(B5::<<(P'e'e'er'   c                 :    g | ]}                     |          S r+   )rU   )r^   rd   r"   s     r&   r`   z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s'    &T&T&Tst'='=c'B'B&T&T&Tr'   c                 J    g | ]}t          t          |                     S r+   rf   )r^   ri   rj   s     r&   r`   z7ObjectDetectionPipeline.postprocess.<locals>.<listcomp>   s9        St__%%  r'   )rH   tolistsqueezesoftmaxmaxrh   rE   post_process_object_detection)r"   rO   r*   r@   rk   classesrl   r>   
annotationraw_annotationsraw_annotationrC   rj   rW   rD   s   ` `        @@@@r&   postprocessz#ObjectDetectionPipeline.postprocess   s   #M2>% (N1133MFE
 
 
 
 
 
 
 ,H5==a@@HHRHPPTTY[T\\OFG````w~~O_O_```FTTTT=3H3P3PQR3S3STTTE,,,DwwwwwCQWY^<_<_wwwJJ #2PPQ^`ikvwwO,Q/N#H-F#H-F"7+E'-}}N8$'e'e'e'e^d'e'e'eN8$&T&T&T&Te&T&T&TN7# -,,D   x 8.:RTbcjTkll  J
 r'   rd   ztorch.Tensorc                     | j         dk    rt          d          |                                                                \  }}}}||||d}|S )a%  
        Turns list [xmin, xmax, ymin, ymax] into dict { "xmin": xmin, ... }

        Args:
            box (`torch.Tensor`): Tensor containing the coordinates in corners format.

        Returns:
            bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
        r;   z9The ObjectDetectionPipeline is only available in PyTorch.)xminyminxmaxymax)r   r   intrq   )r"   rd   r|   r}   r~   r   rK   s          r&   rU   z)ObjectDetectionPipeline._get_bounding_box   sa     >T!!XYYY!$!1!1!3!3dD$	
 
 r'   r2   )rQ   )__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r.   r   r   strr   listrg   r4   rI   rP   rz   r   rU   __classcell__)r   s   @r&   r   r      s        0 O #O	' 	' 	' 	' 	'9 9 9 peC$67ppspW[\`adfiai\jWkppp Xp)49d=&99:)CF)RU)	d4S>"	#) ) ) X)"15d38n1EtDQUVY[^V^Q_L`Ga1a+b "1 "1 "1 "1 "1 "1H	 	 	 	  + + + +Z^ S#X        r'   r   )typingr   r   r   r   utilsr   r	   r
   r   r   baser   r   image_utilsr   rA   models.auto.modeling_autor   r   PILr   
get_loggerr   loggerr   r+   r'   r&   <module>r      sh   6 6 6 6 6 6 6 6 6 6 6 6 k k k k k k k k k k k k k k 4 4 4 4 4 4 4 4  )((((((  LLL       
  		H	%	% ,,FFFGGq q q q qh q q HGq q qr'   