
import math
from collections.abc import Iterable
from typing import TYPE_CHECKING, Optional, Union

import torch

from ...image_processing_base import BatchFeature
from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs
from ...image_transforms import group_images_by_shape, reorder_images
from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, PILImageResampling, SizeDict
from ...utils import TensorType, auto_docstring, requires_backends
from ..beit.image_processing_beit_fast import BeitImageProcessorFast


if TYPE_CHECKING:
    from ...modeling_outputs import DepthEstimatorOutput

from torchvision.transforms.v2 import functional as F


def get_resize_output_image_size(
    input_image: "torch.Tensor",
    output_size: Union[int, Iterable[int]],
    keep_aspect_ratio: bool,
    multiple: int,
) -> SizeDict:
    def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
        x = round(val / multiple) * multiple

        if max_val is not None and x > max_val:
            x = math.floor(val / multiple) * multiple

        if x < min_val:
            x = math.ceil(val / multiple) * multiple

        return x

    input_height, input_width = input_image.shape[-2:]
    output_height, output_width = output_size

    # Determine the scale factor for each dimension.
    scale_height = output_height / input_height
    scale_width = output_width / input_width

    if keep_aspect_ratio:
        # Scale as little as possible: use the factor closest to 1 for both dimensions.
        if abs(1 - scale_width) < abs(1 - scale_height):
            # Fit width
            scale_height = scale_width
        else:
            # Fit height
            scale_width = scale_height

    new_height = constrain_to_multiple_of(scale_height * input_height, multiple=multiple)
    new_width = constrain_to_multiple_of(scale_width * input_width, multiple=multiple)

    return SizeDict(height=new_height, width=new_width)
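

# A worked example (illustrative, following the logic above): with `keep_aspect_ratio=True`
# the helper keeps the scale factor closest to 1.0 for both axes, so a 480x640 image
# targeted at 384x384 with `multiple=32` resolves to 384x512 (aspect ratio preserved):
#
#     image = torch.zeros(3, 480, 640)
#     get_resize_output_image_size(image, (384, 384), keep_aspect_ratio=True, multiple=32)
#     # -> SizeDict(height=384, width=512)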


class DPTFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
    """
    ensure_multiple_of (`int`, *optional*, defaults to 1):
        If `do_resize` is `True`, the image is resized to a size that is a multiple of this value. Can be overridden
        by `ensure_multiple_of` in `preprocess`.
    size_divisor (`int`, *optional*):
        If `do_pad` is `True`, pads the image dimensions to be divisible by this value. This was introduced in the
        DINOv2 paper, which uses the model in combination with DPT.
    keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
        If `True`, the image is resized to the largest possible size such that the aspect ratio is preserved. Can
        be overridden by `keep_aspect_ratio` in `preprocess`.
    do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
        Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
        is used for background, and background itself is not included in all classes of a dataset (e.g.
        ADE20k). The background label will be replaced by 255.
    """

    ensure_multiple_of: Optional[int]
    size_divisor: Optional[int]
    keep_aspect_ratio: Optional[bool]
    do_reduce_labels: Optional[bool]
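

# Illustrative sketch: these kwargs are forwarded through `preprocess`/`__call__`
# (`images` being any list of PIL, NumPy, or torch inputs), e.g.
#
#     processor = DPTImageProcessorFast()
#     inputs = processor(images, keep_aspect_ratio=True, ensure_multiple_of=32, return_tensors="pt")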
__module____qualname____doc__r   int__annotations__bool r+   r)   r=   r=   U   s^            !%%%3-~%%%tn$$$$$r+   r=   c            '       &   e Zd Zej        ZeZeZ	dddZ
dZdZdZdZdZdZdZdZdZdZdZeZ	 	 	 	 d&dd	d
eded         dedee         dedd	fdZ	 d'dd	dedd	fdZded	         deded
eded         dedededededeeeee         f                  deeeee         f                  dedee         dedee         dee         d eee e!f                  de"f&d!Z#	 d(d"d#d$eee!ee$eef                  df                  dee%e e!f                  fd%Z&dS ))DPTImageProcessorFasti  r.   TFgp?r-   Nimager   sizeinterpolationzF.InterpolationMode	antialiasr>   r   r   c                     |j         r|j        s$t          d|                                           t	          ||j         |j        f||          }t          j        | ||||          S )a<  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing when resizing the image.
            ensure_multiple_of (`int`, *optional*):
                If `do_resize` is `True`, the image is resized to a size that is a multiple of this value
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, and `do_resize` is `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.

        Returns:
            `torch.Tensor`: The resized image.
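
        Example (illustrative sketch; `DPTImageProcessorFast()` here uses the default settings):

        ```python
        processor = DPTImageProcessorFast()
        image = torch.rand(3, 480, 640)
        resized = processor.resize(
            image, SizeDict(height=384, width=384), keep_aspect_ratio=True, ensure_multiple_of=32
        )
        # keep_aspect_ratio keeps the milder scale factor, so the result is 384x512.
        ```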
        """
        if not size.height or not size.width:
            raise ValueError(f"The size dictionary must contain the keys 'height' and 'width'. Got {size.keys()}")

        output_size = get_resize_output_image_size(
            image,
            output_size=(size.height, size.width),
            keep_aspect_ratio=keep_aspect_ratio,
            multiple=ensure_multiple_of,
        )
        return BaseImageProcessorFast.resize(
            self, image, size=output_size, interpolation=interpolation, antialias=antialias
        )

    def pad_image(
        self,
        image: "torch.Tensor",
        size_divisor: int,
    ) -> "torch.Tensor":
        """
        Center pad a batch of images to be a multiple of `size_divisor`.

        Args:
            image (`torch.Tensor`):
                Image to pad. Can be a batch of images of dimensions (N, C, H, W) or a single image of dimensions (C, H, W).
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
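
        Example (illustrative sketch):

        ```python
        processor = DPTImageProcessorFast()
        image = torch.rand(3, 480, 640)
        processor.pad_image(image, size_divisor=64).shape  # -> torch.Size([3, 512, 640])
        ```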
        r,   Nc                 \    t          j        | |z            |z  }|| z
  }|dz  }||z
  }||fS )Nr   )r"   r$   )rL   r?   new_sizepad_sizepad_size_leftpad_size_rights         r)   _get_padz1DPTImageProcessorFast.pad_image.<locals>._get_pad   sB    y!455DH$H$MM%5N .00r+   )r1   Fpad)rS   rK   r?   r/   r0   rZ   pad_top
pad_bottompad_left	pad_rightpaddings              r)   	pad_imagezDPTImageProcessorFast.pad_image   su     BCC(	1 	1 	1 'hv|<<&hul;;)Wi<uUG$$$r+   imagesr@   	do_resizedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stddo_paddisable_groupingreturn_tensorsc           	      h   |r|                      |          }t          ||          \  }}i }|                                D ]&\  }}|r|                     |||||          }|||<   't	          ||          }t          ||          \  }}i }|                                D ]T\  }}|r|                     ||          }|r|                     ||          }|                     |||	|
||          }|||<   Ut	          ||          }|rt          j	        |d          n|}t          d|i          S )N)rm   )rK   rL   rM   r>   r   r   )dimpixel_values)data)reduce_labelr   itemsrR   r   center_croprb   rescale_and_normalizetorchstackr   )rS   rc   r@   rd   rL   rM   re   rf   rg   rh   ri   rj   rk   r   r>   rl   r?   rm   rn   kwargsgrouped_imagesgrouped_images_indexresized_images_groupedr1   stacked_imagesresized_imagesprocessed_images_groupedprocessed_imagess                               r)   _preprocessz!DPTImageProcessorFast._preprocess   s   ,  	/&&v..F 0EV^n/o/o/o,,!#%3%9%9%;%; 		; 		;!E> !%("/'9&7 "- " " -;"5))'(>@TUU 0E^fv/w/w/w,,#% %3%9%9%;%; 		= 		=!E> M!%!1!1.)!L!L N!%!M!M!77
NL*V_ N /=$U++)*BDXYYCQg5;'7Q????Wg.2B!CDDDDr+   outputsr   target_sizesc                    t          | d           |j        }|/t          |          t          |          k    rt          d          g }|dgt          |          z  n|}t	          ||          D ]~\  }}|`t
          j        j                            |	                    d          	                    d          |dd          
                                }|                    d	|i           |S )
a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
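
        Example (illustrative sketch; `outputs` comes from a DPT depth-estimation forward pass):

        ```python
        results = processor.post_process_depth_estimation(outputs, target_sizes=[(480, 640)])
        depth = results[0]["predicted_depth"]  # tensor of shape (480, 640)
        ```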
        """
        requires_backends(self, "torch")

        predicted_depth = outputs.predicted_depth

        if (target_sizes is not None) and (len(predicted_depth) != len(target_sizes)):
            raise ValueError(
                "Make sure that you pass in as many target sizes as the batch dimension of the predicted depth"
            )

        results = []
        target_sizes = [None] * len(predicted_depth) if target_sizes is None else target_sizes
        for depth, target_size in zip(predicted_depth, target_sizes):
            if target_size is not None:
                # `interpolate` expects a 4D (N, C, H, W) input, hence the double unsqueeze/squeeze.
                depth = torch.nn.functional.interpolate(
                    depth.unsqueeze(0).unsqueeze(1), size=target_size, mode="bicubic", align_corners=False
                ).squeeze()

            results.append({"predicted_depth": depth})

        return results


__all__ = ["DPTImageProcessorFast"]