
     `i                         d Z ddlmZmZ ddlZddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZ  G d de          Z G d de          ZdgZdS )z(Fast Video processor class for InternVL.    )OptionalUnionN)
functional   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDPILImageResamplingSizeDict)UnpackVideosKwargs)
TensorType)BaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videosc                   ,    e Zd ZU eeeef         ed<   dS ) InternVLVideoProcessorInitKwargsinitial_shiftN)__name__
__module____qualname__r   boolfloatint__annotations__     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/internvl/video_processing_internvl.pyr   r      s*         uc)******r   r   c                       e Zd Zej        ZeZeZ	dddZ
dZdZdZdZdZdZeZdgZdee         f fdZ	 	 	 dd	ed
ee         deeeef                  deeeeef                  fdZ	 d ded         dedededed         dedededededeeeee         f                  deeeee         f                  deeee f                  de!fdZ" xZ#S )!InternVLVideoProcessori  )heightwidthTFpixel_values_videoskwargsc                 :     t                      j        di | d S )Nr   )super__init__)selfr%   	__class__s     r   r(   zInternVLVideoProcessor.__init__0   s&    ""6"""""r   Nmetadata
num_framesfpsr   c                 R   ||n| j         }||n| j        }|j        }|4|2||j        t	          d          t          ||j        z  |z            }|du r||z  dz  }||k    rt	          d| d| d          t          j        ||||z                                            }|S )a  
        Default sampling function which uniformly samples the desired number of frames between 0 and total number of frames.
        If `fps` is passed along with metadata, `fps` frames per second are sampled uniformty. Arguments `num_frames`
        and `fps` are mutually exclusive.

        Args:
            metadata (`VideoMetadata`):
                Metadata of the video containing information about total duration, fps and total number of frames.
            num_frames (`int`, *optional*):
                Maximum number of frames to sample. Defaults to `self.num_frames`.
            fps (`int` or `float`, *optional*):
                Target frames to sample per second. Defaults to `self.fps`.
            initial_shift (`bool`, `float` or `int`, defaults to `self.initial_shift`):
                The initial shift to apply when sampling frames. If `True`, the shift is set so that frames are sampled from the middle of the video.

        Returns:
            np.ndarray:
                Indices to sample video frames.
        NzAsked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. Please pass in `VideoMetadata` object or use a fixed `num_frames` per input videoT   z(Video can't be sampled. The `num_frames=z` exceeds `total_num_frames=z`. )r,   r   total_num_framesr-   
ValueErrorr   torcharange)r)   r+   r,   r-   r   r%   r0   indicess           r   sample_framesz$InternVLVideoProcessor.sample_frames3   s    6 $.#9ZZt
)6)BHZ#4 #/8<#7 h   -<sBCCJD  ,z9A=M(((x:xxcsxxx   ,}.>@PS]@]^^bbddr   videosztorch.Tensordo_convert_rgb	do_resizesizeinterpolationzF.InterpolationModedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stdreturn_tensorsreturnc           	      .   t          |          \  }}i }|                                D ];\  }}|r|                     |          }|r|                     |||          }|||<   <t	          ||          }t          |          \  }}i }|                                D ]<\  }}|r|                     ||          }|                     |||	|
||          }|||<   =t	          ||          }|rt          j        |d          n|}t          d|i|          S )N)r9   r:   r   )dimr$   )datatensor_type)
r   itemsconvert_to_rgbresizer   center_croprescale_and_normalizer2   stackr   )r)   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   r%   grouped_videosgrouped_videos_indexresized_videos_groupedshapestacked_videosresized_videosprocessed_videos_groupedprocessed_videoss                          r   _preprocessz"InternVLVideoProcessor._preprocessf   su   $ 0EV/L/L,,!#%3%9%9%;%; 	; 	;!E> E!%!4!4^!D!D e!%^$Vc!d!d,:"5))'(>@TUU 0E^/T/T,,#% %3%9%9%;%; 	= 	=!E> M!%!1!1.)!L!L!77
NL*V_ N /=$U++)*BDXYYCQg5;'7Q????Wg"79I!JXfggggr   )NNN)N)$r   r   r   r
   BICUBICresampler   r@   r	   rA   r9   r8   r=   r?   r7   r   do_sample_framesr   valid_kwargsmodel_input_namesr   r(   r   r   r   r   r   r   r5   listr   strr   r   rV   __classcell__)r*   s   @r   r!   r!   "   s       !)H!JIC((DIJLNM3L./#(H!I # # # # # # %)+/;?1 11 SM1 eCJ'(	1
  dE3&6 781 1 1 1B <@,h ,h^$,h ,h 	,h
 ,h   56,h ,h ,h ,h ,h ,h U5$u+#567,h E%e"456,h !sJ!78,h  
!,h ,h ,h ,h ,h ,h ,h ,hr   r!   )__doc__typingr   r   r2   torchvision.transforms.v2r   Fimage_processing_utilsr   image_utilsr   r	   r
   r   processing_utilsr   r   utilsr   video_processing_utilsr   video_utilsr   r   r   r   r!   __all__r   r   r   <module>rj      sD   / . " " " " " " " "  5 5 5 5 5 5 2 2 2 2 2 2 Z Z Z Z Z Z Z Z Z Z Z Z 4 4 4 4 4 4 4 4       8 8 8 8 8 8 O O O O O O O O O O+ + + + +| + + +ph ph ph ph ph/ ph ph phf $
$r   