
    %`i/Q              %          d dl Z d dlZd dlmZ d dlmZmZ d dlZddlm	Z	 ddl
mZ 	  e	d           d	Zn# eef$ r d
ZY nw xY weZ ed d          Z G d d          Z G d d          Zdeeef         ddfdZdej        dej        dej        dej        dej        dej        defdZdej        dej        deeef         dej        fdZdd	d d d d ded	d d defded ed!ed"ed#ed$ed%ed&eeef         d'ed(ed)ed*edeeef         d+edeej        ej        ef         fd,Zdedeee         ee         ef         fd-Zdedefd.Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d=d/ej        d ed!ed"ed#ed$ed%ed&eeef         d0ed1ed(ed)ed*edeeef         d2ed3edeej        ej        f         f"d4Z d/ej        deee         ee         ef         fd5Z!d/ej        defd6Z"	 	 	 d>ded8eeef         d9eeeef                  d:edeej        ej        e#eef         f         f
d;Z$	 d?ded:edeeee         ee         f         ee         f         fd<Z%dS )@    N)Fraction)OptionalUnion   )_load_library   ) _raise_video_deprecation_warningvideo_readerTFc                   4    e Zd ZeedZddgZdededdfdZdS )Timebase	numeratordenominatorr   r   returnNc                 "    || _         || _        d S )Nr   )selfr   r   s      m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchvision/io/_video_opt.py__init__zTimebase.__init__   s    
 #&    )__name__
__module____qualname__int__annotations__	__slots__r    r   r   r   r      s[        $'<<Om,I'' ' 
	' ' ' ' ' 'r   r   c            	       4    e Zd ZeeeeeeeedZg dZddZdS )VideoMetaData)	has_videovideo_timebasevideo_duration	video_fps	has_audioaudio_timebaseaudio_durationaudio_sample_rater   Nc                     d| _         t          dd          | _        d| _        d| _        d| _        t          dd          | _        d| _        d| _        d S )NFr   r   g        )	r   r   r    r!   r"   r#   r$   r%   r&   )r   s    r   r   zVideoMetaData.__init__;   sU    &q!nn!&q!nn!!$r   )r   N)	r   r   r   boolr   floatr   r   r   r   r   r   r   r   %   s]        """	 	O	 	 	I% % % % % %r   r   	pts_ranger   c                     | d         | d         cxk    rdk    r%n d S t          d| d          d| d                    d S )Nr   r   z=Start pts should not be smaller than end pts, got start pts: z and end pts: )
ValueError)r*   s    r   _validate_ptsr-   F   sh    |il&&&&Q&&&&&&vIVWLvvhqrshtvv
 
 	
 '&r   	vtimebasevfps	vduration	atimebaseasample_rate	adurationc                    t                      }|                                 dk    rt          t          | d                                                   t          | d                                                             |_        | d                                         t          | d                                                   z  }|                                dk    r0d|_        t          |                                          |z  |_        |                                dk    r&t          |                                          |_	        |                                dk    rt          t          |d                                                   t          |d                                                             |_
        |d                                         t          |d                                                   z  }|                                dk    r0d|_        t          |                                          |z  |_        |                                dk    r&t          |                                          |_        |S )zE
    Build update VideoMetaData struct with info about the video
    r   r   T)r   numelr   r   itemr    r)   r   r!   r"   r$   r#   r%   r&   )r.   r/   r0   r1   r2   r3   metatimebases           r   
_fill_infor9   N   s    ??D1&s9Q<+<+<+>+>'?'?Yq\EVEVEXEXAYAYZZQ<$$&&y|/@/@/B/B)C)CC??q  !DN"'	(8(8"9"9H"DDzz||atyy{{++1&s9Q<+<+<+>+>'?'?Yq\EVEVEXEXAYAYZZQ<$$&&y|/@/@/B/B)C)CC??q  !DN"'	(8(8"9"9H"DDa!&|'8'8':':!;!;Kr   aframes
aframe_ptsaudio_pts_rangec                 n   |d         |d         }}|                      d          }t          ||z
  dz             t          |          z  }d}|}||d         k     rt          |d         |z
  |z            }|d         dk    r'||d         k    rt          |d         |z
  |z            }| ||d d f         S )Nr   r   )sizer)   r   )	r:   r;   r<   startendnum_samplesstep_per_aframes_idxe_idxs	            r   _align_audio_framesrF   n   s     A
23E,,q//KC%K!O,,u[/A/AAOEEq!!!_Q'%/?BCCqRC/!*<$<$<_Q'#-@AA5;>""r         ?r   r>   filenameseek_frame_marginread_video_streamvideo_widthvideo_heightvideo_min_dimensionvideo_max_dimensionvideo_pts_ranger    read_audio_streamaudio_samplesaudio_channelsr$   c                    t                       t          |           t          |           t          j        j                            | |d||||||d         |d         |j        |j        |	|
||d         |d         |j        |j                  }|\
  }}}}}}}}}}t          ||||||          }|	                                dk    rt          |||          }|||fS )ab  
    Reads a video from a file, returning both the video frames and the audio frames

    Args:
    filename (str): path to the video file
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise. Thus,
        when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase (Fraction, optional): a Fraction rational number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase (Fraction, optional): a Fraction rational number which denotes time base in audio stream

    Returns
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of audio_channels
        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
            and audio_fps (int)
    r   r   )r	   r-   torchopsr
   read_video_from_filer   r   r9   r5   rF   )rI   rJ   rK   rL   rM   rN   rO   rP   r    rQ   rR   rS   r<   r$   resultvframes_vframe_ptsr.   r/   r0   r:   r;   r1   r2   r3   infos                             r   _read_video_from_filer\   }   s   @ %&&&/"""/"""Y#88	 " "' F* qwmG[)T9gz9Vbdmiy)\9UUD}}%gz?KKGT!!r   c                 R   t           j        j                            | dddddddddddddddddd          }|\
  }}}}}}}}	}
}t	          ||||	|
|          }|                                                                }|                                                                }|||fS )z
    Decode all video- and audio frames in the video. Only pts
    (presentation timestamp) is returned. The actual frame pixel data is not
    copied. Thus, it is much faster than read_video(...)
    r   r   r>   )rU   rV   r
   rW   r9   numpytolist)rI   rX   _vframes
vframe_ptsr.   r/   r0   _aframesr;   r1   r2   r3   r[   s                r    _read_video_timestamps_from_filerc      s     Y#88								
						
		' F* rxnHj)T9h
IWceniy)\9UUD!!##**,,J!!##**,,Jz4''r   c                     t                       t          j        j                            |           }|\  }}}}}}t          ||||||          }|S )zO
    Probe a video file and return VideoMetaData with info about the video
    )r	   rU   rV   r
   probe_video_from_filer9   )	rI   rX   r.   r/   r0   r1   r2   r3   r[   s	            r   _probe_video_from_filerf     sV     %&&&Y#99(CCFEKBItY	<iy)\9UUDKr   
video_datavideo_timebase_numeratorvideo_timebase_denominatoraudio_timebase_numeratoraudio_timebase_denominatorc                 `   t                       t          |           t          |           t          | t          j                  sat          j                    5  t          j        dd           t          j        | t          j	                  } ddd           n# 1 swxY w Y   t          j
        j                            | |d||||||d         |d         ||	|
|||d         |d         ||          }|\
  }}}}}}}}}}|                                dk    rt          |||          }||fS )a  
    Reads a video from memory, returning both the video frames as the audio frames
    This function is torchscriptable.

    Args:
    video_data (data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes):
        compressed video content stored in either 1) torch.Tensor 2) python bytes
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise.
        Thus, when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase_numerator / video_timebase_denominator (float, optional): a rational
        number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio audio_channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase_numerator / audio_timebase_denominator (float, optional):
        a rational number which denotes time base in audio stream

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of channels
    ignore The given buffer is not writablemessagedtypeNr   r   )r	   r-   
isinstancerU   Tensorwarningscatch_warningsfilterwarnings
frombufferuint8rV   r
   read_video_from_memoryr5   rF   )rg   rJ   rK   rL   rM   rN   rO   rP   rh   ri   rQ   rR   rS   r<   rj   rk   rX   rY   rZ   r.   r/   r0   r:   r;   r1   r2   r3   s                              r   _read_video_from_memoryr{     s   J %&&&/"""/"""j%,// I$&& 	I 	I#H6XYYYY)*EKHHHJ	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I
 Y#::	 " "' F, qwmG[)T9gz9Vbdm}}%gz?KKGs   7BB!$B!c                 d   t          | t          j                  sat          j                    5  t          j        dd           t          j        | t          j                  } ddd           n# 1 swxY w Y   t          j        j	        
                    | dddddddddddddddddd          }t                       |\
  }}}}}}}}	}
}t          ||||	|
|          }|                                                                }|                                                                }|||fS )	z
    Decode all frames in the video. Only pts (presentation timestamp) is returned.
    The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
    is much faster than read_video(...)
    rm   rn   ro   rq   Nr   r   r>   )rs   rU   rt   ru   rv   rw   rx   ry   rV   r
   rz   r	   r9   r^   r_   )rg   rX   r`   ra   r.   r/   r0   rb   r;   r1   r2   r3   r[   s                r   "_read_video_timestamps_from_memoryr}   z  s    j%,// I$&& 	I 	I#H6XYYYY)*EKHHHJ	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I Y#::								
						
		' F* %&&&qwnHj)T9h
IWceniy)\9UUD!!##**,,J!!##**,,Jz4''s   7A11A58A5c                    t                       t          | t          j                  sat	          j                    5  t	          j        dd           t          j        | t          j                  } ddd           n# 1 swxY w Y   t          j	        j
                            |           }|\  }}}}}}t          ||||||          }|S )zy
    Probe a video in memory and return VideoMetaData with info about the video
    This function is torchscriptable
    rm   rn   ro   rq   N)r	   rs   rU   rt   ru   rv   rw   rx   ry   rV   r
   probe_video_from_memoryr9   )	rg   rX   r.   r/   r0   r1   r2   r3   r[   s	            r   _probe_video_from_memoryr     s    %&&&j%,// I$&& 	I 	I#H6XYYYY)*EKHHHJ	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I Y#;;JGGFEKBItY	<iy)\9UUDKs   7A??BBpts	start_ptsend_ptspts_unitc           	         t                       t          d          dk    rt          j        d           t	          |           }|j        }|j        }fd}d}t          }	|r/t          |j	        j
        |j	        j                  }	 ||	          }d}
t          }|r/t          |j        j
        |j        j                  } ||          }
t          | d||	d|
|          \  }}}i }|r
|j        |d<   |r
|j        |d	<   |||fS )
Ninfr   mThe pts_unit 'pts' gives wrong results and will be removed in a follow-up version. Please use pts_unit 'sec'.c                    }}dk    rat          t          j        d| z  z                      }|t          d          k    r't          t          j        d| z  z                      }|t          d          k    rd}||fS )Nsecr   r   r>   )r   mathfloorr)   ceil)	time_basestart_offset
end_offsetr   r   r   s      r   get_ptsz_read_video.<locals>.get_pts  s     
utz)q9}*EFFGGLU5\\)) 7a)m+D!E!EFF
u%%JZ''r   rH   T)rK   rP   r    rQ   r<   r$   r"   	audio_fps)r	   r)   ru   warnrf   r   r#   default_timebaser   r    r   r   r$   r\   r"   r&   )rI   r   r   r   r[   r   r#   r   rP   r    r<   r$   rY   r:   _infos    ```           r   _read_videor     sn    %&&&,,5>	
 	
 	

 "(++DII	( 	( 	( 	( 	( 	( 	( O%N 2!$"5"?ATA`aa!'.11O%N 2!$"5"?ATA`aa!'.112'%'%  GWd E ,!^k 4!3kGU""r   c                    t                       |dk    rt          j        d           t          |           \  }}}|dk    r2t	          |j        j        |j        j                  fd|D             }|j        r|j	        nd }||fS )Nr   r   r   c                     g | ]}|z  S r   r   ).0xvideo_time_bases     r   
<listcomp>z*_read_video_timestamps.<locals>.<listcomp>  s    000qq?"000r   )
r	   ru   r   rc   r   r    r   r   r   r"   )rI   r   r   _r[   r"   r   s         @r   _read_video_timestampsr     s     %&&&5>	
 	
 	
 4H==LCD5"4#6#@$BUBabb0000C000"&.:dI	>r   )rG   r   r   r   r   r   rH   r   r   r   r   r   rH   r   r   )r   Nr   )r   )&r   ru   	fractionsr   typingr   r   rU   	extensionr   _video_deprecation_warningr	   _HAS_CPU_VIDEO_DECODERImportErrorOSError_HAS_VIDEO_OPTr   r   r   tupler   r-   rt   r9   rF   strr)   r(   r\   listrc   rf   r{   r}   r   dictr   r   r   r   r   <module>r      s           " " " " " " " "  % % % % % % H H H H H H#M.!!!!W # # #"# (8Aq>> 

' 
' 
' 
' 
' 
' 
' 
'% % % % % % % %B
U38_ 
 
 
 
 
|
, | |	
 , |    @#\#',|#FKCQTHo#
\# # # #"  $"  './"'./^" ^"^"^" ^" 	^"
 ^" ^" ^" 38_^" ^" ^" ^" ^" 38_^" ^" 5<}45^" ^" ^" ^"B (s  (uT#YS	S`=`7a  (  (  (  (FS ]      $  '.$%&''.$%&'!k kkk k 	k
 k k k 38_k "k !$k k k k 38_k "k  !$!k" 5<%&#k k k k\((((
49d3i./(( (( (( ((V   * )*04	;# ;#;#UH_%;# eE8O,-;# 	;#
 5<tCJ'778;# ;# ;# ;#~ $)  
5cDN*+Xe_<=     s   6 	AA