
    .`iC                     `   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlZd dlmZ d dlZd dlmZmZ d d	lmZmZ d dlmZ d d
lmZm Z  d dl!m"Z" d dl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ erddl,m-Z-m.Z.m/Z/ neZ-eZ.eZ/ e"e0          Z1 eej2                  Z3 ej4        e3j5                    ed          Z6 e$            Z7e74                    d           G d d                      Z8dddej9        de:de;de;fdZ<dddej9        de:de;de;fdZ=ddddej        d e;de;dz  de;fd!Z>dd"ddej        d e;de;de;fd#Z?d$dd%ej@        de;de;fd&ZAd$dd%ej@        de;de;fd'ZBd(e/deCeDe;e:f                  fd)ZEdd*d+d,eCe.         d-ejF        jG        d.eHdeeDe;e:e-f         ddf         fd/ZI	 d9d0e;d1eJe;ef         dz  deDej9        e:eKz  f         fd2ZL	 d9d3e;d4eJe;ef         dz  dej        fd5ZM	 d9d6e;d7eJe;ef         dz  deDej@        eJe;ef         f         fd8ZNdS ):    N)	Generator)ThreadPoolExecutor)groupby)Path)TYPE_CHECKINGAnyTypeVar)url2pathname)ImageUnidentifiedImageError)Url	parse_url)HTTPConnectionglobal_http_connection)init_logger)ExtensionManager   )AudioEmbeddingMediaIOAudioMediaIOImageEmbeddingMediaIOImageMediaIOMediaIOVideoMediaIO)BatchedTensorInputsMultiModalKwargsItemMultiModalPlaceholderDict)max_workers_Mhttpc                       e Zd Zdefddddeeeeef         f         dz  dededee         dz  ddf
 fd	Z	d
e
dee         defdZd
e
dee         defdZd
e
ddfdZdddedee         dedz  defdZdddedee         dedz  defdZdedeej        eez  f         fdZdedeej        eez  f         fdZdddededej        fdZdddededej        fdZdddededeej        eeef         f         fdZdddededeej        eeef         f         fdZd ede j!        fd!Z"d ede j!        fd"Z# xZ$S )#MediaConnectorN )allowed_local_media_pathallowed_media_domainsmedia_io_kwargs
connectionr#   r$   returnc                T   t                                                       |r|ni | _        || _        |r^t	          |          }|                                st          d| d          |                                st          d| d          nd}|| _        |g }|| _	        dS )aj  
        Args:
            media_io_kwargs: Additional args passed to process media
                             inputs, keyed by modalities. For example,
                             to set num_frames for video, set
                             `--media-io-kwargs '{"video":{"num_frames":40}}'`
            connection: HTTP connection client to download media contents.
            allowed_local_media_path: A local directory to load media files from.
            allowed_media_domains: If set, only media URLs that belong to this
                                   domain can be used for multi-modal inputs.
        z/Invalid `--allowed-local-media-path`: The path z does not exist.z must be a directory.N)
super__init__r%   r&   r   exists
ValueErroris_dirr#   r$   )selfr%   r&   r#   r$   allowed_local_media_path_	__class__s         i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/multimodal/utils.pyr*   zMediaConnector.__init__;   s   & 	  /6OOB 	 %# 	-(,-E(F(F%,3355  C0C C C   -3355  H0H H H   )-%(A% ($&!%:"""    url_specmedia_ioc                     |j         pd}|                    dd          \  }}|                    dd          \  }}|                    d          }|dk    rd}t          |          |                    ||          S )Nr"   ,r   ;/base64z,Only base64 data URLs are supported for now.)pathsplitlstripNotImplementedErrorload_base64)	r.   r3   r4   url_spec_path	data_specdata
media_type	data_typemsgs	            r1   _load_data_urlzMediaConnector._load_data_urlj   s    
 !+'--c155	4 )Q 7 7
I&&s++
  @C%c***##J555r2   c                     | j         }|t          d          |j        pd}|j        pd}t	          t          ||z                       }||                                j        vrt          d| d| d          |	                    |          S )Nz=Cannot load local files without `--allowed-local-media-path`.r"   zThe file path z2 must be a subpath of `--allowed-local-media-path z`.)
r#   RuntimeErrorr:   netlocr   r
   resolveparentsr,   	load_file)r.   r3   r4   r#   r?   url_spec_netlocfilepaths          r1   _load_file_urlzMediaConnector._load_file_url{   s    
 $(#@ #+O   !+"//R_}%DEEFF#8+;+;+=+=+EEEO O O2JO O O  
 !!(+++r2   c                 r    | j         r-|j        | j         vr!t          d| j          d|j                   d S d S )Nz1The URL must be from one of the allowed domains: z. Input URL domain: )r$   hostnamer,   )r.   r3   s     r1   $_assert_url_in_allowed_media_domainsz3MediaConnector._assert_url_in_allowed_media_domains   se    &	!)CCC'-' '$' '  		 	CCr2   fetch_timeouturlrS   c                   t          |          }|j        rm|j                            d          rS|                     |           | j        }|                    ||t          j                  }|                    |          S |j        dk    r| 	                    ||          S |j        dk    r| 
                    ||          S d}t          |          Nr   )timeoutallow_redirectsrA   filez0The URL must be either a HTTP, data or file URL.)r   scheme
startswithrQ   r&   	get_bytesenvsVLLM_MEDIA_URL_ALLOW_REDIRECTS
load_bytesrE   rN   r,   )r.   rT   r4   rS   r3   r&   rA   rD   s           r1   load_from_urlzMediaConnector.load_from_url   s     S>>? 
	-x99&AA 
	-55h???J''% $ C (  D &&t,,,?f$$&&x:::?f$$&&x:::@oor2   c                \  K   t          |          }t          j                    }|j        r|j                            d          rm|                     |           | j        }|                    ||t          j	                   d {V }|
                    t          |j        |          }| d {V S |j        dk    r*|
                    t          | j        ||          }| d {V S |j        dk    r*|
                    t          | j        ||          }| d {V S d}	t          |	          rV   )r   asyncioget_running_looprZ   r[   rQ   r&   async_get_bytesr]   r^   run_in_executorglobal_thread_poolr_   rE   rN   r,   )
r.   rT   r4   rS   r3   loopr&   rA   futurerD   s
             r1   load_from_url_asyncz"MediaConnector.load_from_url_async   sf      S>>'))? 
	 x99&AA 
	 55h???J#33% $ C 4        D
 ))*<h>QSWXXF<<<<<<?f$$))"D$78 F  <<<<<<?f$$))"D$78 F  <<<<<<@oor2   	audio_urlc                     t          di | j                            di           }|                     ||t          j                  S )z(
        Load audio from a URL.
        audiorR    )r   r%   getr`   r]   VLLM_AUDIO_FETCH_TIMEOUTr.   rj   audio_ios      r1   fetch_audiozMediaConnector.fetch_audio   sS      HH$"6":":7B"G"GHH!!7 " 
 
 	
r2   c                    K   t          di | j                            di           }|                     ||t          j                   d{V S )z8
        Asynchronously fetch audio from a URL.
        rl   rR   Nrm   )r   r%   rn   ri   r]   ro   rp   s      r1   fetch_audio_asyncz MediaConnector.fetch_audio_async   su        HH$"6":":7B"G"GHH--7 . 
 
 
 
 
 
 
 
 	
r2   RGB
image_mode	image_urlrw   c                    t          dd|i| j                            di           }	 |                     ||t          j                  S # t          $ r"}t          t          |                    |d}~ww xY w)z
        Load a PIL image from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rw   imagerR   Nrm   )	r   r%   rn   r`   r]   VLLM_IMAGE_FETCH_TIMEOUTr   r,   strr.   rx   rw   image_ioes        r1   fetch_imagezMediaConnector.fetch_image   s       
 
!
%)%9%=%=gr%J%J
 
	,%%"; &   
 & 	, 	, 	,SVV$$!+	,s   !A 
A7A22A7c                  K   t          dd|i| j                            di           }	 |                     ||t          j                   d{V S # t          $ r"}t          t          |                    |d}~ww xY w)z
        Asynchronously load a PIL image from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rw   rz   rR   Nrm   )	r   r%   rn   ri   r]   r{   r   r,   r|   r}   s        r1   fetch_image_asyncz MediaConnector.fetch_image_async  s         
 
!
%)%9%=%=gr%J%J
 
	,11"; 2         
 & 	, 	, 	,SVV$$!+	,s   'A 
A?A::A?	video_urlc                    t          dd|i| j                            di           }t          |fi | j                            di           }|                     ||t
          j                  S )z=
        Load video from an HTTP or base64 data URL.
        rw   rz   videorR   rm   )r   r%   rn   r   r`   r]   VLLM_VIDEO_FETCH_TIMEOUTr.   r   rw   r~   video_ios        r1   fetch_videozMediaConnector.fetch_video.  s       
 
!
%)%9%=%=gr%J%J
 
  RRD,@,D,DWb,Q,QRR!!7 " 
 
 	
r2   c                   K   t          dd|i| j                            di           }t          |fi | j                            di           }|                     ||t
          j                   d{V S )z
        Asynchronously load video from an HTTP or base64 data URL.

        By default, the image is converted into RGB format.
        rw   rz   r   rR   Nrm   )r   r%   rn   r   ri   r]   r   r   s        r1   fetch_video_asyncz MediaConnector.fetch_video_asyncB  s         
 
!
%)%9%=%=gr%J%J
 
  RRD,@,D,DWb,Q,QRR--7 . 
 
 
 
 
 
 
 
 	
r2   rA   c                 J    t                      }|                    d|          S )z2
        Load image embedding from a URL.
        r"   )r   r>   )r.   rA   image_embedding_ios      r1   fetch_image_embeddingz$MediaConnector.fetch_image_embeddingX  &     344!--b$777r2   c                 J    t                      }|                    d|          S )z2
        Load audio embedding from a URL.
        r"   )r   r>   )r.   rA   audio_embedding_ios      r1   fetch_audio_embeddingz$MediaConnector.fetch_audio_embeddingc  r   r2   )%__name__
__module____qualname__r   dictr|   r   r   listr*   r   r   r   rE   rN   rQ   intr`   ri   tuplenpndarrayfloatrr   rt   r   r   r   nptNDArrayr   r   torchTensorr   r   __classcell__)r0   s   @r1   r!   r!   9   s        =A%;-;
 )+26-; -; -;c4S>12T9-; #-;
 #&-;  $Cy4/-; 
-; -; -; -; -; -;^66 "+6 
	6 6 6 6",, "+, 
	, , , ,,	S 	T 	 	 	 	  %)   "+
 Tz 
   F %)" " "" "+"
 Tz" 
" " " "H

 
rz3;&	'
 
 
 


 
rz3;&	'
 
 
 
&  	, , ,, 	,
 
, , , ,:  	, , ,, 	,
 
, , , ,:  	
 
 

 	

 
s{DcN*	+
 
 
 
0  	
 
 

 	

 
s{DcN*	+
 
 
 
,	8	8 
	8 	8 	8 	8	8	8 
	8 	8 	8 	8 	8 	8 	8 	8r2   r!   WAVformatrl   sampling_rater   r'   c                P    t                      }|                    | |f|          S )zEncode audio as base64.)audio_format)r   encode_base64)rl   r   r   rq   s       r1   encode_audio_base64r   o  s+     ~~H!!5-"8v!NNNr2   c                    t          | ||          }t          j                            d|                                z   d          }d| d| S )zEncode audio as a data URL.r   .rl   data:;base64,)r   	mimetypes	types_maprn   lower)rl   r   r   	audio_b64mimetypes        r1   encode_audio_urlr   z  sS     $E=HHHI"&&sV\\^^';WEEH0800Y000r2   ru   rw   r   rz   rw   c                P    t          |          }|                    | |          S )z
    Encode a pillow image to base64 format.

    By default, the image is converted into RGB format before being encoded.
    rv   )image_format)r   r   )rz   rw   r   r~   s       r1   encode_image_base64r     s-     z222H!!%f!===r2   PNGc                    t          | ||          }t          j                            d|                                z   d          }d| d| S )z|
    Encode a pillow image as a data URL.

    By default, the image is converted into RGB format before being encoded.
    r   r   rz   r   r   )r   r   r   rn   r   )rz   rw   r   	image_b64r   s        r1   encode_image_urlr     sS     $EjPPPI"&&sV\\^^';WEEH0800Y000r2   JPEGframesc                j    t                      }t          |          }|                    | |          S )N)video_format)r   r   r   )r   r   r~   r   s       r1   encode_video_base64r     s3    
 ~~HH%%H!!&v!>>>r2   c                    t          | |          }|                                dk    rd}n5t          j                            d|                                z   d          }d| d| S )Nr   jpegz
video/jpegr   r   r   r   )r   r   r   r   rn   )r   r   	video_b64r   s       r1   encode_video_urlr     sk    
 $F6:::I||~~&**3+?II0800Y000r2   mm_positionsc                 z    d |                                  D             }t          |d           }d |D             S )a/  
    Given a `MultiModalPlaceholderDict`, output a sequence of keys to
    sort the dictionary by `offset` (starting index in the input sequence)
    in ascending order.

    Returns:
        A list of `(modality, idx)`, which can be used to access an item
        by `mm_positions[modality][idx]`.
    c              3   P   K   | ]!\  }}t          |          D ]\  }}|||fV  "d S N)	enumerate).0modalityitemsidxitems        r1   	<genexpr>z'argsort_mm_positions.<locals>.<genexpr>  sd        He"5))  C 
3      r2   c                     | d         j         S )N   )offset)xs    r1   <lambda>z&argsort_mm_positions.<locals>.<lambda>  s    1 r2   keyc                     g | ]
\  }}}||fS rm   rm   )r   r   r   _s       r1   
<listcomp>z(argsort_mm_positions.<locals>.<listcomp>  s#    FFF 0#qXsOFFFr2   )r   sorted)r   
flat_itemssorted_flat_itemss      r1   argsort_mm_positionsr     sZ     +1133  J z/D/DEEEFF4EFFFFr2   Fdevice
pin_memory	mm_kwargsr   r   c             #      K   ddl m} t          | d           D ]T\  }}t          |          }|                    |          }|                    ||          }|t          |          |fV  UdS )a  Group consecutive `MultiModalKwargsItem`s from `mm_kwargs` with the same
    modality together into the same `MultiModalKwargs` instance.

    Args:
        mm_kwargs: List of `MultiModalKwargsItem`.
        device: The device to place the grouped tensors on.
        pin_memory: Whether to pin memory for faster host-to-device transfer.

    Yields:
        A tuple `(modality, num_items, grouped_kwargs)`.
    r   )MultiModalKwargsItemsc                     | j         S r   )r   )r   s    r1   r   z-group_mm_kwargs_by_modality.<locals>.<lambda>  s    t} r2   r   r   N)vllm.multimodal.inputsr   r   r   from_seqget_datalen)	r   r   r   r   r   r   	items_lstmm_kwargs_itemsmm_kwargs_datas	            r1   group_mm_kwargs_by_modalityr     s      " =<<<<<"92L2LMMM 7 7%KK	/88CC(11! 2 
 

 I666667 7r2   rj   audio_io_kwargsc                 ^    |sdnd|i}t          |d          }|                    |           S )a+  
    Args:
        audio_url: URL of the audio file to fetch.
        audio_io_kwargs: Additional kwargs passed to handle audio IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nrl   r8   r%   r#   )r!   rr   )rj   r   r%   media_connectors       r1   rr   rr     G     #2Qdd7QO$'!$  O &&y111r2   rx   image_io_kwargsc                 ^    |sdnd|i}t          |d          }|                    |           S )a+  
    Args:
        image_url: URL of the image file to fetch.
        image_io_kwargs: Additional kwargs passed to handle image IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nrz   r8   r   )r!   r   )rx   r   r%   r   s       r1   r   r     r   r2   r   video_io_kwargsc                 ^    |sdnd|i}t          |d          }|                    |           S )a+  
    Args:
        video_url: URL of the video file to fetch.
        video_io_kwargs: Additional kwargs passed to handle video IO.

    Warning:
        This method has direct access to local files and is only intended
        to be called by user code. Never call this from the online server!
    Nr   r8   r   )r!   r   )r   r   r%   r   s       r1   r   r     r   r2   r   )Orb   atexitr   collections.abcr   concurrent.futuresr   	itertoolsr   pathlibr   typingr   r   r	   urllib.requestr
   numpyr   numpy.typingr   r   PILr   r   urllib3.utilr   r   	vllm.envsr]   vllm.connectionsr   r   vllm.loggerr   vllm.utils.registryr   mediar   r   r   r   r   r   inputsr   r   r   r   loggerVLLM_MEDIA_LOADING_THREAD_COUNTrf   registershutdownr   MEDIA_CONNECTOR_REGISTRYr!   r   r   r|   r   r   r   r   r   r   r   r   r   r   typesDeviceboolr   r   r   rr   r   r   rm   r2   r1   <module>r     s         % % % % % % 1 1 1 1 1 1             . . . . . . . . . . ' ' ' ' ' '            - - - - - - - - ' ' ' ' ' ' ' '       C C C C C C C C # # # # # # 0 0 0 0 0 0                 	$            #	X		''4    "+ , , ,WT]]++--  ""6**r8 r8 r8 r8 r8 r8 r8 +*r8r	 	O O O:OO 	O
 	O O O O 		1 	1 	1:	1	1 		1
 		1 	1 	1 	1 	> > >;> > $J	>
 	> > > >$ 	1 1 1;1 1 	1
 	1 1 1 1& ? ? ?K? ? 		? ? ? ? 1 1 1K1 1 		1 1 1 1G+G	%S/G G G G4 "&	7 7 7()7 K7 	7
 uS#223T4?@7 7 7 7@ .22 22#s(^d*2 2:sU{"#2 2 2 2. .22 22#s(^d*2 [2 2 2 2. .22 22#s(^d*2 3;S#X&'2 2 2 2 2 2r2   