
    .`i                     "   d dl mZmZ d dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d	d
lmZ ddlmZ  e
de          Z ee          Ze G d d                      Z G d dee	e                   Z dS )    )ABCabstractmethod)Mapping)	dataclassfield)GenericTypeVarN)Image)AudioDummyOptionsBaseDummyOptionsImageDummyOptionsVideoDummyOptions)init_logger   )MultiModalDataDict   )BaseProcessingInfo_I)boundc                       e Zd ZU dZeee         z  ed<   eed<    e	e
          Zeeef         ed<    e	e
          Zeeef         ed<   dS )ProcessorInputszq
    Represents the keyword arguments to
    [`vllm.multimodal.processing.BaseMultiModalProcessor.apply`][].
    promptmm_data)default_factoryhf_processor_mm_kwargstokenization_kwargsN)__name__
__module____qualname____doc__strlistint__annotations__r   r   dictr   r   objectr        {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/multimodal/processing/dummy_inputs.pyr   r      s          
 $s)O3853N3N3NGCK0NNN05d0K0K0Kf-KKKKKr(   r   c                       e Zd ZdZdeddf fdZedeee	f         defd            Z
e	 dde	deee	f         d	eeef         dz  defd
            Z	 dde	deee	f         d	eeef         dz  defdZddde	de	dedz  deej                 fdZddde	de	de	dedz  deej                 f
dZddde	de	de	de	dedz  deej                 fdZ xZS )BaseDummyInputsBuilderz_
    Abstract base class that constructs the dummy data to profile
    multi-modal models.
    inforeturnNc                 V    t                                                       || _        d S N)super__init__r,   )selfr,   	__class__s     r)   r1   zBaseDummyInputsBuilder.__init__/   s$    			r(   	mm_countsc                     t           )zD
        Build the text input corresponding to `mm_counts`.
        NotImplementedError)r2   r4   s     r)   get_dummy_textz%BaseDummyInputsBuilder.get_dummy_text4   s
    
 "!r(   seq_len
mm_optionsc                     t           )a  
        Build the multimodal input which, after processing, results in
        the maximum possible number of placeholder tokens.

        Args:
            seq_len: Sequence length
            mm_counts: Count of items per modality
            mm_options: Configurable options per modality (optional).
                       If None, use model defaults for backward compatibility.
                       If provided, models can use these to customize dummy
                       data generation.
        r6   )r2   r9   r4   r:   s       r)   get_dummy_mm_dataz(BaseDummyInputsBuilder.get_dummy_mm_data;   s
    & "!r(   c                     |                      |          }|                     |||          }ddi}t          |||          S )a,  
        Build the input which, after processing, results in
        the maximum possible number of placeholder tokens.

        Args:
            seq_len: Sequence length
            mm_counts: Count of items per modality
            mm_options: Configurable options per modality (optional)
        
truncationF)r   r   r   )r8   r<   r   )r2   r9   r4   r:   
dummy_textdummy_mm_datar   s          r)   get_dummy_processor_inputsz1BaseDummyInputsBuilder.get_dummy_processor_inputsP   s[     ((33
 ..w	:NN+U3! 3
 
 
 	
r(   )	overrideslength
num_audiosrB   c                    |dk    rg S |rH|j         rA|j         |k    r!t                              d|j         |           t          ||j                   }t	          j        |f          }|g|z  S )Nr   zOaudio.length override (%d) exceeds model's maximum length (%d), will be ignored)rC   loggerwarningminnpzeros)r2   rC   rD   rB   audios        r)   _get_dummy_audiosz(BaseDummyInputsBuilder._get_dummy_audiosl   s     ??I 	3) 	3&((;$	   !122F&##w##r(   widthheight
num_imagesc                t   |dk    rg S |r|j         rA|j         |k    r!t                              d|j         |           t          ||j                   }|j        rA|j        |k    r!t                              d|j        |           t          ||j                  }t          j        d||fd          }|g|z  S )Nr   zMimage.width override (%d) exceeds model's maximum width (%d), will be ignoredzOimage.height override (%d) exceeds model's maximum height (%d), will be ignoredRGB   )color)rM   rF   rG   rH   rN   r
   new)r2   rM   rN   rO   rB   images         r)   _get_dummy_imagesz(BaseDummyInputsBuilder._get_dummy_images   s     ??I 	7 4?U**NN>!	   E9?33 7#f,,NN?!(	   VY%566	%%<<<w##r(   
num_frames
num_videosc                   |dk    rg S |r|j         rA|j         |k    r!t                              d|j         |           t          ||j                   }|j        rA|j        |k    r!t                              d|j        |           t          ||j                  }|j        rA|j        |k    r!t                              d|j        |           t          ||j                  }t          j        |||dfdt          j                  }|g|z  S )Nr   z]video.num_frames override (%d) exceeds model's maximum number of frames (%d), will be ignoredzMvideo.width override (%d) exceeds model's maximum width (%d), will be ignoredzOvideo.height override (%d) exceeds model's maximum height (%d), will be ignored   rR   )dtype)	rW   rF   rG   rH   rM   rN   rI   fulluint8)r2   rM   rN   rW   rX   rB   videos          r)   _get_dummy_videosz(BaseDummyInputsBuilder._get_dummy_videos   s9    ??I 	7# C'*44NNI!,"	   !Y-ABB
 4?U**NN>!	   E9?33 7#f,,NN?!(	   VY%566UFA628LLLw##r(   r/   )r   r   r   r    r   r1   r   r   r!   r#   r8   r   r   r<   r   rA   r   r"   nptNDArrayrL   r   r
   rV   r   r_   __classcell__)r3   s   @r)   r+   r+   )   sy        
R D      
 "S(9 "c " " " ^" 
 =A	" "" 38$" C!112T9	"
 
" " " ^"0 =A	
 

 38$
 C!112T9	

 

 
 
 
B /3$ $ $ $ 	$
 %t+$ 
ck	$ $ $ $6 /3$ $ $ $ 	$
 $ %t+$ 
ek	$ $ $ $N /3($ ($ ($ ($ 	($
 ($ ($ %t+($ 
ck	($ ($ ($ ($ ($ ($ ($ ($r(   r+   )!abcr   r   collections.abcr   dataclassesr   r   typingr   r	   numpyrI   numpy.typingr`   PILr
   vllm.config.multimodalr   r   r   r   vllm.loggerr   inputsr   contextr   r   r   rF   r   r+   r'   r(   r)   <module>rn      s   $ # # # # # # # # # # # # # ( ( ( ( ( ( ( ( # # # # # # # #                            $ # # # # # ' ' ' ' ' ' ' ' ' ' ' 'WT+,,,	X		 	L 	L 	L 	L 	L 	L 	L 	L`$ `$ `$ `$ `$S'"+ `$ `$ `$ `$ `$r(   