
    .`i+                     	   U d dl mZmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlZd d	lmZm Z  d d
l!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z= ddl>m?Z? erd dl@mAZA ddlBmCZC neDZAeDZC e"eE          ZF edeGeHeI                   ZJeGeHeI         z  ZKeeLd<   	  ed          ddde$deGdeMdeHeI         fd             ZN ed          d!d"de$d#eOeId$f         d%eMdeGfd&            ZPdd'de$dz  d(eKd)eMdeGfd*ZQdd'de$dz  d(eKd)eMdeHeI         fd+ZR G d, d-e          ZSe G d. d/                      ZT G d0 d1          ZUeKeTz  ZVeeLd2<   	 eeIgeVf         eVz  ZWeeLd3<   	 e G d4 d5eeJ                               ZXeKeXz  ZYeeLd6<   	 eeIgeYf         eYz  ZZeeLd7<   	  G d8 d9eGe          Z[e G d: d;e                      Z\e G d< d=e\                      Z]e G d> d?e\                      Z^ G d@ dAe          Z_ G dB dCe          Z` edDe_e`z  E          ZadFe	ea         deeGeHea         f         fdGZb G dH dIe          Zc edJ           G dK dL                      Zd G dM dNe          Zed dOd#eHeI         dPeHeI         dQeIdeee         fdRZfd#eHeI         dPeHeI         dSeHeI         deHeI         fdTZge G dU dV                      ZheOeOeGeIf         eOeceIf         f         Zid dWdXeJdYdZde$dz  d[eId\d]deOe[dz  eHei         f         fd^Zjd_ekeGeIf         d`ekeGeIf         deMfdaZldXeJdYdZde$dz  deOeHeJ         d]f         fdbZmdXeHeI         dYdZde$dz  deOeHeI         d]f         fdcZndXeGdYdZde$dz  deOeGd]f         fddZodXeHeI         dYdZde$dz  de	eh         fdeZpdXeHeI         dYdZde$dz  de
eGeHeh         f         fdfZqekeGeHeM         f         Zr	 e
eGeHeed                  f         Zs	 e
eGeHeIdz           f         Zt	  edge;E          Zu G dh die          Zv G dj dkeeeu                   Zw G dl dmeweu                   ZxdS )n    )ABCabstractmethod)defaultdict)Callable	Generator	ItemsViewIterableMappingSequence)	dataclassfieldreplace)Enum)	lru_cache)TYPE_CHECKINGGeneric
NamedTupleProtocol	TypeAliascastN)TypeVarassert_never)init_logger)TokenizerLike)flatten_2d_listsfull_groupby   )MultiModalHasher)
MultiModalDataDictMultiModalEncDecInputsMultiModalFieldConfigMultiModalHashesMultiModalInputsMultiModalKwargsItemMultiModalKwargsItemsMultiModalKwargsOptionalItemsMultiModalUUIDDictPlaceholderRange)DictEmbeddingItemsEmbeddingItemsMultiModalDataItemsMultiModalDataParser   )BaseProcessingInfoget_current_request_idtimed_preprocessor_operation)BaseDummyInputsBuilder)BatchFeature)BaseMultiModalProcessorCache_S	PromptSeqi   )maxsizeTadd_special_tokens	tokenizertextr8   returnc                0    |                      ||          S )Nr7   )encode)r9   r:   r8   s      x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/multimodal/processing/processor.py_cached_encoder?   F   s     D5GHHH    Fskip_special_tokens	token_ids.rB   c                J    |                      t          |          |          S )NrA   )decodelist)r9   rC   rB   s      r>   _cached_decoderG   P   s$     DOOATUUUr@   	use_cacheseqrI   c                    t          |t                    r|S | t          d          |s|                     |          S t	          | t          |                    S )Nz8You cannot decode tokens when `skip_tokenizer_init=True`)
isinstancestr
ValueErrorrE   rG   tupler9   rJ   rI   s      r>   	_seq2textrQ   Z   sb     #s 
STTT %$$$)U3ZZ000r@   c                    t          |t                    r<| t          d          |s|                     |d          S t	          | |d          S |S )Nz6You cannot encode text when `skip_tokenizer_init=True`Fr7   )rL   rM   rN   r=   r?   rP   s      r>   _seq2tokensrS   l   sh     #s HUVVV 	C##CE#BBBiGGGGJr@   c            
       6    e Zd Z	 ddedz  dedededz  fdZdS )	_GetMatchIndexr   r9   Nprompt	start_idxr;   c                     d S N )selfr9   rV   rW   s       r>   __call__z_GetMatchIndex.__call__   s	    
 Sr@   r   )__name__
__module____qualname__r   r5   intr\   rZ   r@   r>   rU   rU   ~   sa        
 	  4'  	
 
t     r@   rU   c                       e Zd ZU dZeed<   dS )PromptIndexz#Resolves to an index in the prompt.get_match_indexN)r^   r_   r`   __doc__rU   __annotations__rZ   r@   r>   rc   rc      s$         --######r@   rc   c                   f    e Zd Zedefd            Zededefd            Zedefd            ZdS )PromptIndexTargetsr;   c                  $    t          dd          S )z
        Resolves to the start of the prompt (before the first token).

        This results in a match even if the prompt is empty.
        r   c                     dS Nr   rZ   r9   rV   rW   s      r>   <lambda>z*PromptIndexTargets.start.<locals>.<lambda>   s    ! r@   r]   rc   rZ   r@   r>   startzPromptIndexTargets.start   s     CCCDDDr@   rJ   c           
      v     	 ddt           dz  dt          dt          dt          dz  f fd}t          |          S )	zN
        Resolves to a location in the prompt after the given prefix.
        r   r9   NrV   rW   r;   c                     |dk    rd S }t          |t                    rt          | |d          }nt          | |d          }t	          |          }|d |         |k    r|nd S )Nr   FrH   )rL   rM   rQ   rS   len)r9   rV   rW   prefix	match_idxrJ   s        r>   rd   z2PromptIndexTargets.prefix.<locals>.get_match_index   s    
 A~~tF&#&& I"9fFFF %Y%HHHFI &z	z 2f < <99$Fr@   r]   )r   r5   ra   rc   )rJ   rd   s   ` r>   rs   zPromptIndexTargets.prefix   ss     	G 	G$t+	G	G 	G 4Z		G 	G 	G 	G 	G 	G( ?+++r@   c                  $    t          dd          S )z
        Resolves to the end of the prompt (after the last token).

        This results in a match even if the prompt is empty.
        r   c                      t          |          S rY   rr   rl   s      r>   rm   z(PromptIndexTargets.end.<locals>.<lambda>   s    #f++ r@   r]   rn   rZ   r@   r>   endzPromptIndexTargets.end   s     MMMNNNr@   N)	r^   r_   r`   staticmethodrc   ro   r5   rs   rx   rZ   r@   r>   rh   rh      s        E; E E E \E ,I ,+ , , , \,6 O O O O \O O Or@   rh   UpdateTargetPromptUpdateTargetc                       e Zd ZU dZeed<   	 dZeedz  e	ge
j        f         dz  ed<   	 ededdfd            Zeded	eddfd
            Zedededdfd            Zededee         ddfd            ZdS )PromptUpdateDetailszEDetails about the token sequence or text that are part of the update.fullNis_embedrJ   r;   zPromptUpdateDetails[_S]c                 "    t          |           S )N)r~   )r}   )rJ   s    r>   from_seqzPromptUpdateDetails.from_seq   s    ",,,,r@   
embed_textc                 l    dt           d z  dt          dt          j        ffd}t	          | |          S )Nr9   r~   r;   c                     t          | d          }t          | |          }t          j        t          j        |          t          j        |                    S )NFrH   rS   torchisintensor)r9   r~   embed_token_idsrC   r   s       r>   r   z1PromptUpdateDetails.select_text.<locals>.is_embed   sT    ))Z5QQQO#It44I:Y''_--  r@   r~   r   r   r5   r   Tensorr}   )rJ   r   r   s    ` r>   select_textzPromptUpdateDetails.select_text   sR    
	 4 	I 	%, 	 	 	 	 	 	 #h????r@   embed_token_idc                 l    dt           d z  dt          dt          j        ffd}t	          | |          S )Nr9   r~   r;   c                 T    t          | |          }t          j        |          k    S rY   )rS   r   r   )r9   r~   rC   r   s      r>   r   z5PromptUpdateDetails.select_token_id.<locals>.is_embed   s'    #It44I<	**n<<r@   r   r   )rJ   r   r   s    ` r>   select_token_idz#PromptUpdateDetails.select_token_id   sR    
	= 4 	=I 	=%, 	= 	= 	= 	= 	= 	=
 #h????r@   r   c                 l    dt           d z  dt          dt          j        ffd}t	          | |          S )Nr9   r~   r;   c                     t          | |          }t          j        t          j        |          t          j                            S rY   r   )r9   r~   rC   r   s      r>   r   z6PromptUpdateDetails.select_token_ids.<locals>.is_embed  s@    #It44I:Y''_--  r@   r   r   )rJ   r   r   s    ` r>   select_token_idsz$PromptUpdateDetails.select_token_ids  sR    
	 4 	I 	%, 	 	 	 	 	 	 #h????r@   )r^   r_   r`   re   r4   rf   r   r   r   r5   r   r   ry   r   rM   r   ra   r   rF   r   rZ   r@   r>   r}   r}      sP        OO
HHHQUHh,i8%,FG$NUUU	 -b -6 - - - \- @@@ 
#@ @ @ \@ 	@	@	@ 
#	@ 	@ 	@ \	@ @@c@ 
#@ @ @ \@ @ @r@   r}   PromptUpdateInfoPromptUpdateContentc                       e Zd ZdZdZdS )
UpdateModeinsertr   N)r^   r_   r`   INSERTREPLACErZ   r@   r>   r   r   %  s        FGGGr@   r   c                       e Zd ZU dZeed<   	 eed<   	 eede	fd                        Z
eedefd                        ZdedefdZdedefd	Zdedd
fdZdS )PromptUpdatezA
    Defines how to update a prompt with placeholder tokens.
    modalitytargetr;   c                     t           )z3The placeholder tokens that are part of the update.NotImplementedErrorr[   s    r>   contentzPromptUpdate.content6  
     "!r@   c                     t           )z!Defines how to update the prompt.r   r   s    r>   modezPromptUpdate.mode<  r   r@   item_idxc                 H    | j         }t          |          r ||          }|S rY   )r   callable)r[   r   r   s      r>   _resolve_targetzPromptUpdate._resolve_targetB  s-    F 	&VH%%Fr@   c                     | j         }t          |          r ||          }t          |t                    st                              |          }|S rY   )r   r   rL   r}   r   )r[   r   r   s      r>   _resolve_contentzPromptUpdate._resolve_contentI  sS    ,G 	(gh''G'#677 	<)227;;Gr@   ResolvedPromptUpdatec           	          t          | j        || j        |                     |          |                     |                    S )z
        Given the index of the processed item within
        [`modality`][vllm.multimodal.processing.PromptUpdate.modality],
        output a copy of this object with its lazy attributes resolved.
        )r   r   r   r   r   )r   r   r   r   r   )r[   r   s     r>   resolvezPromptUpdate.resolveS  sJ     $]''11))(33
 
 
 	
r@   N)r^   r_   r`   re   rM   rf   r{   propertyr   r   r   r   r   ra   rz   r   r}   r   r   rZ   r@   r>   r   r   *  s          MMM41", " " " ^ X" "j " " " ^ X"      1D    
 
(> 
 
 
 
 
 
r@   r   c                   p    e Zd ZU dZ ed          Zeed<   	 edefd            Z	ede
fd            ZdS )	PromptInsertiona  
    Defines how to insert placeholder tokens into a prompt.

    Example:

    For each image, insert a number of `<image>` feature placeholders
    equal to the feature size of the vision encoder after the `<s>` token:

    ```python
    PromptInsertion(
        modality="image",
        target="<s>",
        insertion="<image>" * image_feature_size,
    )
    ```

    Insert these tokens at the start of the prompt:

    ```python
    PromptInsertion(
        modality="image",
        target=PromptIndexTargets.start(),
        insertion="<image>" * image_feature_size,
    )
    ```

    Insert these tokens after a prefix `Images:`:

    ```python
    PromptInsertion(
        modality="image",
        target=PromptIndexTargets.prefix("Images:"),
        insertion="<image>" * image_feature_size,
    )
    ```

    Insert these tokens at the end of the prompt:

    ```python
    PromptInsertion(
        modality="image",
        target=PromptIndexTargets.end(),
        insertion="<image>" * image_feature_size,
    )
    ```
    Frepr	insertionr;   c                     | j         S rY   )r   r   s    r>   r   zPromptInsertion.content  s
    ~r@   c                     t           j        S rY   )r   r   r   s    r>   r   zPromptInsertion.mode  s      r@   N)r^   r_   r`   re   r   r   r   rf   r   r   r   r   rZ   r@   r>   r   r   b  s         - -^ &+U%6%6%6I"666 ,    X !j ! ! ! X! ! !r@   r   c                   p    e Zd ZU dZ ed          Zeed<   	 edefd            Z	ede
fd            ZdS )	PromptReplacementa  
    Defines how to replace portions of an input prompt with placeholder tokens.

    Example:

    For each image, replace one `<image>` input placeholder in the prompt
    with a number of `<image>` feature placeholders
    equal to the feature size of the vision encoder:

    ```python
    PromptReplacement(
        modality="image",
        target="<image>",
        replacement="<image>" * image_feature_size,
    )
    ```

    As above, but further pad the feature placeholders with `<image_bos>`
    and `<image_eos>`, which are not supposed to be passed to the vision
    encoder:

    ```python
    PromptReplacement(
        modality="image",
        target="<image>",
        replacement=PromptUpdateDetails(
            full="".join(
                [
                    "<image_bos>",
                    "<image>" * image_feature_size,
                    "<image_eos>",
                ]
            ),
            features="<image>" * image_feature_size,
        ),
    )
    ```

    To avoid unnecessary tokenization during prompt replacement,
    we recommended passing token sequences instead of text:

    ```python
    PromptReplacement(
        modality="image",
        target=[image_token_id],
        replacement=PromptUpdateDetails(
            full=(
                [image_bos_id] + [image_token_id] * image_feature_size + [image_eos_id]
            ),
            features=[image_token_id] * image_feature_size,
        ),
    )
    ```
    Fr   replacementr;   c                     | j         S rY   )r   r   s    r>   r   zPromptReplacement.content  s    r@   c                     t           j        S rY   )r   r   r   s    r>   r   zPromptReplacement.mode  s    !!r@   N)r^   r_   r`   re   r   r   r   rf   r   r   r   r   rZ   r@   r>   r   r     s         5 5n (-u%'8'8'8K$888  ,       X  "j " " " X" " "r@   r   c                       e Zd ZU eed<   dS )_HasModalityAttrr   N)r^   r_   r`   rM   rf   rZ   r@   r>   r   r     s         MMMMMr@   r   c                   *    e Zd Zedefd            ZdS )_HasModalityPropr;   c                     d S rY   rZ   r   s    r>   r   z_HasModalityProp.modality  s    "sr@   N)r^   r_   r`   r   rM   r   rZ   r@   r>   r   r     s+        "#""" X"""r@   r   _M)boundvaluesc                 &    t          | d           S )z}
    Convenience function to apply
    [`full_groupby`][vllm.utils.collection_utils.full_groupby]
    based on modality.
    c                     | j         S rY   )r   )xs    r>   rm   z'full_groupby_modality.<locals>.<lambda>  s    aj r@   key)r   )r   s    r>   full_groupby_modalityr      s     $8$89999r@   c                   $    e Zd ZU eed<   eed<   dS )PromptTargetMatchrW   end_idxNr^   r_   r`   ra   rf   rZ   r@   r>   r   r   	  "         NNNLLLLLr@   r   )frozenc            
       .   e Zd ZU dZeed<   	 eed<   	 eed<   	 eed<   	  e	d          Z
eed<   	 d	d
dee         dedz  dedee         fdZd	d
dededz  dedee         fdZd	d
dee         ez  dedz  dedee         fdZdefdZdefdZdS )r   z
    A [`PromptUpdate`][vllm.multimodal.processing.PromptUpdate] with its
    lazy attributes resolved, apart from those related to tokenization.
    r   r   r   r   Fr   r   r   rW   rV   r9   NrW   r;   c             #     K   | j         }t          |t                    r-|                    |||          }|t	          ||          V  dS t          ||          }t          |||          D ]}t	          |j        |j                  V  dS )7Yield each instance of `self.target` found in `prompt`.Nr   )	r   rL   rc   rd   r   rS   iter_token_matchesrW   r   )r[   rV   r9   rW   r   rt   target_token_idsmatchs           r>   r   z'ResolvedPromptUpdate.iter_token_matches$  s       fk** 	..y&)LLI$'	9=====F&y&99'0@IVVV 	D 	DE#EOU]CCCCCC	D 	Dr@   c             #     K   | j         }t          |t                    r-|                    |||          }|t	          ||          V  dS t          ||          }t          j        t          j        |          ||          D ]8}t	          |	                                |
                                          V  9dS )r   N)pos)r   rL   rc   rd   r   rQ   refinditerescapero   rx   )r[   rV   r9   rW   r   rt   target_textr   s           r>   iter_text_matchesz&ResolvedPromptUpdate.iter_text_matches:  s       fk** 	..y&)LLI$'	9=====F	622[;!7!7YOOO 	@ 	@E#EKKMM599;;??????	@ 	@r@   c                    t          |t                    r|                     |||          S |                     |||          S )r   r   )rL   rM   r   r   )r[   rV   r9   rW   s       r>   iter_matchesz!ResolvedPromptUpdate.iter_matchesP  sM     fc"" 	R))&)y)QQQ&&vyI&NNNr@   c                 $    t          | |          S )N)r   r   )r[   r   s     r>   with_targetz ResolvedPromptUpdate.with_target]  s    tF++++r@   c                     t          |t                    st                              |          }t          | |          S )N)r   )rL   r}   r   r   )r[   r   s     r>   with_contentz!ResolvedPromptUpdate.with_content`  s;    '#677 	<)227;;GtW----r@   )r^   r_   r`   re   rM   rf   ra   r   rz   r   r   r}   rF   r   r   r   r   r   r   r   r   r   rZ   r@   r>   r   r     s         
 MMM4MMMJ
+1#(5e#4#4#4G 444= D D DS	D !4'D
 D 
$	%D D D D6 @ @ @@ !4'@
 @ 
$	%@ @ @ @6 O O OS	CO !4'O
 O 
$	%O O O O,, , , , ,.$4 . . . . . .r@   r   c                   $    e Zd ZU eed<   eed<   dS )_TokenMatchrW   r   Nr   rZ   r@   r>   r   r   g  r   r@   r   r   	match_idsrW   c             #      K   t          |           }t          |          }|dk    rdS |||z
  dz   k     r<||z   }| ||         |k    rt          ||          V  |}n|dz  }|||z
  dz   k     :dS dS )zh
    Yield each occurrence of `match_ids` in `token_ids`.

    Note that empty matches are ignored.
    r   Nr-   )rW   r   )rr   r   )rC   r   rW   
prompt_len	match_lenr   s         r>   r   r   l  s       YJIIA~~
j9,q0
0
0i'Yw&'944	7CCCCCC  IINI j9,q0
0
0
0
0
0
0r@   new_idsc                 P   t          t           t                                        }d}t          | |          D ]D}|j        }|j        }|                    | ||                    |                    |           |}E|                    | |d                    t          |          S )z}
    Replace each occurrence of `match_ids` in `token_ids`
    with `new_ids`.

    Note that empty matches are ignored.
    r   N)rF   ra   r   rW   r   appendr   )rC   r   r   out_seqsprev_end_idxr   rW   r   s           r>   replace_token_matchesr     s     DI  HL#Iy99  O	-	,y"89:::   OOIlmm,---H%%%r@   c                       e Zd ZU eed<   eed<   eed<   ee         ed<   ej        dz  ed<   e	defd            Z
defd	ZdS )
PlaceholderFeaturesInfor   r   rW   tokensNr   r;   c                 *    t          | j                  S rY   )rr   r   r   s    r>   lengthzPlaceholderFeaturesInfo.length  s    4;r@   c                 D    t          | j        | j        | j                  S )N)offsetr   r   )r(   rW   r   r   r   s    r>   to_rangez PlaceholderFeaturesInfo.to_range  s*      >;]
 
 
 	
r@   )r^   r_   r`   rM   rf   ra   rF   r   r   r   r   r(   r   rZ   r@   r>   r   r     s         MMMMMMNNNIlT!!!!        X 
* 
 
 
 
 
 
r@   r   )r   rV   mm_prompt_updatesMultiModalPromptUpdatesr   current_result"MultiModalPromptUpdatesApplyResultc                   d }t          t          t          t          f         t          t          t          f         f                     }|                                D ]\  }}t          |          D ]k\  }	}
||         |	         t          |
          D ]G\  }}||	f|v r n;|                    | ||          D ]!}||j        }n||j        k    r||f|||	f<    Hlt          |                                d           }|t          j        k    rit          t                               }d}|D ]I}|\  }\  }}|j        |j        k    r|                    |           0|sd}|                    |           J|}||fS )Nr   c                     | d         d         S )Nr-   r   rZ   )items    r>   rm   z_find_matches.<locals>.<lambda>  s    471: r@   r   FT)dictrO   rM   ra   r   items	enumerater   r   sortedr   r   rF   _MatchToApplyrW   r   r   )rV   r   r9   r   r   r   
mm_matchesr   modality_updatesr   item_updates
update_idxupdater   matches_to_applymatches_to_apply_has_non_empty_matchesr  _s                      r>   _find_matchesr    s    #DeCHou->-C'DDEGGJ&7&=&=&?&?  ""&/0@&A&A 	 	"Hlh'1=&/&=&=  "
Fh':55E#00* 1    E |%{,, 7<j7HJ(34)	. j..006M6MNNN z!!! /11 %$ 	/ 	/D MAzq%-//!((....* /(,%!((...,!!!r@   mm_item_countsmm_found_countsc                 ^     t           fd|                                D                       S )Nc              3   6   K   | ]\  }}||         k    V  d S rY   rZ   ).0r   r   r  s      r>   	<genexpr>z#_all_items_found.<locals>.<genexpr>  sE        Hh 	N8,,     r@   )allr  )r  r  s   ` r>   _all_items_foundr    sI         "1"7"7"9"9     r@   c                    d |                                 D             }t          t          t          t                   z                       }d |                                 D             }d |                                 D             }t	          ||          r| g|fS d}	 t          | ||||          \  }}	|n|	D ]\  \  }
}\  }}||
         |         |         }|j        j        }|t          j	        k    r|j
        }n'|t          j        k    r|j        }nt          |           |                    | ||                    |                    t          | t                    rt!          ||          nt#          ||                     |||
         |<   |j
        }d |                                 D             }t	          ||          rn0|                    | |d                     t%          t          t&                   |          |fS )Nc                 4    i | ]\  }}|t          |          S rZ   rw   r  mr  s      r>   
<dictcomp>z"_apply_matches.<locals>.<dictcomp>  $    NNN5aUNNNr@   c                 <    i | ]\  }}|d gt          |          z  S rY   rw   r  s      r>   r  z"_apply_matches.<locals>.<dictcomp>  s8     6 6 6#+1eD6CJJ6 6 6r@   c                 H    i | ]\  }}|t          d  |D                        S )c              3      K   | ]}|d uV  	d S rY   rZ   r  rs     r>   r  z,_apply_matches.<locals>.<dictcomp>.<genexpr>  s&      **q}******r@   sumr  r  ress      r>   r  z"_apply_matches.<locals>.<dictcomp>  sB       /5q#3**c*****  r@   r   T)r   r   c                 H    i | ]\  }}|t          d  |D                        S )c              3      K   | ]}|d uV  	d S rY   rZ   r"  s     r>   r  z,_apply_matches.<locals>.<dictcomp>.<genexpr>6  s&      ..Q1D=......r@   r$  r&  s      r>   r  z"_apply_matches.<locals>.<dictcomp>5  sB     
 
 
391cAs..#.....
 
 
r@   )r  rF   rM   ra   r  r  r   r~   r   r   r   r   rW   r   r   rL   rQ   rS   r   r4   )rV   r   r9   r  r   
out_resultr  r   r   r  r   r   r   r
  matched_updatematched_contentend_idx_to_inserts                    r>   _apply_matchesr.    sg   
 ON4E4K4K4M4MNNNNC$s)O$&&H6 6/@/F/F/H/H6 6 6J
 9C9I9I9K9K  O 88 $x##L'!.%%"
 "
 "
 <9I 	) 	)5 Xx"55*.x8B:NN,49Oz((($)M!!+++$)O!!T"""OOF<0A#ABCCCOOfc**=	)_555 O<<  
 .8Jx * !=LL
 
=G=M=M=O=O
 
 
 NO<< 	O'R OOF<==)***R(##Z//r@   c                 L    t          | ||          \  }}t          |          |fS )
    Apply the updates in `mm_prompt_updates` to `prompt`.

    Matches are exclusive even when multiple modalities share
    the same placeholder tokens. In that case, the modality that
    appears earlier in `mm_prompt_updates` takes priority.
    )r.  r   )rV   r   r9   token_id_seqsresults        r>   apply_token_matchesr3  @  s.     +63DiPPM6M**F22r@   c                 X    t          | ||          \  }}d                    |          |fS )r0   )r.  join)rV   r   r9   textsr2  s        r>   apply_text_matchesr8  Q  s/     #6+<iHHME6775>>6!!r@   c           	   #     K   d |                                 D             }d |D             }t          ||          rdS t          |           }d}||k     rd}|                                 D ]\  }}	||         }
|
|                    |d          k    r(|	|
         D ]}|j        }t          ||j                  }t          |          }||z   }|dk    s||k    r?| ||         |k    rF|j        }| |||j                  }t          ||
|||          V  |}||xx         dz  cc<   d} n|rt          ||          r dS  n|s|dz  }||k     dS dS )	a&  
    Yield each set of placeholder tokens found in `prompt`.

    Matches are exclusive even when multiple modalities share
    the same placeholder tokens. In that case, the modality that
    appears earlier in `mm_prompt_updates` takes priority.

    Note that empty matches are ignored.
    c                 4    i | ]\  }}|t          |          S rZ   rw   r  s      r>   r  z&_iter_placeholders.<locals>.<dictcomp>p  r  r@   c                     i | ]}|d S r]   rZ   )r  r   s     r>   r  z&_iter_placeholders.<locals>.<dictcomp>q  s    JJJHHaJJJr@   Nr   F)r   r   rW   r   r   r-   T)	r  r  rr   getr   rS   r~   r   r   )rV   r   r9   r  item_idx_by_modalityr   rW   foundr   r  r   r  r   content_tokens_fullcontent_len_fullend_idx_fullcontent_is_embeds                    r>   _iter_placeholdersrC  b  s      ON4E4K4K4M4MNNNNJJ8IJJJ(<== VJI
j
 
 *;*A*A*C*C %	 %	&H&+H5H>--h::::*84   .&1)W\&J&J##&':#;#; (+;;#q((L:,E,E)L015HHH'.'7$'3+;+;Iw|+T+T(1!)!)"+2!1      !-I(222a7222 EE# I&  #N4HII FF	  	NIW j
 
 
 
 
 
 r@   c                 \    t          | ||          }t          t          |                    S rY   )rC  r  r   )rV   r   r9   its       r>   find_mm_placeholdersrF    s-    
 
F$5y	A	AB%b))***r@   _Ic                   .    e Zd ZU eed<   eed<   eed<   dS )MultiModalProcessingInfokwargshashesprompt_updatesN)r^   r_   r`   r&   rf   r"   r   rZ   r@   r>   rI  rI    s6         ))))++++++r@   rI  c                       e Zd ZdZdddedddedz  ddf fd	Zed
             Zed             Z	ddde
dedee
ef         dedz  def
dZdefdZde
deddfdZdedefdZededee
ef         dee
ef         fd            Zededee
ef         dedee         fd            Zdee         dee
ef         defdZ dedee
ef         dedefdZ!d e"e         d!edee
e"e#         f         fd"Z$dede%ee
ef         ee
ef         f         fd#Z&de
dee
ef         d$ee
ef         d%ee
ef         def
d&Z'd'e
dedee
ef         d(ee
ef         de(f
d)Z)d'e
dedee
ef         d(ee
ef         de%e"e         ee(f         f
d*Z*d'e
d(ee
ef         de"e         fd+Z+d,e"e         de"e         fd-Z,dedee
ef         d(ee
ef         defd.Z-de
e"e         z  dedee
ef         d(ee
ef         d/e(de%e"e         ee(f         fd0Z.dddedee
ef         d(ee
ef         dedz  de/f
d1Z0ded2ed3e/de%e1ef         fd4Z2d5e3d6ede3fd7Z4ded3e/d8e1d9ed:ede%e5ef         fd;Z6ddde
e"e         z  d2edee
ef         d(ee
ef         dedz  de%e"e         e7e(f         fd<Z8ddde
e"e         z  d2edee
ef         d(ee
ef         dedz  de%e"e         e7e(f         fd=Z9de"e         d!ede%e"e         e:f         fd>Z;de
d!ede%e
e:f         fd?Z<d@e"e         d!ede%e"e         ee
e"e#         f         f         fdAZ=d$e5dee
ef         ddfdBZ>dCedee
ef         ddfdDZ?dEee
e"e#         f         dee
ef         ddfdFZ@dedGe"e         d$e5d!edHe(de%e"e         ee
e"e#         f         f         fdIZA	 dKddde
e"e         z  dedee
ef         d(ee
ef         dz  dedz  defdJZB xZCS )LBaseMultiModalProcessorz
    Abstract base class to process multi-modal inputs to be used in vLLM.

    Not to be confused with `transformers.ProcessorMixin`.
    N)cacheinfodummy_inputszBaseDummyInputsBuilder[_I]rO  r;   c                   t                                                       || _        || _        || _        |                                 | _        | j                                        | _        | j        	                                | _
        d S rY   )super__init__rP  rQ  rO  _get_data_parserdata_parserget_supported_mm_limits_supported_mm_limitsget_allowed_mm_limits_allowed_mm_limits)r[   rP  rQ  rO  	__class__s       r>   rT  z BaseMultiModalProcessor.__init__  sy     		(
0022 %)I$E$E$G$G!"&)"A"A"C"Cr@   c                     | j         S rY   )rX  r   s    r>   supported_mm_limitsz+BaseMultiModalProcessor.supported_mm_limits  s    ((r@   c                     | j         S rY   )rZ  r   s    r>   allowed_mm_limitsz)BaseMultiModalProcessor.allowed_mm_limits  s    &&r@   mm_uuidsrV   mm_datahf_processor_mm_kwargsra  c                4    |                      ||||          S )Nr`  )apply)r[   rV   rb  rc  ra  s        r>   r\   z BaseMultiModalProcessor.__call__  s     zz&'+AHzUUUr@   c                     | j         j        j                                        }d}|j        r#| j         j        j                                        }t          |          S )a  
        Construct a parser to preprocess multi-modal data items
        before passing them to
        [`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data].

        You can support additional modalities by creating a subclass
        of [`MultiModalDataParser`][vllm.multimodal.parse.MultiModalDataParser]
        that has additional subparsers.
        N)expected_hidden_size)rP  ctxmodel_configget_multimodal_configenable_mm_embedsget_inputs_embeds_sizer,   )r[   	mm_configrg  s      r>   rU  z(BaseMultiModalProcessor._get_data_parser  sY     IM.DDFF	#% 	W#'9=#=#T#T#V#V #9MNNNNr@   r   	num_itemsc                     | j                             |d          }| j                            |d          }||}t          ||          }||k    r#d| d| d}||k    r|dz  }t	          |          d S )Nr   zAt most  z"(s) may be provided in one prompt.z4 Set `--limit-mm-per-prompt` to increase this limit.)r]  r<  r_  minrN   )r[   r   rn  supported_limitallowed_limitlimitmsgs          r>   validate_num_itemsz*BaseMultiModalProcessor.validate_num_items  s    
 266xCC.228Q??"+OO]33uQUQQXQQQCO++MMS//! r@   c                    | j                             |          }| j        j        j                                        }|j        sI|                                D ]4\  }}t          |t          t          f          rt          d| d          5|                                D ](\  }}|                     |t          |                     )|S )a3  
        Normalize
        [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict]
        to [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems]
        before passing them to
        [`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data].
        z,You must set `--enable-mm-embeds` to input `z_embeds`)rV  parse_mm_datarP  rh  ri  rj  rk  r  rL   r*   r)   rN   rv  rr   )r[   rb  mm_itemsrm  r   r  s         r>   _to_mm_itemsz$BaseMultiModalProcessor._to_mm_items"  s     #11'::IM.DDFF	) 	#+>>#3#3  %en6H%IJJ $/$/ / /    (~~// 	: 	:OHe##Hc%jj9999r@   	hf_inputsc                     t           )z?Given the HF-processed data, output the metadata of each field.r   )r[   r{  rc  s      r>   _get_mm_fields_configz-BaseMultiModalProcessor._get_mm_fields_config=  s
     "!r@   ry  out_mm_kwargsc                     t           )aZ  
        Given the original multi-modal items for this modality
        and HF-processed data, output the updates to perform.

        The information returned by this method is used to update token inputs
        which bypass the HF processor. It is also used to update the output of
        HF processor if the HF process does not apply prompt updates to text
        inputs.

        Moreover, this information is critical to determine the token positions
        in order to construct
        [`PlaceholderRange`][vllm.multimodal.inputs.PlaceholderRange]
        for each multi-modal item.
        r   )r[   ry  rc  r~  s       r>   _get_prompt_updatesz+BaseMultiModalProcessor._get_prompt_updatesF  s
    * "!r@   rL  r  c                 :    fdt          |          D             S )Nc           
      x    i | ]5\  }|fd t                              |d                    D             6S )c                 .    g | ]fd D             S )c                 :    g | ]}|                               S rZ   )r   )r  r  r   s     r>   
<listcomp>zYBaseMultiModalProcessor._bind_and_group_updates.<locals>.<dictcomp>.<listcomp>.<listcomp>d  s%    @@@f))@@@r@   rZ   )r  r   updatess    @r>   r  zNBaseMultiModalProcessor._bind_and_group_updates.<locals>.<dictcomp>.<listcomp>c  s?        A@@@@@@  r@   r   )ranger<  )r  r   r  r  s     @r>   r  zCBaseMultiModalProcessor._bind_and_group_updates.<locals>.<dictcomp>b  sn     
 
 

 "'	      %n&8&81&E&E F F  
 
 
r@   )r   )r[   rL  r  s     `r>   _bind_and_group_updatesz/BaseMultiModalProcessor._bind_and_group_updates]  s8    

 
 
 

 &;>%J%J
 
 
 	
r@   c                 ^   |                      |||          }|                     ||                                          }|                                D ]W\  }}t	          |          D ]B\  }}	t          |	          dk    r*t                              dt          |          ||           CX|S )N)ry  rc  r~  r-   a  Detected %d prompt updates for `mm_items[%r][%s]`. Multiple prompt updates per item is now deprecated and may be removed in v0.13. Instead, please specify dynamic update targets in the same prompt update definition by passing a function to `PromptUpdate.target`.)r  r  get_all_countsr  r  rr   loggerwarning_once)
r[   ry  rc  r~  unbound_prompt_updatesr   r   rL  r   item_prompt_updatess
             r>   _get_mm_prompt_updatesz.BaseMultiModalProcessor._get_mm_prompt_updatesj  s     "&!9!9#9' ": "
 "
 !88"##%%
 

 ):(?(?(A(A 	 	$Hn1:>1J1J  --*++a//''? N++  
 
 
 ! r@   new_token_idsr   c                 V    | j                                         }t          |||          S rY   )rP  get_tokenizerrF  )r[   r  r   r9   s       r>   _find_mm_placeholdersz-BaseMultiModalProcessor._find_mm_placeholders  s*    
 I++--	#M3DiPPPr@   c                 T   t          t          t          f                     }t          t          t          f                     }|                                D ]P}|                    |                                           |                    |                                           Q||fS rY   )r  rM   objectr   r  get_processor_dataget_passthrough_data)r[   ry  processor_datapassthrough_datar  s        r>   _get_hf_mm_dataz'BaseMultiModalProcessor._get_hf_mm_data  s     c6k*,,V,..__&& 	B 	BE!!%":":"<"<===##E$>$>$@$@AAAA///r@   	mm_kwargs
tok_kwargsc           
          t          | j        j        d          5  | j        j                             | j        j        di |t          dd|i|t          di ||          cddd           S # 1 swxY w Y   dS )zc
        Call the HF processor on the prompt text and
        associated multi-modal data.
        hf_processorr:   NrZ   )r0   rP  rh  call_hf_processorget_hf_processorr  )r[   rV   rb  r  r  s        r>   _call_hf_processorz*BaseMultiModalProcessor._call_hf_processor  s     *$)-HH 	 	9=22*	*77Y77,,&,G,,//y/J// 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A	A11A58A5prompt_texttokenization_kwargsc                 Z    t          d |                                D                        S )z
        Return whether the HF processor applies prompt updates.

        For most HF processors, this should be `True` when multi-modal
        data items are passed, but `False` when multi-modal embeddings
        are passed.
        c              3   N   K   | ] }t          |t          t          f          V  !d S rY   )rL   r*   r)   )r  r  s     r>   r  zHBaseMultiModalProcessor._hf_processor_applies_updates.<locals>.<genexpr>  sF       
 
 u~/ABCC
 
 
 
 
 
r@   )anyr   )r[   r  ry  rc  r  s        r>   _hf_processor_applies_updatesz5BaseMultiModalProcessor._hf_processor_applies_updates  sA      
 
!**
 
 
 
 
 
 	
r@   c                    |                      |          \  }}|                     ||||          }|                    |           |                    d                                          \  }|                     ||||          }	|||	fS )z
        Apply the HF processor on the prompt text and multi-modal data
        together.

        In addition, return whether prompt updates have been applied.
        )rV   rb  r  r  	input_idsr  ry  rc  r  )r  r  r  poptolistr  )
r[   r  ry  rc  r  r  r  processed_data
prompt_idsis_update_applieds
             r>   _apply_hf_processor_text_mmz3BaseMultiModalProcessor._apply_hf_processor_text_mm  s     ,0+?+?+I+I((00",*	 1 
 
 	.///&**;77>>@@ >>##9 3	 ? 
 
 >+<<<r@   c                 Z    |                      |t          i           i |          \  }}}|S )z
        Apply the HF processor on the prompt text only.

        Since HF processor requires that text and multi-modal items
        correspond to each other, we create dummy multi-modal items
        to go along with the text.
        r  )r  r+   )r[   r  r  r  r  s        r>   _apply_hf_processor_text_onlyz5BaseMultiModalProcessor._apply_hf_processor_text_only  sA      ;;#(,,#% 3	 < 
 

Aq r@   prompt_tokensc                     |S )a  
        Apply the HF processor on the prompt tokens only.

        Most HF processors accept prompt text but not prompt tokens.
        If the HF processor adds or removes tokens that are not related to
        multi-modal data, you should override this method so it is consistent
        with the output of
        [`_apply_hf_processor_text_only`][vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_text_only]
        on the
        corresponding text.
        rZ   )r[   r  s     r>   _apply_hf_processor_tokens_onlyz7BaseMultiModalProcessor._apply_hf_processor_tokens_only   s
     r@   c                     |                                 }|                     | j                            |          |||          \  }}}|S )aK  
        Apply the HF processor on the multi-modal data only.

        Since HF processor requires that text and multi-modal items
        correspond to each other, we generate dummy text using
        [`DummyInputsBuilder`][vllm.multimodal.processing.BaseDummyInputsBuilder]
        to go along with the multi-modal data.
        r  )r  r  rQ  get_dummy_text)r[   ry  rc  r  	mm_countsr  mm_processed_datas          r>   _apply_hf_processor_mm_onlyz3BaseMultiModalProcessor._apply_hf_processor_mm_only  s]     ++--	"&"B"B)88CC#9 3	 #C #
 #
a ! r@   enable_hf_prompt_updatec                    t          |t                    r2|r|                     ||||          S |                     ||          }n|                     |          }|                     |||          }||dfS )a  
        Apply the HF processor on the prompt text and multi-modal data.

        In addition, return whether prompt updates have been applied
        (for most HF processors, this should be `True`).

        Note:
            If `enable_hf_prompt_update=False`, we use HF processor
            to perform prompt updates if available; HF processor requires
            that the prompt corresponds to multi-modal items.
        r  )ry  rc  r  F)rL   rM   r  r  r  r  )r[   rV   ry  rc  r  r  r  r  s           r>   _apply_hf_processor_mainz0BaseMultiModalProcessor._apply_hf_processor_main*  s    ( fc"" 	F& 77 &%+A(;	 8    ;;FDWXXJJ==fEEJ <<#9 3 = 
 
 ,e33r@   c          
         | j         j        i }|pi }|                                D ]\  }|v r|         }t          |t                    r|g}g }t          |                                          D ][\  }	}
||	         }|sr3||n|
}
|                    t          j	        ddi|
i           F|                    |           \||<   fd|D             |<   |S )zCreate MM hashes to be returned.


        Note: When overrides are provided via callers of `apply`,
        `_hash_mm_items` will be bypassed and the overrides will be used.
        Nmodel_idc                 B    g | ]}t          j        dd i|iS )r  rZ   )r   hash_kwargs)r  r  rc  r   r  r  s     r>   r  z:BaseMultiModalProcessor._hash_mm_items.<locals>.<listcomp>  sf     $ $ $  %0  !)#T* 1 .	 $ $ $r@   rZ   )
rP  r  r  rL   rM   r  get_all_items_for_hashr   r   r  )r[   ry  rc  r  ra  rK  r  mm_uuids_per_modalitycomputedir  	item_uuidr   r  s     ``        @@r>   _hash_mm_itemsz&BaseMultiModalProcessor._hash_mm_itemsS  s    9%#%>r'~~// +	 +	OHe8##(0(:%3S99 D-B,C) ')()E)E)G)GHH 3 3GAt 5a 8I ")1 *. * -6,Ayyt ,8  )1#+T"2 #9 #6	     !	2222#+x  $ $ $ $ $ $ $ !&$ $ $x   r@   mm_data_items	mm_hashesc           	      t   fd|                                 D             }d |                                 D             }i }|                                 D ]L\  }}g }	|D ]=}
||         |
         }|t          d| d|
 d          |	                    |           >|	||<   M||                     |          fS )Nc                 B    i | ]\  }}|                     |          S rZ   )	is_cached)r  r   rK  rO  s      r>   r  zDBaseMultiModalProcessor._get_cache_missing_items.<locals>.<dictcomp>  s8     
 
 
2B(FHeoof--
 
 
r@   c                 H    i | ]\  }}|d  t          |          D              S )c                     g | ]	\  }}||
S rZ   rZ   )r  idxitem_is_cacheds      r>   r  zOBaseMultiModalProcessor._get_cache_missing_items.<locals>.<dictcomp>.<listcomp>  s2       'C%  r@   )r  )r  r   items_is_cacheds      r>   r  zDBaseMultiModalProcessor._get_cache_missing_items.<locals>.<dictcomp>  sR     
 
 
 */   +4_+E+E  
 
 
r@   zCache miss for z
 at index z but data is not provided.)r  rN   r   rz  )r[   rO  r  r  mm_is_cachedmm_missing_idxsmm_missing_datar   idxsmissing_modality_datar  datas    `          r>   _get_cache_missing_itemsz0BaseMultiModalProcessor._get_cache_missing_items  s#   
 
 
 
FOooFWFW
 
 

 
 .:-?-?-A-A
 
 
 -3355 	> 	>NHd$&! 7 7$X.s3<$5( 5 5c 5 5 5  
 *006666(=OH%%T..????r@   cached_updatenew_item_idxc                 $    t          ||          S )z
        Override this if other attributes of `ResolvedPromptUpdate`
        also need to be recomputed after retrieving from the cache.
        )r   r   )r[   r  r  s      r>   _recompute_cached_prompt_updatez7BaseMultiModalProcessor._recompute_cached_prompt_update  s     }|<<<<r@   r  mm_missing_kwargsmm_missing_prompt_updatesc                     |                                 D ]}|D ]}|                    |           t          t          t          f         d           }t          t          t
          t          d z           f         t
                    }	t          t          t
          t          t                            f         t
                    }
|	                                D ]\  }}|
                    |g           }|
                    |g           }t          |          D ]\  }||                  s-||         }||         }||         }||xx         dz  cc<   ||f}nd }|                    ||          \  }}|	|                             |           |
|                              fd|D                        t          |	          }t          |
          }||fS )Nc                      dS rk   rZ   rZ   r@   r>   rm   z:BaseMultiModalProcessor._merge_mm_kwargs.<locals>.<lambda>  s    A r@   r-   c                 <    g | ]}                     |          S rZ   )r  )r  r  r   r[   s     r>   r  z<BaseMultiModalProcessor._merge_mm_kwargs.<locals>.<listcomp>  s9       " <<VXNN  r@   )r   touch_sender_cache_itemr   rM   ra   rF   r$   r   r   r  r<  r  get_and_update_itemr   r%   r  )r[   rO  r  r  r  r  rK  	item_hashmm_missing_next_idxmerged_kwargsmerged_prompt_updatesr   missing_kwargsmissing_prompt_updatesmissing_next_idxmissing_kwargs_itemmissing_updates_itemr  rJ  r  r  r   r   s   `                     @r>   _merge_mm_kwargsz(BaseMultiModalProcessor._merge_mm_kwargs  s)     &&(( 	9 	9F# 9 9	--i88889 *#s(3II>>#C.BT.I)J$JKDQQ +Ch?S6T1U,U V!
 !
 !* 1 1 	 	Hf.228R@@N%>%B%B8R%P%P"'0'8'8  #)#H-h7 	 ':8'D$*89I*J'+ABR+S('111Q6111.0DDDDD"'";";D)"L"Lh'..v666%h/66    &-     , *-88	 !677+++r@   c                   |                      ||||d          \  }}}t          j        ||                     ||                    }	t	          | j        j        d          5  |                     ||||          }
d d d            n# 1 swxY w Y   |                     |||	          }t          |	|
|          }|||fS )NTrV   ry  rc  r  r  hashingr`  rJ  rK  rL  )
r  r%   from_hf_inputsr}  r0   rP  rh  r  r  rI  )r[   rV   r  rc  r  ra  r  r  r  r  r  r   mm_infos                r>   _apply_hf_processorz+BaseMultiModalProcessor._apply_hf_processor  sF    ))"#9 3$( * 
 
		
 *8&&'8:PQQ
 
	 *$)-CC 	 	++&#!	 ,  I	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !77"
 
 +,
 
 
 7$555s   "BBBc                4   | j         }|                     |          \  }}||r|                     |||||          S t          | j        j        d          5  |                     ||||          }	ddd           n# 1 swxY w Y   t          | j        j        d          5  |                     |||	          \  }
}ddd           n# 1 swxY w Y   |                     ||||d          \  }}}t          j
        ||                     ||                    }|                     |||          }t          | j        j        d          5  |                     ||	|
||	          \  }}ddd           n# 1 swxY w Y   t          ||	|
          }|||fS )zy
        Apply the HF processor on the full prompt text,
        caching the results and reusing cached results.
        N)rV   r  rc  r  ra  r  r`  cache_lookup)rO  r  r  Fr  )r  r  r  r  r  )rO  r  r  r0   rP  rh  r  r  r  r%   r  r}  r  r  rI  )r[   rV   r  rc  r  ra  rO  r  r  r  r  mm_missing_data_itemsr  mm_missing_processed_datar  r  r  r  r   r  s                       r>   _cached_apply_hf_processorz2BaseMultiModalProcessor._cached_apply_hf_processor&  s    
"22=AA=,=+++'=$7! ,    *$)-CC 	 	++&#!	 ,  I	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 *$)-HH 	 	262O2O+# 3P 3 3/L/	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ))*#9 3$) * 
 
		
% 2@%&&)+A 
 
 %)$?$?!"%
 %
! *$)-HH 	 	+/+@+@#)"3*C ,A , ,(I(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 +,
 
 
 7$555s6   A>>BB#CCCE88E<?E<c                 V    | j                                         }t          |||          S rY   )rP  r  r3  r[   rV   r   r9   s       r>   _apply_token_matchesz,BaseMultiModalProcessor._apply_token_matches{  s*    
 I++--	"6+<iHHHr@   c                 V    | j                                         }t          |||          S rY   )rP  r  r8  r  s       r>   _apply_text_matchesz+BaseMultiModalProcessor._apply_text_matches  s*    
 I++--	!&*;YGGGr@   rC   c                    | j                                         }|                     ||          \  }}t          d |                                D                       s;|                     t          ||d          |          \  }}t          ||d          }t          t          t          t          t                            f         t                    }|                                D ][\  }}	t          |	          D ]F\  }
}|J d|d|
 d            ||                             ||         |
         |         g           G\|                     |t#          |                    }||fS )Nc              3   H   K   | ]}t          d  |D                       V  dS )c              3      K   | ]}|d uV  	d S rY   rZ   )r  r
  s     r>   r  zJBaseMultiModalProcessor._apply_prompt_updates.<locals>.<genexpr>.<genexpr>  s'      EE:
$&EEEEEEr@   N)r  )r  update_idxss     r>   r  z@BaseMultiModalProcessor._apply_prompt_updates.<locals>.<genexpr>  sM       
 
 EEEEEEE
 
 
 
 
 
r@   FrH   z0Failed to apply prompt replacement for mm_items[z][])rP  r  r  r  r   r  rQ   rS   r   rM   rF   r   r   r  r  r   r  r  )r[   rC   r   r9   r  match_resultnew_textmatched_updatesr   r  r   r
  placeholderss                r>   _apply_prompt_updatesz-BaseMultiModalProcessor._apply_prompt_updates  s   
 I++--	&*&?&?'
 '
#|  
 
+2244
 
 
 
 
 		N &*%=%=)Y%@@@!& &"Hl
 (	8uMMMM%c49M0N+O&OPQUVV%1%7%7%9%9 		 		!Hk(1+(>(>  $*!--: (: :.6: : : .--
  )00&x0::FG    11!!
 

 l**r@   c                     |                                 D ]Z\  }}|                    |g           }t          |          |k    r,t          d| d| d| d| dt          |           d          [d S )NExpected there to be rp  z- items in keyword arguments corresponding to z data items, but only found z! There is likely a problem with your implementation of merged multi-modal processor for this model (usually arising from an inconsistency between `_call_hf_processor` and `_get_mm_fields_config`).r  r<  rr   RuntimeError)r[   r  r  r   
item_countr  s         r>   _validate_mm_kwargsz+BaseMultiModalProcessor._validate_mm_kwargs  s    
 %3$8$8$:$: 	 	 HjMM(B//E5zzZ''"IJ I I I I:DI II I=@ZZI I I   (	 	r@   
mm_updatesc                     |                                 D ]W\  }}|                    |g           }t          |          |k    r)t          d| d| d| dt          |           d	          Xd S )Nr
  z! prompt updates corresponding to rp   items, but instead found z prompt updates! This is likely because you forgot to include input placeholder tokens (e.g., `<image>`, `<|image_pad|>`) in the prompt. If the model has a chat template, make sure you have applied it before calling `LLM.generate`.r  )r[   r  r  r   r  r  s         r>   _validate_mm_updatesz,BaseMultiModalProcessor._validate_mm_updates  s    
 %3$8$8$:$: 	 	 Hj%>>(B77L<  J.."NJ N N(2N N5=N N%(%6%6N N N   /	 	r@   mm_placeholdersc                     |                                 D ]W\  }}|                    |g           }t          |          |k    r)t          d| d| d| dt          |           d	          Xd S )Nr
  z& prompt placeholders corresponding to rp  r  z prompt placeholders! Make sure the implementation of `_call_hf_processor` and `_get_mm_fields_config` are consistent with each other.r  )r[   r  r  r   r  r  s         r>   _validate_mm_placeholdersz1BaseMultiModalProcessor._validate_mm_placeholders  s    
 %3$8$8$:$: 
	 
	 Hj*..x<<L<  J.."NJ N N(2N N5=N N%(%6%6N N N   /
	 
	r@   r  r  c                 F   |                                 }|                     ||           |                     ||           |r-|                     ||          }|                     ||           n/|                     ||          \  }}|                     ||           ||fS rY   )r  r  r  r  r  r  )r[   ry  r  r  r   r  r  r  s           r>   _maybe_apply_prompt_updatesz3BaseMultiModalProcessor._maybe_apply_prompt_updates  s     "0022  N;;;!!"3^DDD 	L"88! O **?NKKKK*.*D*D!+ +'J **?NKKK?**r@   c                   t                      }|| j        j                            |           |                     |          }|i }|                     |||||          \  }}	}
t          | j        j        d          5  |                     |||	j        |	j	        |
          \  }}ddd           n# 1 swxY w Y   d |
                                D             }t          d||	j        |	j        |          S )a"  
        Process multi-modal inputs to be used in vLLM.

        The main steps are:

        1. Apply HF Processor on prompt text and multi-modal data together,
           outputting token IDs and processed tensors.
        2. Find and update sequences in the token IDs with placeholder tokens.
           The number of placeholder tokens equals the feature size of the
           multi-modal data outputted by the multi-modal encoder.
        3. Extract information about the placeholder tokens from the
           processed token IDs.
        N)r  ra  prompt_update)ry  r  r  r   r  c                 .    i | ]\  }}|d  |D             S )c                 6    g | ]}|                                 S rZ   )r   )r  r  s     r>   r  z<BaseMultiModalProcessor.apply.<locals>.<dictcomp>.<listcomp>G  s     @@@4t}}@@@r@   rZ   )r  r   r  s      r>   r  z1BaseMultiModalProcessor.apply.<locals>.<dictcomp>F  s=     !
 !
 !
&, @@<@@@!
 !
 !
r@   
multimodal)typeprompt_token_idsr  r  r  )r/   rP  rh  create_timing_statsrz  r  r0   r  rJ  rL  r  r#   rK  )r[   rV   rb  rc  r  ra  
request_idry  r  r  r  r  mm_placeholder_rangess                r>   re  zBaseMultiModalProcessor.apply  sy   , ,--
!IM--j999$$W--&"$ ++" 3 , 
 
		
 *$)-II 	 	*.*J*J!%!.")"8"3 +K + +'J	 	 	 	 	 	 	 	 	 	 	 	 	 	 	!
 !
*9*?*?*A*A!
 !
 !

  'nn1
 
 
 	
s   (B55B9<B9rY   )Dr^   r_   r`   re   rG  r3   rT  r   r]  r_  rM   r   r
   r  r'   r#   r\   r,   rU  ra   rv  r+   rz  r   r2   r!   r}  r%   r   r   r  r   r  r  rF   r   r  rO   r  r  boolr  r  r  r  r  r  r"   r  MultiModalIsCachedr  r   r  r&   r  rI  r  r  r   r  r  r  r  r  r  r  re  __classcell__r[  s   @r>   rN  rN    s
         6:D D DD 3D
 ,d2D 
D D D D D D& ) ) X) ' ' X' /3V V VV $V !(V 4	V %t+V 
V V V VO"6 O O O O("" " 
	" " " "*# 
   6 "" !(V 4" 
++	,	" " " ^" "%" !(V 4" -	"
 
,	" " " ^",
 .
  S)
 
!	
 
 
 
 !% ! !(V 4 ! -	 !
 
! !  !  !  !DQCyQ 3Q 
d233	4	Q Q Q Q0%0 
wsF{#WS&[%99	:0 0 0 0
 f% 3;' CK( 
   (

 &
 !(V 4	

 %S&[1
 

 
 
 
& = = & = !(V 4	 =
 %S&[1 = 
tCy,,	- =  =  =  =D %S&[1 
c	   *Cy 
c   "!%! !(V 4! %S&[1	!
 
! ! ! !2'4d3i'4 &'4 !(V 4	'4
 %S&[1'4 "&'4 
tCy,,	-'4 '4 '4 '4^ /3@ @ @%@ !(V 4@ %S&[1	@ %t+@ 
@ @ @ @D @+ @ + @ $	 @
 
!#66	7 @  @  @  @D	=+	= 	= 
		= 	= 	= 	=1,+1, $1, )	1,
 11, $;1, 
,.EE	F1, 1, 1, 1,t /3/6 /6 /6d3i/6 +/6 !(V 4	/6
 %S&[1/6 %t+/6 
tCy2D8	9/6 /6 /6 /6p /3S6 S6 S6d3iS6 +S6 !(V 4	S6
 %S&[1S6 %t+S6 
tCy2D8	9S6 S6 S6 S6jIS	I 3I 
tCy<<	=	I I I IHH 3H 
s66	7	H H H H2+92+ 32+ 
tCy'#t,C'D"DEE	F	2+ 2+ 2+ 2+h0  S) 
	   &+  S) 
	   & d+B&C!CD  S) 
	   "+%+ I+ 1	+
 3+  + 
tCy'#t,C'D"DEE	F+ + + +@ <@@
 /3@
 @
 @
d3i@
 $@
 !(V 4	@

 %S&[1D8@
 %t+@
 
@
 @
 @
 @
 @
 @
 @
 @
r@   rN  c                   &    e Zd Zedeee         z  dedeee         z  fd            Zdeee         z  dedeee         z  fdZ	deee         z  dede
fdZ	 ddd	deee         z  ded
eeef         deeef         dz  dedz  def fdZ xZS )EncDecMultiModalProcessorrV   rb  r;   c                     t           )z
        Create input prompt for the encoder. HF processor will be applied on
        this prompt during profiling and generation.
        r   r[   rV   rb  s      r>   create_encoder_promptz/EncDecMultiModalProcessor.create_encoder_promptU  s
     "!r@   c                     |S )z$Create input prompt for the decoder.rZ   r)  s      r>   create_decoder_promptz/EncDecMultiModalProcessor.create_decoder_prompta  s	     r@   encoder_inputsc                     | j                                         }|                     ||          }t          |t                    r|                    |d          }n|}t          dd|d         i|}||d<   |S )NFr7   encoder_prompt_token_idsr  rZ   )rP  r  r,  rL   rM   r=   r    )r[   rV   rb  r-  r9   decoder_prompt_rawdecoder_prompt_ids	mm_inputss           r>   _get_enc_dec_inputsz-EncDecMultiModalProcessor._get_enc_dec_inputsi  s     I++--	!77HH(#.. 	4!*!1!1"u "2 " " "4* 
 
%34F%G

 
	 );	$%r@   Nr`  rc  r  ra  c                    |                      ||          }t                                          |||||          }|                     |||          S )a;  
        Process multi-modal inputs to be used in vLLM.
        The main processing steps are modified to fit encoder-decoder model:
        1. Create encoder prompt from input prompt text.
        2. Apply the HF processor on encoder prompt.
        3. Copy the input prompt text as decoder prompt inputs.
        r`  )rV   rb  r-  )r*  rS  re  r3  )	r[   rV   rb  rc  r  ra  encoder_promptr-  r[  s	           r>   re  zEncDecMultiModalProcessor.apply  sm      33FGDD" ' 
 
 '') ( 
 
 	
r@   rY   )r^   r_   r`   r   rM   rF   ra   r   r*  r,  r#   r3  r
   r  r'   r    re  r$  r%  s   @r>   r'  r'  T  sw       	"d3i	" $	" 
tCy		" 	" 	" ^	"d3i $ 
tCy	   d3i $ )	   6 <@
 /3
 
 
d3i
 $
 !(V 4	

 %S&[1D8
 %t+
 
 
 
 
 
 
 
 
 
 
 
r@   r'  )yabcr   r   collectionsr   collections.abcr   r   r   r	   r
   r   dataclassesr   r   r   enumr   	functoolsr   typingr   r   r   r   r   r   regexr   r   typing_extensionsr   r   vllm.loggerr   vllm.tokenizersr   vllm.utils.collection_utilsr   r   hasherr   inputsr   r    r!   r"   r#   r$   r%   r&   r'   r(   parser)   r*   r+   r,   contextr.   r/   r0   rQ  r1   %transformers.feature_extraction_utilsr2   rO  r3   r  r^   r  rM   rF   ra   r4   r5   rf   r"  r?   rO   rG   rQ   rS   rU   rc   rh   rz   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r.  r3  r8  rC  rF  r#  r   r   rG  rI  rN  r'  rZ   r@   r>   <module>rG     s3   $ # # # # # # # # # # # # # # W W W W W W W W W W W W W W W W 1 1 1 1 1 1 1 1 1 1                                 3 3 3 3 3 3 3 3 # # # # # # ) ) ) ) ) ) F F F F F F F F % % % % % %                                           
 1 0 0 0 0 0 *BBBBBB4444444L#) 	X		WT3S	"" T#Y	9 & & & 3 4
  $	I I II
I 	I
 
#YI I I I 4
 !&	V V VVS#XV 	V
 	V V V V 	1 1 1t#1	1 	1
 	1 1 1 1, 	  t#	 	
 
#Y   $    X    $ $ $ $ $ $ $ $-O -O -O -O -O -O -O -O` $k1i 1 1 1 !)#)< = L I L L L ?@ ?@ ?@ ?@ ?@'"+ ?@ ?@ ?@D (*== ) = = = "*3%1A*A!BEU!U Y U U U    d   
 4
 4
 4
 4
 4
3 4
 4
 4
n A! A! A! A! A!l A! A! A!H I" I" I" I" I" I" I" I"X    x   # # # # #x # # #
 WT),<<===:(2, :9S$r(]3K : : : :    
   
 $U. U. U. U. U. U. U. U.p    *    	  CyCy 	
 {   :&Cy&Cy& #Y& 
#Y	& & & &6 
 
 
 
 
 
 
 
* eCHou->-C'DDE 4" 4" 4"4"04" t#4"
 4" 94" :d=1124" 4" 4" 4"ncN#s(^ 
   ?0?00?0 t#?0 4899:	?0 ?0 ?0 ?0D3I303 t#3 49::;	3 3 3 3"""0" t#" 3445	" " " ""BIB0B t#B %&	B B B BJ+I+0+ t#+ S$.//0	+ + + + #tDz/* 
 "#tH5I,J'K"KL 
 &-S$sTz2B-B%C " WT+,,,, , , , ,z , , ,D
 D
 D
 D
 D
c72; D
 D
 D
NH
 H
 H
 H
 H
 7 ; H
 H
 H
 H
 H
r@   