
    -`i;                        U d dl Z d dlZd dlZd dlmZmZ d dlmZmZ d dl	m
Z
mZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d d	lmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
lm&Z' d dlm(Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z0 d dl1m2Z2 d dl3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZCmDZDmEZE d dlFmGZGmHZHmIZImJZJmKZKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZS d dlTmUZU d dlVmWZW er	d dlXZXd dlYZYn* eWd eZ            d          ZY eWd eZ            d          ZX e?e[          Z\de]fd Z^ G d! d"e_          Z`d#d$d%d&Za G d' d(e9d)*          Zb G d+ d,e9d)*          Zc G d- d.e9d)*          Zd G d/ d0e9d)*          Ze G d1 d2e9d)*          Zf G d3 d4e9d)*          Zg G d5 d6e4          Zh G d7 d8e9d)*          Zi G d9 d:e9d)*          Zj G d; d<e9d)*          Zk G d= d>e9d)*          Zl G d? d@e9d)*          Zme'ecz  e z  egz  e!z  eiz  ejz  edz  eez  ekz  elz  e]z  emz  Z&eendA<    G dB dCe9d)*          Zoe)eoz  e0z  Z(eendD<    G dE dFe9d)*          ZpedG         ZqedH         ZredI         Zs edJ          Zt G dK dLeI          ZudMeveXjw                 dNeNfdOZxdPeveye]dQf                  dNeNfdRZzdSe]dPeve         dNeNfdTZ{dUeEdVeDdWeEfdXZ|dVeDdWeve]         fdYZ} G dZ d[eeet                   Z~d\eye]eveee]dz  f                  f         dNeNd]eye]eve]         f         dWeeDeEf         fd^Z G d_ d`e~eee]dz  f                            Z G da dbe~e
eee]dz  f                                     Z G dc dde          Z G de dfe          Z G dg dhe          Zdiee]z  dz  fdjZd)dkdiee]z  dz  dledWe]dz  fdmZ ee          Zd)dkdiee]z  dz  dledWe]dz  fdnZdoeye]evf         dpeve]         dWe]fdqZdoeye]evf         dpeve]         dredWe]fdsZ eee"          Z eeed          Z eeee          Z eee           Z eee!          Z eeei          Z eeem          Z e6e          j        Z e6ec          j        Z e6eg          j        Z e6e-          j        Ze]eye]e]f         z  e+z  ehz  Zeendt<   du dv dw dx dy dz d{ d| d} d~ d d d dZeye]ee&gef         f         end<   de&dWee]ef         fdZdZde]dee&         de~dedredWevep         fdZde&dededredWedz  f
dZ eee          Z eee%          Zde(de~derdredWevep         f
dZdevep         dWdfdZdeve(         de=derdWeevep         eDdz  eEdz  f         fdZdeve(         de=derdWeevep         eDdz  eEdz  f         fdZdevep         fdZdde]fdZdS )    N)ABCabstractmethod)Counterdefaultdict)	AwaitableCallableIterable)cached_property	lru_cachepartial)
accumulate)Path)TYPE_CHECKINGAnyGenericLiteral	TypeAliasTypeVarcast)#ChatCompletionAssistantMessageParam#ChatCompletionContentPartImageParam(ChatCompletionContentPartInputAudioParam%ChatCompletionContentPartRefusalParam"ChatCompletionContentPartTextParamChatCompletionFunctionToolParam"ChatCompletionMessageToolCallParamChatCompletionToolMessageParam)ChatCompletionContentPartParam)ChatCompletionMessageParam)
InputAudio)ResponseInputImageParam)Message)Image)	BaseModel
ConfigDictTypeAdapter)Required	TypedDict)envs)ModelConfig)init_logger)SupportsMultiModal)MULTIMODAL_REGISTRYMultiModalDataDictMultiModalUUIDDict)MultiModalBatchedFieldMultiModalFlatFieldMultiModalSharedFieldVisionChunkVisionChunkImageVisionChunkVideo)BaseMultiModalProcessor)MEDIA_CONNECTOR_REGISTRYMediaConnectorrandom_uuid)
is_list_of)
LazyLoadertransformerstorchnamec                     | dk    r$ddl m} t          j        dt          d           |S t          dt          d|           )	Nresolve_hf_chat_templater   )resolve_chat_templatez`vllm.entrypoints.chat_utils.resolve_hf_chat_template` has been moved to `vllm.renderers.hf.resolve_chat_template`. The old name will be removed in v0.16.   )
stacklevelzmodule z has no attribute )vllm.renderers.hfrB   warningswarnDeprecationWarningAttributeError__name__)r?   rB   s     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/chat_utils.py__getattr__rL   E   sm    )));;;;;;5 	
 	
 	
 	
 %$
I8IIII
J
JJ    c                       e Zd ZdZdS )ChatTemplateResolutionErrorzRaised when chat template resolution fails.

    This is a subclass of ValueError for backward compatibility with
    existing exception handlers.
    N)rJ   
__module____qualname____doc__ rM   rK   rO   rO   V   s           rM   rO   z<##IMAGE##>z<##AUDIO##>z<##VIDEO##>)imageaudiovideoc                   &    e Zd ZU ee         ed<   dS )AudioURLurlNrJ   rP   rQ   r'   str__annotations__rS   rM   rK   rX   rX   e   &         	# rM   rX   F)totalc                   H    e Zd ZU ee         ed<   eed                  ed<   dS )#ChatCompletionContentPartAudioParam	audio_urltypeN)rJ   rP   rQ   r'   rX   r\   r   rS   rM   rK   r`   r`   l   9         !!!!
7;'
((((''rM   r`   c                   l    e Zd ZU eeeef         z  dz  ed<   	 eed                  ed<   	 edz  ed<   dS ))ChatCompletionContentPartImageEmbedsParamNimage_embedsrb   uuidrJ   rP   rQ   r[   dictr\   r'   r   rS   rM   rK   re   re   s   d         S#X&----
 7>*
++++'
* rM   re   c                   l    e Zd ZU eeeef         z  dz  ed<   	 eed                  ed<   	 edz  ed<   dS ))ChatCompletionContentPartAudioEmbedsParamNaudio_embedsrb   rg   rh   rS   rM   rK   rl   rl      rj   rM   rl   c                   &    e Zd ZU ee         ed<   dS )VideoURLrY   NrZ   rS   rM   rK   ro   ro      r]   rM   ro   c                   H    e Zd ZU ee         ed<   eed                  ed<   dS )#ChatCompletionContentPartVideoParam	video_urlrb   N)rJ   rP   rQ   r'   ro   r\   r   rS   rM   rK   rq   rq      rc   rM   rq   c                   @    e Zd ZU dZej        ed<    ed          ZdS )PILImagez#
    A PIL.Image.Image object.
    	image_pilT)arbitrary_types_allowedN)rJ   rP   rQ   rR   r#   r\   r%   model_configrS   rM   rK   rt   rt      s<           {:d;;;LLLrM   rt   c                   4    e Zd ZU dZedz  ed<   edz  ed<   dS )(CustomChatCompletionContentPILImageParamzA simpler version of the param that only accepts a PIL image.

    Example:
    {
        "image_pil": ImageAsset('cherry_blossom').pil_image
    }
    Nru   rg   )rJ   rP   rQ   rR   rt   r\   r[   rS   rM   rK   ry   ry      sB           $
* rM   ry   c                   4    e Zd ZU dZedz  ed<   edz  ed<   dS )+CustomChatCompletionContentSimpleImageParamzA simpler version of the param that only accepts a plain image_url.
    This is supported by OpenAI API, although it is not documented.

    Example:
    {
        "image_url": "https://example.com/image.jpg"
    }
    N	image_urlrg   rJ   rP   rQ   rR   r[   r\   rS   rM   rK   r{   r{      sB           Tz
* rM   r{   c                   $    e Zd ZU dZedz  ed<   dS )+CustomChatCompletionContentSimpleAudioParamzA simpler version of the param that only accepts a plain audio_url.

    Example:
    {
        "audio_url": "https://example.com/audio.mp3"
    }
    Nra   r}   rS   rM   rK   r   r      s.           TzrM   r   c                   4    e Zd ZU dZedz  ed<   edz  ed<   dS )+CustomChatCompletionContentSimpleVideoParamzA simpler version of the param that only accepts a plain audio_url.

    Example:
    {
        "video_url": "https://example.com/video.mp4"
    }
    Nrr   rg   r}   rS   rM   rK   r   r      sB           Tz
* rM   r   c                   Z    e Zd ZU dZee         ed<   	 eed<   	 eed                  ed<   dS )!CustomThinkCompletionContentParamzA Think Completion Content Param that accepts a plain text and a boolean.

    Example:
    {
        "thinking": "I am thinking about the answer",
        "closed": True,
        "type": "thinking"
    }
    thinkingclosedrb   N)	rJ   rP   rQ   rR   r'   r[   r\   boolr   rS   rM   rK   r   r      sS           smLLL)
7:&
''''rM   r   r   c                       e Zd ZU dZee         ed<   	 eee         z  ed<   	 eed<   	 edz  ed<   	 e	e
         dz  ed<   	 edz  ed<   	 ee         dz  ed	<   dS )
 CustomChatCompletionMessageParamz0Enables custom roles in the Chat Completion API.rolecontentr?   Ntool_call_id
tool_calls	reasoningtools)rJ   rP   rQ   rR   r'   r[   r\   listr   r	   r   r   rS   rM   rK   r   r     s         ::
3-+4677777&
III *7;<tCCCCHTz9/047777''rM   r   r   c                       e Zd ZU ee         ed<   	 edz  eeeef                  z  ed<   	 edz  ed<   	 edz  ed<   	 ee	         dz  ed<   	 edz  ed<   	 edz  ed<   	 ee
         dz  ed	<   dS )
ConversationMessager   Nr   r   r?   r   r   reasoning_contentr   )rJ   rP   rQ   r'   r[   r\   r   ri   r	   r   r   rS   rM   rK   r   r   5  s         
3-+4Z$tCH~.....%*7
**;<tCCCCHTz9Tz!!!E/047777''rM   r   )autostringopenai)r   r   )rT   rU   rV   rf   rm   vision_chunk_Tc                       e Zd ZdS )_BatchedSingleItemFieldN)rJ   rP   rQ   rS   rM   rK   r   r   ]  s        DrM   r   tensorsmm_processorc                 <   | d         }|j         j        j                                        }t	          |           dk    rW|j        dk    rL|j        d         dk    r;|j        d         |k    r*t                              d           t          d          S |j        t          fd| D                       rt                      S d | D             }dgt          |          fd	t          t	          |                    D             }t          |
          S )Nr         zBatched multi-modal embedding inputs are deprecated for Chat API. Please pass a separate content part for each multi-modal item.)
batch_sizec              3   .   K   | ]}|j         k    V  d S N)shape).0tfirst_shapes     rK   	<genexpr>z _detect_field.<locals>.<genexpr>u  s*      
3
3a17k!
3
3
3
3
3
3rM   c                 ,    g | ]}t          |          S rS   )len)r   tensors     rK   
<listcomp>z!_detect_field.<locals>.<listcomp>x  s    777VS[[777rM   c                 P    g | ]"}t          |         |d z                      f#S )r   )slice)r   i
slice_idxss     rK   r   z!_detect_field.<locals>.<listcomp>z  s@       78z!}jQ/	0	02  rM   )slices)infoctxrw   get_inputs_embeds_sizer   ndimr   loggerwarningr   allr0   r   ranger1   )r   r   
first_itemhidden_sizesize_per_itemr   r   r   s         @@rK   _detect_fieldr   a  sJ    J#'4KKMMK 	GOq  Q1$$R K//M	
 	
 	
 '!4444"K

3
3
3
37
3
3
333 (%'''77w777M0j//0J   <A#mBTBT<U<U  F f----rM   
data_itemsztorch.Tensorc                 p     si S t           d                                                   t          fd dd          D                       rt          d           fdD              fd                                D             }	                     t          j        |          i           fdD             fdD             }|D ].                             fd	 D             d
          |<   /n*# t          $ r t                              d           Y nw xY w|S )Nr   c              3   b   K   | ])}t          |                                          k    V  *d S r   )setkeys)r   item
first_keyss     rK   r   z _merge_embeds.<locals>.<genexpr>  s8      
E
Ed3tyy{{z)
E
E
E
E
E
ErM   r   zCAll dictionaries in the list of embeddings must have the same keys.c                 L    i | ]t          fd D                        S )c                      g | ]
}|         S rS   rS   r   r   keys     rK   r   z,_merge_embeds.<locals>.<dictcomp>.<listcomp>  s    ===$DI===rM   )r   )r   r   r   r   s    @rK   
<dictcomp>z!_merge_embeds.<locals>.<dictcomp>  sK        	]====*===|LL  rM   c                 `    i | ])\  }|                     fd D             d          *S )c                      g | ]
}|         S rS   rS   r   s     rK   r   z,_merge_embeds.<locals>.<dictcomp>.<listcomp>  s     B B Btc B B BrM   F
pin_memory)_reduce_data)r   fieldr   r   s     @rK   r   z!_merge_embeds.<locals>.<dictcomp>  sX       C 	U B B B Bz B B BuUU  rM   c                 ,    i | ]}||         j         S rS   )r   )r   r   parsed_configss     rK   r   z!_merge_embeds.<locals>.<dictcomp>  s#    NNNCnS17NNNrM   c                 n    g | ]1}|         |         k    t          |         t                    /|2S rS   )
isinstancer   )r   r   fieldsparsed_fieldss     rK   r   z!_merge_embeds.<locals>.<listcomp>  sP     
 
 
s}S111"6#;0GHH 2  211rM   c                      g | ]
}|         S rS   rS   r   s     rK   r   z!_merge_embeds.<locals>.<listcomp>  s    222tc222rM   Fr   zKError when parsing merged embeddings. Falling back to auto-detected fields.)r   r   any
ValueErroritems_get_mm_fields_configr=   BatchFeaturer   	Exceptionr   	exception)	r   r   data_mergedkeys_to_updater   r   r   r   r   s	   ``  @@@@@rK   _merge_embedsr     s     	Z]''))**J

E
E
E
Ejn
E
E
EEE 
Q
 
 	
      F    ,,..  K

%;;%k22
 
 ONNN:NNN
 
 
 
 
!
 
 
 " 	 	C,S1>>2222z222u  ?    K	  
 
 
4	
 	
 	
 	
 	

 s   A6D $D32D3modalityc                 l   t          |          dk    r|S t          d |D                       r|S t          |t          j                  r)|  dfd|D             }t          ||                   S t          |t                    rt          ||          S t          t          |                    )Nr   c              3      K   | ]}|d u V  	d S r   rS   )r   r   s     rK   r   z#_get_embeds_data.<locals>.<genexpr>  s&      
/
/D44<
/
/
/
/
/
/rM   _embedsc                     g | ]}|iS rS   rS   )r   r   
embeds_keys     rK   r   z$_get_embeds_data.<locals>.<listcomp>  s    @@@Tz4(@@@rM   )	r   r   r;   r>   Tensorr   ri   NotImplementedErrorrb   )r   r   r   
dict_itemsr   s       @rK   _get_embeds_datar     s    
 :!

/
/J
/
/
/// *el++ C )))
@@@@Z@@@
Z66zBB*d## 7Z666
d:..
/
//rM   mm_uuidsmm_datareturnc                     |                     d          }|| S t          |           }g }|D ]E}t          |t                    sJ |                     d          }||                    |           F|r||d<   |S )as  Rebuild mm_uuids after vision_chunk processing.

    When videos are split into chunks, the original UUIDs need to be updated
    to reflect the new UUIDs generated for each chunk.

    Args:
        mm_uuids: Original UUIDs dictionary
        mm_data: Processed multimodal data with vision_chunk items

    Returns:
        Updated UUIDs dictionary with chunk UUIDs
    r   Nrg   )getri   r   append)r   r   vision_chunks	new_uuidsvision_chunk_uuidsr   uuid_vals          rK   rebuild_mm_uuids_from_mm_datar     s      KK//MXI 0 0$%%%%%88F##%%h/// 7$6	.!rM   c                     |                      d          }|g S t          t                    }|D ]y}t          |t                    sJ |                     d          dk    rG|                     dd          }|                     dd          }||                             |           zg }t          |                                          D ]0}|                    d                    ||                              1|S )	a   Build video prompts from vision_chunk data.

    Collects prompts from video chunks and groups them by video_idx.

    Args:
        mm_data: Processed multimodal data with vision_chunk items

    Returns:
        List of video prompts, one per video.
    r   Nrb   video_chunk	video_idxr   prompt )	r   r   r   r   ri   r   sortedr   join)r   r   video_prompts_dictr   r   r   video_promptss          rK    build_video_prompts_from_mm_datar     s    KK//M	 0;4/@/@ 9 9$%%%%%88F},,a00IXXh++Fy)00888 M.335566 E E	RWW%7	%BCCDDDDrM   c                       e Zd ZdZdef fdZedefd            Ze	defd            Z
edee         fd            Ze	d             Ze	d	             Ze	d
             Zed             Zdedededz  fdZedd            Z xZS )BaseMultiModalItemTrackerz
    Tracks multi-modal items in a given request and ensures that the number
    of multi-modal items in a given request does not exceed the configured
    maximum per prompt.
    rw   c                 .   t                                                       || _        t          t          t
          t                   f         t
                    | _        t          t          t
          t                   f         t
                    | _        d S r   )	super__init___model_configr   r[   r   r   _items_by_modality_modality_order)selfrw   	__class__s     rK   r  z"BaseMultiModalItemTracker.__init__  sa    )"-c48m"<T"B"B*3S	>:4@@rM   r   c                 8    t          | j        j        dd          S )zDCheck if model uses unified vision_chunk modality for images/videos.use_unified_vision_chunkF)getattrr  	hf_configr  s    rK   !use_unified_vision_chunk_modalityz;BaseMultiModalItemTracker.use_unified_vision_chunk_modality!  s     t)35OQVWWWrM   c                     | j         S r   )r  r  s    rK   rw   z&BaseMultiModalItemTracker.model_config&  s    !!rM   c                 n    ddl m}  || j                  }t          t          t
                   |          S )Nr   )get_model_cls) vllm.model_executor.model_loaderr  rw   r   rb   r,   )r  r  	model_clss      rK   r  z#BaseMultiModalItemTracker.model_cls*  s<    BBBBBB!M$"344	D+,i888rM   c                     | j         j        S r   )r  allowed_local_media_pathr  s    rK   r  z2BaseMultiModalItemTracker.allowed_local_media_path1  s    !::rM   c                     | j         j        S r   )r  allowed_media_domainsr  s    rK   r  z/BaseMultiModalItemTracker.allowed_media_domains5  s    !77rM   c                     t           S r   )r-   r  s    rK   mm_registryz%BaseMultiModalItemTracker.mm_registry9  s    ""rM   c                 @    | j                             | j                  S r   )r  create_processorrw   r  s    rK   r   z&BaseMultiModalItemTracker.mm_processor=  s    001BCCCrM   r   r   Nc                    |                     dd          }|}| j        o|dv }|r d}t          | j        |                   dz   }nt          | j        |                   dz   }| j                            ||           |rA| j        |                             |           | j        d                             |           n | j        |                             |           | j        	                    ||          S )z
        Add a multi-modal item to the current prompt and returns the
        placeholder string to use, if any.

        An optional uuid can be added which serves as a unique identifier of the
        media.
        r   r   )rV   rT   r   r   )
replacer  r   r  r   validate_num_itemsr   r  r  get_placeholder_str)r  r   r   input_modalityoriginal_modalityuse_vision_chunk	num_itemss          rK   addzBaseMultiModalItemTracker.addA  s    ")))R88$2 8!%77 	  	L ,ND3NCDDqHIID34EFGG!KI,,^YGGG  	D#N3::4@@@ 0778IJJJJ#$56==dCCC~11(IFFFrM   BaseMultiModalContentParserc                     t           r   r   r  s    rK   create_parserz'BaseMultiModalItemTracker.create_parserf      !!rM   r   r%  )rJ   rP   rQ   rR   r*   r  r
   r   r  propertyrw   rb   r,   r  r  r  r  r   ModalityStrr   r[   r$  r   r(  __classcell__r  s   @rK   r   r     s        A[ A A A A A A X4 X X X _X "k " " " X" 94 23 9 9 9 _9 ; ; X; 8 8 X8 # # X# D D _D#GK #Gr #GcDj #G #G #G #GJ " " " ^" " " " "rM   r   items_by_modalityvision_chunk_modality_orderc                 @   d| v rd| v rt          d          d| v rd| v rt          d          i }i }d| v r9t          dd | d         D             |          |d<   d | d         D             |d<   d| v r*d	 | d         D             |d<   d
 | d         D             |d<   d| v r9t          dd | d         D             |          |d<   d | d         D             |d<   d| v r*d | d         D             |d<   d | d         D             |d<   d| v r*d | d         D             |d<   d | d         D             |d<   d| v r`| d         }|                    dg           }d | d         D             |d<   d t          |          D             }t	          |          t	          |          k    s+J dt	          |           dt	          |           d            g }d}	t          |          D ]\  }
\  }}||
         }|\  }}|t	          |d                   k     r|nd }|dk    rSt          |d          r-|j        }|                    t          d||                     v|                    |           |dk    rt          |d          r|	 |pt                      }t          |t                    rt	          |          dk    r	|d         }n|}|                    |          }t          |          D ]=\  }
}|                    t          d|d         | d|
 |	|d                               >|	dz  }	R# t          $ r;}t                              d!|           |                    |           Y d }~d }~ww xY w|                    |           ||d<   ||fS )"NrT   rf   z4Mixing raw image and embedding inputs is not allowedrU   rm   z4Mixing raw audio and embedding inputs is not allowedc                     g | ]\  }}|S rS   rS   r   datarg   s      rK   r   z"_resolve_items.<locals>.<listcomp>z      FFFjdDTFFFrM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>}      VVVjdDTVVVrM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>      NNNZT4DNNNrM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>      OOOjdDTOOOrM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r5  rM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r7  rM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r9  rM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r;  rM   rV   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r9  rM   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  r;  rM   r   c                     g | ]\  }}|S rS   rS   r3  s      rK   r   z"_resolve_items.<locals>.<listcomp>  s)     $
 $
 $
T4D$
 $
 $
rM   c                      g | ]\  }}|||fS r   rS   )r   idxr   s      rK   r   z"_resolve_items.<locals>.<listcomp>  s0     
 
 
T $KrM   zvision_chunk items (z) and modality_order (z) must have same lengthr   media)rb   rT   rg   split_video_chunksr   r   -r   )rb   r   rg   r   r   z Failed to split video chunks: %s)r   r   r   	enumerater   hasattrrE  r   r4   r:   r   tuplerF  r5   r   r   r   )r/  r   r0  r   r   vision_chunk_itemsmodality_orderfiltered_itemsprocessed_chunksr   r   rD  r   inner_modalityr4  rg   r   
image_data
video_uuid
video_datavideo_chunksvces                          rK   _resolve_itemsrV  k  s   
 ###:K(K(KOPPP###:K(K(KOPPPGH***+FF$5n$EFFF
 

 WV4En4UVVV###NN3DW3MNNNOO4Eg4NOOO***+FF$5n$EFFF
 

 WV4En4UVVV###NN3DW3MNNNOO4Eg4NOOO###NN3DW3MNNNOO4Eg4NOOO*** /~>488LL$
 $
#4^#D$
 $
 $
 

 
&'9::
 
 
 >""c.&9&9999L3~#6#6 L L">22L L L :99
 /1	'77 +	2 +	2NA{T+A.NJD$"S.)A%B%BBBttH(( 4)) 2!%J$++(gZhWWW    %++D11117** <)=>> 24CS6%-%>
%dE22 .s4yyA~~)-aJJ)-J'3'F'Fz'R'R%.|%<%< 	 	EAr,33 0)602=0A,6)<)<)<)<.7+-h<!" !" !"    "Q		$ 6 6 6'I1MMM(//555555556 %++D111"2Hs   B*L88
M=0M88M=c                   >    e Zd Zdeedz  edz  f         fdZddZdS )MultiModalItemTrackerr   Nc                 n    | j         sdS t          t          | j                   | j        | j                  S )NNN)r  rV  ri   r   r  r  s    rK   resolve_itemsz#MultiModalItemTracker.resolve_items  s>     & 	:())4+<d>R
 
 	
rM   r%  c                      t          |           S r   )MultiModalContentParserr  s    rK   r(  z#MultiModalItemTracker.create_parser  s    &t,,,rM   r*  rJ   rP   rQ   rJ  r.   r/   r[  r(  rS   rM   rK   rX  rX    sZ        
	!D(*<t*CC	D
 
 
 
- - - - - -rM   rX  c                   >    e Zd Zdeedz  edz  f         fdZddZdS )AsyncMultiModalItemTrackerr   Nc                    K   | j         sdS d | j                                         D              d {V }t          || j        | j                  S )NrZ  c                 B   K   i | ]\  }}|t          j        |  d {V S r   )asynciogather)r   r   coross      rK   r   z<AsyncMultiModalItemTracker.resolve_items.<locals>.<dictcomp>  sK       &
 &
 &
% GNE2222222&
 &
 &
rM   )r  r   rV  r   r  )r  resolved_items_by_modalitys     rK   r[  z(AsyncMultiModalItemTracker.resolve_items  s       & 	:&
 &
#'#:#@#@#B#B&
 &
 &
 &
 &
 &
 &
 &
 &
"
 &(94;O
 
 	
rM   r%  c                      t          |           S r   )AsyncMultiModalContentParserr  s    rK   r(  z(AsyncMultiModalItemTracker.create_parser  s    +D111rM   r*  r^  rS   rM   rK   r`  r`    sZ        
	!D(*<t*CC	D
 
 
 
2 2 2 2 2 2rM   r`  c                       e Zd Zd fdZdededz  fdZdeeef         fdZ	e
ddedz  d	edz  ddfd
            Ze
	 ddeeeef         z  dz  d	edz  ddfd            Ze
	 ddej        dz  d	edz  ddfd            Ze
ddedz  d	edz  ddfd            Ze
	 ddedz  d	edz  ddfd            Ze
	 ddeeeef         z  dz  d	edz  ddfd            Ze
ddedz  d	edz  ddfd            Z xZS )r%  r   Nc                 z    t                                                       t          t                    | _        d S r   )r  r  r   r   _placeholder_storage)r  r  s    rK   r  z$BaseMultiModalContentParser.__init__  s2     6A5F5F!!!rM   r   placeholderc                 h    t           |         }|r"| j        |                             |           d S d S r   )MODALITY_PLACEHOLDERS_MAPrk  r   )r  r   rl  mod_placeholders       rK   _add_placeholderz,BaseMultiModalContentParser._add_placeholder  sC    3H= 	K%o6==kJJJJJ	K 	KrM   c                 *    t          | j                  S r   )ri   rk  r  s    rK   mm_placeholder_storagez2BaseMultiModalContentParser.mm_placeholder_storage  s    D-...rM   r|   rg   c                     t           r   r'  )r  r|   rg   s      rK   parse_imagez'BaseMultiModalContentParser.parse_image  r)  rM   rf   c                     t           r   r'  )r  rf   rg   s      rK   parse_image_embedsz.BaseMultiModalContentParser.parse_image_embeds  
     "!rM   ru   c                     t           r   r'  )r  ru   rg   s      rK   parse_image_pilz+BaseMultiModalContentParser.parse_image_pil  
     "!rM   ra   c                     t           r   r'  )r  ra   rg   s      rK   parse_audioz'BaseMultiModalContentParser.parse_audio!  r)  rM   input_audioc                     t           r   r'  )r  r}  rg   s      rK   parse_input_audioz-BaseMultiModalContentParser.parse_input_audio%  rz  rM   rm   c                     t           r   r'  )r  rm   rg   s      rK   parse_audio_embedsz.BaseMultiModalContentParser.parse_audio_embeds+  rw  rM   rr   c                     t           r   r'  )r  rr   rg   s      rK   parse_videoz'BaseMultiModalContentParser.parse_video3  r)  rM   )r   Nr   )rJ   rP   rQ   r  r,  r[   rp  ri   r   rr  r   rt  rv  r#   ry  r|  r    r  r  r  r-  r.  s   @rK   r%  r%    s       	G 	G 	G 	G 	G 	GK K3: K K K K
/S$Y / / / / " "S4Z "sTz "T " " " ^"   " "DcN*T1" Dj" 
	" " " ^" @D" "t+"36:"	" " " ^"
 " "S4Z "sTz "T " " " ^" AE" "%,"47$J"	" " " ^"
   " "DcN*T1" Dj" 
	" " " ^" " "S4Z "sTz "T " " " ^" " " " "rM   r%  c                   p    e Zd Zdeddf fdZedefd            Zddedz  dedz  ddfdZ		 dd	ee
eef         z  dz  dedz  ddfd
Z	 ddee
eef         z  dz  dedz  ddfdZ	 ddej        dz  dedz  ddfdZddedz  dedz  ddfdZ	 ddedz  dedz  ddfdZddedz  dedz  ddfdZ xZS )r]  trackerr   Nc                     t                                                       || _        | j        j        j        }t          |dd           }t          j        t          j	        ||j
        |j                  | _        d S Nmedia_io_kwargs)r  r  r  r  r  _trackerrw   multimodal_configr  r7   loadr)   VLLM_MEDIA_CONNECTORr  r  
_connectorr  r  r  r  r  s       rK   r  z MultiModalContentParser.__init__9  ss     M6H!"35FMM*B*G%+%,%E")"?	+
 +
 +
rM   c                     | j         j        S r   r  rw   r  s    rK   rw   z$MultiModalContentParser.model_configG      }))rM   r|   rg   c                     |r| j                             |          nd }| j                            d||f          }|                     d|           d S NrT   )r  fetch_imager  r$  rp  )r  r|   rg   rT   rl  s        rK   rt  z#MultiModalContentParser.parse_imageK  X    :CM++I666m''%??g{33333rM   rf   c                      j                                         }|j        st          d          t	          |t
                    r= fd|                                D             } j                            d||f          }t	          |t                    r7 j
                            |          } j                            d||f          }| j                            dd |f          }                     d|           d S )N9You must set `--enable-mm-embeds` to input `image_embeds`c                 L    i | ] \  }}|j                             |          !S rS   r  fetch_image_embeddingr   kvr  s      rK   r   z>MultiModalContentParser.parse_image_embeds.<locals>.<dictcomp>]  ?       Aq 4?88;;  rM   rf   rT   )rw   get_multimodal_configenable_mm_embedsr   r   ri   r   r  r$  r[   r  r  rp  )r  rf   rg   	mm_configembedsrl  	embeddings   `      rK   rv  z*MultiModalContentParser.parse_image_embedsQ  s    
 %;;==	) 	K   lD)) 	L   (..00  F -++NVTNKKKlC(( 	O==lKKI-++NY<MNNK-++NT4LIIKg{33333rM   rm   c                      j                                         }|j        st          d          t	          |t
                    r> fd|                                D             } j                            d||f          }njt	          |t                    r8 j
                            |          } j                            d||f          }n j                            dd |f          }                     d|           d S )N9You must set `--enable-mm-embeds` to input `audio_embeds`c                 L    i | ] \  }}|j                             |          !S rS   r  fetch_audio_embeddingr  s      rK   r   z>MultiModalContentParser.parse_audio_embeds.<locals>.<dictcomp>x  r  rM   rm   rU   )rw   r  r  r   r   ri   r   r  r$  r[   r  r  rp  )r  rm   rg   r  r  rl  r  s   `      rK   r  z*MultiModalContentParser.parse_audio_embedsl  s   
 %;;==	) 	K   lD)) 
	J   (..00  F -++NVTNKKKKc** 	J==lKKI-++NY<MNNKK-++NT4LIIKg{33333rM   ru   c                 l    | j                             d||f          }|                     d|           d S r  )r  r$  rp  )r  ru   rg   rl  s       rK   ry  z'MultiModalContentParser.parse_image_pil  s<     m'')T1BCCg{33333rM   ra   c                     |r| j                             |          nd }| j                            d||f          }|                     d|           d S NrU   )r  fetch_audior  r$  rp  )r  ra   rg   rU   rl  s        rK   r|  z#MultiModalContentParser.parse_audio  r  rM   r}  c                     |r:|                     dd          }|                     dd          }|r	d| d| }nd }nd }|                     ||          S Nr4  r   formatzdata:audio/z;base64,r   r|  r  r}  rg   
audio_dataaudio_formatra   s         rK   r  z)MultiModalContentParser.parse_input_audio  u      		$44J&??8R88L !L,LL
LL		 !		I	4000rM   rr   c                     |r| j                             |          nd }| j                            d||f          }|                     d|           d S )N)rr   rV   )r  fetch_videor  r$  rp  )r  rr   rg   rV   rl  s        rK   r  z#MultiModalContentParser.parse_video  s\    DMW++i+@@@SWm''%??g{33333rM   r   )rJ   rP   rQ   rX  r  r+  r*   rw   r[   rt  ri   rv  r  r#   ry  r|  r    r  r  r-  r.  s   @rK   r]  r]  8  s9       
 5 
$ 
 
 
 
 
 
 *k * * * X*4 4S4Z 4sTz 4T 4 4 4 4  4 4DcN*T14 Dj4 
	4 4 4 4<  4 4DcN*T14 Dj4 
	4 4 4 44 AE4 4t+436:4	4 4 4 44 4S4Z 4sTz 4T 4 4 4 4 BF1 1%,147$J1	1 1 1 1 4 4S4Z 4sTz 4T 4 4 4 4 4 4 4 4rM   r]  c                       e Zd Zdeddf fdZedefd            Zdedz  dedz  fdZ	ddedz  dedz  ddfd	Z
	 dd
eeeef         z  dz  dedz  ddfdZ	 ddeeeef         z  dz  dedz  ddfdZ	 ddej        dz  dedz  ddfdZdedz  dedz  fdZddedz  dedz  ddfdZ	 ddedz  dedz  ddfdZdedz  dedz  fdZddedz  dedz  ddfdZ xZS )rh  r  r   Nc                     t                                                       || _        | j        j        j        }t          |dd           }t          j        t          j	        ||j
        |j                  | _        d S r  r  r  s       rK   r  z%AsyncMultiModalContentParser.__init__  ss     M6H!"35FMM*B*G%+%,%E")"?	+
 +
 +
rM   c                     | j         j        S r   r  r  s    rK   rw   z)AsyncMultiModalContentParser.model_config  r  rM   r|   rg   c                 V   K   |r | j                             |           d {V nd }||fS r   )r  fetch_image_async)r  r|   rg   rT   s       rK   _image_with_uuid_asyncz3AsyncMultiModalContentParser._image_with_uuid_async  G      BKU$/33I>>>>>>>>>QU 	 d{rM   c                     |                      ||          }| j                            d|          }|                     d|           d S r  )r  r  r$  rp  )r  r|   rg   cororl  s        rK   rt  z(AsyncMultiModalContentParser.parse_image  J    **9d;;m''66g{33333rM   rf   c                      j                                         }|j        st          d          t	          j        t          t          j        t          t          t          j        f         z  d z  t          d z  f                              }t          |t                    r7 fd|                                D             }|                    ||f           t          |t                    r1 j                            |          }|                    ||f           ||                    d |f            j                            d|          }                     d|           d S )Nr  c                 L    i | ] \  }}|j                             |          !S rS   r  r  s      rK   r   zCAsyncMultiModalContentParser.parse_image_embeds.<locals>.<dictcomp>  r  rM   rf   rT   )rw   r  r  r   rc  FuturerJ  r>   r   ri   r[   r   r   
set_resultr  r  r  r$  rp  )r  rf   rg   r  futurer  r  rl  s   `       rK   rv  z/AsyncMultiModalContentParser.parse_image_embeds  m   
 %;;==	) 	K   %,c5<&7!884?tKL
   lD)) 	.   (..00  F vtn---lC(( 	1==lKKIy$/000tTl+++m''??g{33333rM   rm   c                      j                                         }|j        st          d          t	          j        t          t          j        t          t          t          j        f         z  d z  t          d z  f                              }t          |t                    r7 fd|                                D             }|                    ||f           t          |t                    r1 j                            |          }|                    ||f           ||                    d |f            j                            d|          }                     d|           d S )Nr  c                 L    i | ] \  }}|j                             |          !S rS   r  r  s      rK   r   zCAsyncMultiModalContentParser.parse_audio_embeds.<locals>.<dictcomp>  r  rM   rm   rU   )rw   r  r  r   rc  r  rJ  r>   r   ri   r[   r   r   r  r  r  r  r$  rp  )r  rm   rg   r  r  r  r  rl  s   `       rK   r  z/AsyncMultiModalContentParser.parse_audio_embeds  r  rM   ru   c                 @   t          j        t          t          j        d z  t          d z  f                              }|r|                    ||f           n|                    d |f           | j                            d|          }|                     d|           d S r  )	rc  r  rJ  r#   r[   r  r  r$  rp  )r  ru   rg   r  rl  s        rK   ry  z,AsyncMultiModalContentParser.parse_image_pil  s    
 ekD&8#*&D EFHH 	,y$/0000tTl+++m''88g{33333rM   ra   c                 V   K   |r | j                             |           d {V nd }||fS r   )r  fetch_audio_async)r  ra   rg   rU   s       rK   _audio_with_uuid_asyncz3AsyncMultiModalContentParser._audio_with_uuid_async  r  rM   c                     |                      ||          }| j                            d|          }|                     d|           d S r  )r  r  r$  rp  )r  ra   rg   r  rl  s        rK   r|  z(AsyncMultiModalContentParser.parse_audio  r  rM   r}  c                     |r:|                     dd          }|                     dd          }|r	d| d| }nd }nd }|                     ||          S r  r  r  s         rK   r  z.AsyncMultiModalContentParser.parse_input_audio   r  rM   rr   c                 V   K   |r | j                             |           d {V nd }||fS r   )r  fetch_video_async)r  rr   rg   rV   s       rK   _video_with_uuid_asyncz3AsyncMultiModalContentParser._video_with_uuid_async0  r  rM   c                     |                      ||          }| j                            d|          }|                     d|           d S )NrV   )r  r  r$  rp  )r  rr   rg   r  rl  s        rK   r  z(AsyncMultiModalContentParser.parse_video6  r  rM   r   )rJ   rP   rQ   r`  r  r+  r*   rw   r[   r  rt  ri   rv  r  r#   ry  r  r|  r    r  r  r  r-  r.  s   @rK   rh  rh    s       
 : 
t 
 
 
 
 
 
 *k * * * X*cDj d
    4 4S4Z 4sTz 4T 4 4 4 4  4 4DcN*T14 Dj4 
	4 4 4 4F  4 4DcN*T14 Dj4 
	4 4 4 4F  4 4;%4 Dj4 
	4 4 4 4cDj d
    4 4S4Z 4sTz 4T 4 4 4 4 BF1 1%,147$J1	1 1 1 1 cDj d
    4 4S4Z 4sTz 4T 4 4 4 4 4 4 4 4rM   rh  chat_templatec                      dS t           t                    r#                                 st          d          t           t                    r{d}t           fd|D                       sXt                                                     s9ddlm} | z  }|                                st          d  d  d	|           dS dS dS t          t                      d
          )z5Raises if the provided chat template appears invalid.Nz-the supplied chat template path doesn't exist{}
c              3       K   | ]}|v V  	d S r   rS   r   cr  s     rK   r   z)validate_chat_template.<locals>.<genexpr>H  s(      <<1A&<<<<<<rM   r   CHAT_TEMPLATES_DIRz#The supplied chat template string (z/) appears path-like, but doesn't exist! Tried:  and z" is not a valid chat template type)r   r   existsFileNotFoundErrorr[   r   /vllm.transformers_utils.chat_templates.registryr  r   	TypeErrorrb   )r  JINJA_CHARSr  builtin_template_paths   `   rK   validate_chat_templater  =  s_   	M4	(	( T1E1E1G1G T OPPP	M3	'	' T<<<<<<<<<	''..00	
      %7$F!(//11  J- J J+J J2GJ J  	 	 	 	  4..RRRSSSrM   
is_literalr  c          
           d S |r&t           t                    rt          d           S 	 t                     5 }|                                cd d d            S # 1 swxY w Y   d S # t
          $ r}t           t                    r d}t           fd|D                       syddlm} | z  }	 t          |          5 }|                                cd d d            cY d }~S # 1 swxY w Y   n,# t
          $ r d  d  d| d	| }t          |          |w xY wt           d
          cY d }~S d }~ww xY w)Nz<chat_template is expected to be read directly from its valuer  c              3       K   | ]}|v V  	d S r   rS   r  s     rK   r   z&_load_chat_template.<locals>.<genexpr>t  s(      ;;!1%;;;;;;rM   r   r  zThe supplied chat template (z=) looks like a file path, but it failed to be opened. Tried: r  z
. Reason: Tr  )r   r   r  openreadOSErrorr   r  r  r   _load_chat_template)r  r  frU  r  r  r  msgs   `       rK   r  r  \  sv   
 t mT** 	N   C-   	A6688	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 C C CmT** 	;;;;{;;;;; 	-      %7$F!
-/00 $A6688$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ - - -#= # #+# #2G# #  !# #  !oo1,- #=TBBBBBBBBB5Cs   A, AA, A##A, &A#'A, ,
D=6>D85C6C*C6$D=*C.	.C61C.	2C65D86)DD82D=8D=c                $    t          | |          S )Nr  )_cached_load_chat_template)r  r  s     rK   load_chat_templater    s    
 &m
KKKKrM   placeholder_storagetextsc                     t          |          D ]'\  }}|| v r| |                             d          ||<   (d                    |          S )Nr   
)rH  popr   )r  r  rD  elems       rK   _get_interleaved_text_promptr    sZ     u%% : :	T&&&,T266q99E#J99UrM   interleave_stringsc                    t          d |                                 D                       }|rt          | |          }nd                    |          }g }|D ]}||xx         |                    |          z  cc<   ||         dk     rIt
                              d|           t
                              d|           t          d| d          |	                    |g||         z             d                    ||gz             S )z;Combine multimodal prompts for a multimodal language model.c                     g | ]	}|D ]}|
S rS   rS   )r   r  r  s      rK   r   z4_get_full_multimodal_text_prompt.<locals>.<listcomp>  s%    BBBtTBBBBBBrM   r  r   zPlaceholder count is negative! Ensure that the 'interleave_strings' flag is disabled (current value: %s) when manually placing image placeholders.zInput prompt: %szFound more 'zA' placeholders in input prompt than actual multimodal data items.)
r   valuesr  r   countr   errordebugr   extend)r  r  r  placeholder_countstext_promptmissing_placeholdersrl  s          rK    _get_full_multimodal_text_promptr    sS    !BB*1133BBB   '23FNNii&& ')) U U;''';+<+<[+I+II'''k*Q..LL< #   LL+[9990{ 0 0 0  
 	##[M4F{4S$STTTT 99)[M9:::rM   _ContentPartc                 H    t          |                               dd           S Ntext_TextParserr   parts    rK   <lambda>r    s    T**..vt<< rM   c                 H    t          |                               dd           S )Nr   )_ThinkParserr   r
  s    rK   r  r    s    \$//33JEE rM   c                 H    t          |                               dd           S r  r  r
  s    rK   r  r    s    {40044VTBB rM   c                 H    t          |                               dd           S r  r  r
  s    rK   r  r    s    D 1 1 5 5fd C C rM   c                 H    t          |                               dd           S )Nr|   )_ResponsesInputImageParserr   r
  s    rK   r  r    s      :4 @ @ D D[RV W W rM   c                 p    t          |                               di                               dd           S )Nr|   rY   )_ImageParserr   r
  s    rK   r  r    /    l40044["EEII%QUVV rM   c                 H    t          |                               dd           S )Nrf   )_ImageEmbedsParserr   r
  s    rK   r  r        !3D!9!9!=!=nd!S!S rM   c                 H    t          |                               dd           S )Nrm   )_AudioEmbedsParserr   r
  s    rK   r  r    r  rM   c                 H    t          |                               dd           S )Nru   )_PILImageParserr   r
  s    rK   r  r    s    od3377TJJ rM   c                 p    t          |                               di                               dd           S )Nra   rY   )_AudioParserr   r
  s    rK   r  r    r  rM   c                 H    t          |                               dd           S )Nr}  )_InputAudioParserr   r
  s    rK   r  r    s     1$ 7 7 ; ;M4 P P rM   c                 H    t          |                               dd           S )Nrefusal)_RefusalParserr   r
  s    rK   r  r    s    N40044YEE rM   c                 p    t          |                               di                               dd           S )Nrr   rY   )_VideoParserr   r
  s    rK   r  r    r  rM   )r  r   
input_textoutput_textinput_imager|   rf   rm   ru   ra   r}  r"  rr   MM_PARSER_MAPr  c                    t          | t                    sJ |                     dd          }|                     dd          }t          |t                    r^|t          v rU|St	          |         |           }|dk    r4|                     dd          dk    rt
                              d           ||fS ||d| v rZt          t          |           }|                    dd          }t          |t                    r|                    dd          }d|fS d	| v r/t          t          |           }|                    d	d          }d	|fS d
| v r/t          t          |           }|                    d
d          }d
|fS d| v r/t          t          |           }|                    dd          }	d|	fS d| v rZt          t          |           }|                    dd          }
t          |
t                    r|
                    dd          }
d|
fS |                     d          +t          t          t          t          f         |           }d|fS d| v rZt          t          |           }|                    dd          }t          |t                    r|                    dd          }d|fS t          d          t          |t                    st          d          |dfS )a  
    Parses a given multi-modal content part based on its type.

    Args:
        part: A dict containing the content part, with a potential 'type' field.

    Returns:
        A tuple (part_type, content) where:
        - part_type: Type of the part (e.g., 'text', 'image_url').
        - content: Parsed content (e.g., text, image URL).

    Raises:
        ValueError: If the 'type' field is missing and no direct URL is found.
    rb   Nrg   r|   detailr   zB'image_url.detail' is currently not supported and will be ignored.rY   ru   rf   rm   ra   r}  rr   z(Missing 'type' field in multimodal part.z(Invalid 'type' field in multimodal part.zunknown part_type content)r   ri   r   r[   r)  r   r   r   r{   ry   re   rl   r   r   r   )r  	part_typerg   r   image_paramsr|   ru   rf   audio_paramsrm   ra   input_audio_paramsvideo_paramsrr   s                 rK   #_parse_chat_message_content_mm_partr1    s9   " d     &&I88FD!!D)S!! 
"i=&@&@T\	*400 ##6(B(Bf(L(LNNT   '!! D,$ KTRRL$((d;;I)T** 7 &MM%66		))$8$ L %((d;;I	))T!!94 L (++NDAAL!<//T!!94 L (++NDAAL!<//$;T L %((d;;I)T** 7 &MM%66		))88M"".!%d38nd!;!; "444$ KTRRL$((d;;I)T** 7 &MM%66		))CDDDi%% ECDDD111rM   )r  r"  r   parts
mm_tracker
wrap_dictsc                   t          t                               }|                                }|D ],}t          ||||          }|r|                    |           -|rt          | |          gS t          t           t                   |          }	|                                }
|
rt          |
|	|          }nd
                    |	          }t          | |          gS )Nr4  r  )r   r   r  )r   r  r(   _parse_chat_message_content_partr   r   r   r[   rr  r  r   )r   r2  r3  r4  r  r   	mm_parserr  	parse_resr  rr  r  s               rK   !_parse_chat_message_content_partsr:  a  s     < ""G((**I & &4!1	
 
 
	  	&NN9%%% A#w???@@cG$$E&==?? '6"E+=
 
 ii&&T;???@@rM   r8  c                   t          | t                    r| S t          |           \  }}|t          v r |t                              d| |           dS |dv rt          t          |          }|rd|dS |S |                     dd          }|t          |          }d}|dk    r8|t          t          j        |          nd}	|	                    |	|           d}n|d	v r/t          t          |          }|
                    ||           d}nS|d
k    rM|/t          t          t          t          t          f         z  |          nd}|                    ||           d}n |dk    rL|/t          t          t          t          t          f         z  |          nd}|                    ||           d}n|dk    r.t          t          |          }|                    ||           d}nz|dk    r.t          t          |          }
|                    |
|           d}nF|dk    r.t          t          |          }|                    ||           d}nt%          d|           |rd|in|rt&          |         ndS )a|  Parses a single part of a conversation. If wrap_dicts is True,
    structured dictionary pieces for texts and images will be
    wrapped in dictionaries, i.e., {"type": "text", "text", ...} and
    {"type": "image"}, respectively. Otherwise multimodal data will be
    handled by mm_parser, and texts will be returned as strings to be joined
    with multimodal placeholders.
    NzKSkipping multimodal part '%s' (type: '%s') with empty / unparsable content.)r  r&  r'  r"  r   r  rb   r  rg   ru   rT   )r|   r(  rf   rm   rU   ra   r}  rr   rV   zUnknown part type: rb   )r   r[   r1  PART_TYPES_TO_SKIP_NONE_CONTENTr   r   r   r   r#   ry  rt  ri   rv  r  r|  r    r  r  r   rn  )r  r8  r4  r  r,  r   str_contentrg   r   image_contentdict_contents              rK   r7  r7    s    $ <TBBIw 333/		
 	
 	
 tPPP3(( 	"K888 88FD!!D4yyHK6=6IU['222t!!-666	2	2	23((k4000	n	$	$9@9L$sT#s(^+W555RV$$Wd333	n	$	$9@9L$sT#s(^+W555RV$$Wd333	k	!	!3((k4000	m	#	#J00##L$777	k	!	!3((k4000!"C	"C"CDDD 	S5GQ'11TrM   messagecontent_formatc                    | d         }|                      d          }|                      d          p|                      d          }|g }n't          |t                    rt          d|          g}t	          ||||dk    |          }|D ]}|d	k    rft          |           }	d
|	v r |	d
         t          |	d
                   |d
<   |0t          t          |          |d<   t          t          |          |d<   n$|dk    rt          |           }	d|	v r|	d         |d<   d| v r&t          | d         t                    r| d         |d<   |dk    r|                      dd           |d<   |S )Nr   r   r   r   r  r<  r   r6  	assistantr   toolr   r?   	developerr   )	r   r   r[   r   r:  _AssistantParserr   r   _ToolParser)
rA  r3  rB  r  r   r   r   result
result_msg
parsed_msgs
             rK   _parse_chat_message_contentrL    s    6?Dkk)$$GK((LGKK8K,L,LI	GS	!	! R56PPPQ."h.-  F  = =
;)'22J
 z))j.F.R+/
<0H+I+I
<($*.sI*>*>
;'263 3
./ V^^$W--J++-7-G
>*WGFOS!A!A!(Jv;")++gt"<"<JwMrM   messagesc                    | D ]}|d         dk    rd|v r|                     d          }t          |t                    s=t          |          dk    r|                    dd            g|D ]d}|d                              d          x}r:t          |t
          t          f          st          j        |          |d         d<   Yi |d         d<   ed S )Nr   rD  r   r   function	arguments)r   r   r   r   r  ri   jsonloads)rM  rA  r   r   r   s        rK   _postprocess_messagesrS    s      7 76?k))lg.E.E \22Jj$// :!##L$///" 7 7":.22;???7 7%gd|<< L8<
78K8KZ(546D$[11#7 7rM   rw   c                    g }t          |          }| D ]C}t          ||||dk    o|j        d uo|j        j                  }|                    |           Dt          |           |                                \  }}|||fS Nr   )r  )rX  rL  r  interleave_mm_stringsr  rS  r[  	rM  rw   rB  conversationr3  r  sub_messagesr   r   s	            rK   parse_chat_messagesrZ  (  s     /1L&|44J * *2(* I 2$>I 2H	
 	
 	
 	L)))),'''"0022GX(**rM   c                   K   g }t          |          }| D ]C}t          ||||dk    o|j        d uo|j        j                  }|                    |           Dt          |           |                                 d {V \  }}|||fS rU  )r`  rL  r  rV  r  rS  r[  rW  s	            rK   parse_chat_messages_asyncr\  I  s       /1L+L99J * *2(* I 2$>I 2H	
 	
 	
 	L)))),'''(6688888888GX(**rM   rX  c                     d}| D ]F}|d         dk    r8|                     d          }||t          t          |                    ndz  }G|S )Nr   r   rD  r   )r   r   r   )rX  rD  r  r   s       rK   get_history_tool_calls_cntr^  j  sb    
C J Jv;+%%..JJ,B3tJ''(((ICJrM   randomid_typec                 @    | dk    rd| d| S dt                       S )Nkimi_k2z
functions.:zchatcmpl-tool-r9   )r`  	func_namerD  s      rK   make_tool_call_idre  s  s7    )-I----- 0///rM   )r_  NN)rc  rQ  rF   abcr   r   collectionsr   r   collections.abcr   r   r	   	functoolsr
   r   r   	itertoolsr   pathlibr   typingr   r   r   r   r   r   r   openai.types.chatr   r   r   r   r   r   r   r   r   $OpenAIChatCompletionContentPartParamr    OpenAIChatCompletionMessageParam@openai.types.chat.chat_completion_content_part_input_audio_paramr    openai.types.responsesr!   openai_harmonyr"   OpenAIHarmonyMessagePILr#   pydanticr$   r%   r&   typing_extensionsr'   r(   vllmr)   vllm.configr*   vllm.loggerr+   vllm.model_executor.modelsr,   vllm.multimodalr-   r.   r/   vllm.multimodal.inputsr0   r1   r2   r3   r4   r5   vllm.multimodal.processingr6   vllm.multimodal.utilsr7   r8   
vllm.utilsr:   vllm.utils.collection_utilsr;   vllm.utils.import_utilsr<   r>   r=   globalsrJ   r   r[   rL   r   rO   rn  rX   r`   re   rl   ro   rq   rt   ry   r{   r   r   r   r\   r   r   ChatTemplateContentFormatOptionChatTemplateContentFormatr,  r   r   r   r   r   ri   r   r   r   r   r   rJ  objectrV  rX  r`  r%  r]  rh  r  r   r  r  r  r  r  r	  r  r  r   r#  r  r  validate_pythonr  r  r%  r  r  r)  r1  r=  r:  r7  rG  rH  rL  rS  rZ  r\  r^  re  rS   rM   rK   <module>r     s      # # # # # # # # , , , , , , , , 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9                   Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	           X W W W W W : : : : : : : : : : : :       7 7 7 7 7 7 7 7 7 7 2 1 1 1 1 1 1 1       # # # # # # # # # # # # 9 9 9 9 9 9 W W W W W W W W W W                ? > > > > > J J J J J J J J " " " " " " 2 2 2 2 2 2 . . . . . . 4LLL:nggiiHHLJw		733E	X		Kc K K K K"    *          y    ( ( ( ( ()5 ( ( ( (    	         	         y    ( ( ( ( ()5 ( ( ( (< < < < <y < < <    y    "    )5    $	 	 	 	 	)5 	 	 	 	    )5    "    	    , ))*./ ** ,	,
 // 22 00 00 2	2 2
2 

 (( 	   "( ( ( ( (y ( ( ( (< %&' I   ( ( ( ( ()5 ( ( ( (6 #**D"E  $$67  M WT]]	 	 	 	 	3 	 	 	.%,.). . . .>0T#~-./0)0 0 0 0f00S	0 *0 0 0 0,! !! ! ! ! !H!!	#Y! ! ! !HW" W" W" W" W"WR[ W" W" W"thCeFC$J,>&?!@@Ah)h "&c49n!5h 112	h h h hV- - - - -5eFC$J<N6OP - - -2 2 2 2 2ifcDj.@(ABC2 2 2,:" :" :" :" :"# :" :" :"zm4 m4 m4 m4 m49 m4 m4 m4`R4 R4 R4 R4 R4#> R4 R4 R4jT$*t*; T T T TD -C -C -C#:$-C -C 	4Z	-C -C -C -C` 'Y':;;  L L L#:$L L 	4Z	L L L Lc4i15c   0;c4i0;90; 0; 		0; 0; 0; 0;h gd>??WT#LMM WT#LMM GD"JKK DEE'$ HIIwt>??{>??O{>??O{>??O([)@AAQ S#X.;hFi F F F =<EEBBCCWWVVSSSSJJVVPPEEVV  t,-|;<>   (]2
(]2
3]2 ]2 ]2 ]2@# "A
"A23"A *"A
 "A "A 

"A "A "A "AJN
(N*N 	N
 N DN N N Nd 74!DEE gd:;;/'/)/ ./ 	/
 

/ / / /d7D)<$= 7$ 7 7 7 74+-.++ .+ 		+ + + +B+-.++ .+ 		+ + + +BT2E-F    0 0s 0 0 0 0 0 0rM   