
    .`i`                        d Z ddlmZmZ ddlmZ ddlmZmZ ddl	Z	ddl
mZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9 dZ:dZ;g dZ<dddddZ=ddd d dZ> G d! d"e4          Z? G d# d$e4          Z@ G d% d&e	j        jA                  ZB G d' d(e-          ZC G d) d*e+eC                   ZD G d+ d,e,eC                   ZE e"jF        eEeCeD-           G d. d/ejA        e8e9                      ZGdS )0zPyTorch Ovis model.    )IterableMapping)partial)	AnnotatedLiteralN)BaseImageProcessorBatchFeaturePretrainedConfig)
VllmConfig)BaseDummyOptions)ReplicatedLinear)QuantizationConfig)VisualEmbedding)Siglip2NavitModel)AutoWeightsLoader
flatten_bninit_vllm_registered_modelmaybe_prefix)MULTIMODAL_REGISTRY)MultiModalDataDictMultiModalFieldConfigMultiModalKwargsItems)	ImageSizeMultiModalDataItems)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoPromptReplacement)IntermediateTensors)Ovis2_5Processor)TensorSchemaTensorShape   )MultiModalEmbeddingsSupportsMultiModal
SupportsPPz<image>z<video>)iiiiz	<unused0>z<|reserved_special_token_0|>z<|image_pad|>)gemma2llamaqwen2qwen3   i igP c                      e Zd ZU dZed         ed<   eej         e	dd          f         ed<   eej         e	d          f         ed<   ee
e          e	d	          f         ed
<   eej         e	d	d          f         ed<   dS )Ovis2_5ImagePatchInputsa  
    Dimensions:
        - bnp: Batch size * number of images * number of patches
        - patch_size: patch_size_x * patch_size_y * num_channels
        - patch_indicators: Batch size * (number of patches + 1)
        - bn: Batch size * number of images
    image_patchestypebnp
patch_size	flat_datapatch_indicatorsindicator_tokensbnpatches_per_item   gridsN__name__
__module____qualname____doc__r   __annotations__r   torchTensorr"   listint     v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/ovis2_5.pyr-   r-   >              /
""""{{5,'G'GGHHHHkk:L.M.M MNNNNS	;;t+<+< <====U\;;tQ#7#77888888rD   r-   c                      e Zd ZU dZed         ed<   eej         e	dd          f         ed<   eej         e	d          f         ed<   ee
e          e	d	          f         ed
<   eej         e	d	d          f         ed<   dS )Ovis2_5VideoPatchInputsa  
    Dimensions:
        - bnp: Batch size * number of videos * number of patches
        - patch_size: patch_size_x * patch_size_y * num_channels
        - patch_indicators: Batch size * (number of patches + 1)
        - bn: Batch size * number of videos
    video_patchesr/   r0   r1   r2   r3   r4   r5   r6   r7   r8   Nr9   rC   rD   rE   rH   rH   O   rF   rD   rH   c            	       4    e Zd ZdZ	 	 ddedededz  def fdZ	 	 ddededz  defd	Z	e
d
ej        fd            Ze
d
ej        fd            Zdej        d
ej        fdZdej        dej        d
ej        fdZdej        dej        d
ej        fdZ xZS )VisualTokenizerz
    VIT
    N configvisual_vocab_sizequant_configprefixc                    t                                                       || _        |                     ||| d          | _        |t          t                    z
  }t          j        	                    t          | j        j        | j        j        dz  z  |dd          t          j                            |                    | _        d S )Nz.vitrM   rO   rP      F)biasreturn_bias)super__init__rM   _init_backbonevitlenINDICATOR_IDSr?   nn
Sequentialr   hidden_sizehidden_stride	LayerNormhead)selfrM   rN   rO   rP   head_dim	__class__s         rE   rW   zVisualTokenizer.__init__e   s     	&&%??? ' 
 
 %s='9'99H'''$+*CQ*FF!	   Hx((
 
			rD   c                 d    |j         }|dk    rt          |||          S t          d|           )Nsiglip2_navitrR   z)Unsupported visual tokenizer model_type: )
model_typer   
ValueError)rb   rM   rO   rP   rg   s        rE   rX   zVisualTokenizer._init_backbone   sP     &
(($)   
 QZQQRRRrD   returnc                 X    t          | j                                                  j        S N)nextra   
parametersdtyperb   s    rE   rn   zVisualTokenizer.dtype   s!    DI((**++11rD   c                 X    t          | j                                                  j        S rk   )rl   ra   rm   devicero   s    rE   rq   zVisualTokenizer.device   s!    DI((**++22rD   logitsc                 x    t          j        |dt           j                                      |j                  }|S )N)dimrn   )r?   softmaxfloat32torn   )rb   rr   tokenss      rE   tokenizezVisualTokenizer.tokenize   s/    v2U]CCCFFv|TTrD   pixel_values	grid_thwsc                     |                      ||          }|j        \  }}|                    || j        j        dz  z  d          }|S )NrS   rt   )rY   shapereshaperM   r_   )rb   r{   r|   featuresseq_len_s         rE   encodezVisualTokenizer.encode   sK     88L)44^
##G0I10L$MrRRrD   c                     |                      ||          }|                     |          }|                     |          }t          j        j                            |dt          t                    fdd          }|S )Nr   constant)modevalue)	r   ra   rz   r?   r\   
functionalpadrZ   r[   )rb   r{   r|   r   rr   ry   s         rE   forwardzVisualTokenizer.forward   sz     ;;|Y778$$v&& $((M""#	 ) 
 
 rD   )NrL   )r:   r;   r<   r=   r
   rB   r   strrW   rX   propertyr?   rn   rq   r@   rz   r   r   __classcell__rd   s   @rE   rK   rK   `   s         37
 
 
 
 )4/	

 
 
 
 
 
 
: 37	S S S )4/S 	S S S S 2u{ 2 2 2 X2 3 3 3 3 X3u|     !L5:\	   !L5:\	       rD   rK   c                      e Zd Zd Zd ZdefdZdefdZde	ee
dz  f         fdZdefdZd	d
de
de
de
deee
f         fdZde
fdZde
de
fdZde
de	ee
f         de
fdZde
de
de
dedz  de
f
dZde
de	ee
f         de
fdZdS )Ovis2_5ProcessingInfoc                 4    | j                                         S rk   )ctxget_hf_configro   s    rE   r   z#Ovis2_5ProcessingInfo.get_hf_config   s    x%%'''rD   c                     |                                  j        }| j                            t          |                                 |j        |j        |j                  S )N)image_pad_tokenr1   r_   temporal_patch_size)	r   
vit_configr   get_hf_processorr    get_image_pad_tokenr1   r_   r   )rb   kwargsr   s      rE   r   z&Ovis2_5ProcessingInfo.get_hf_processor   sY    ''))4
x(( 4466!,$2 * > ) 
 
 	
rD   ri   c                     |                                                                  }|j        }t                              |          S rk   )r   get_text_configrg   IMAGE_PAD_TOKEN_MAPget)rb   hf_text_configtext_model_types      rE   r   z)Ovis2_5ProcessingInfo.get_image_pad_token   s;    ++--==??(3"&&777rD   c                 4    |                                  j        S rk   )r   image_processorro   s    rE   get_image_processorz)Ovis2_5ProcessingInfo.get_image_processor   s    $$&&66rD   Nc                     d ddS )Nr#   imagevideorC   ro   s    rE   get_supported_mm_limitsz-Ovis2_5ProcessingInfo.get_supported_mm_limits   s    ***rD   c                 $    t          dd          S )Ni   )widthheight)r   ro   s    rE   !get_image_size_with_most_featuresz7Ovis2_5ProcessingInfo.get_image_size_with_most_features   s     tD1111rD   r#   )
num_framesimage_widthimage_heightr   c                    |                                  }|j        }|j        }|j        }|| |z  z   }t	          ||z  d          }	||z  }
||z  }|	|
z  |z  }|}|S )Nr#   )r   r   r1   r   max)rb   r   r   r   	hf_configr   r1   r   padded_num_framesgrid_tgrid_hgrid_wnum_patchesnum_vision_tokenss                 rE   get_num_image_tokensz*Ovis2_5ProcessingInfo.get_num_image_tokens   s     &&((	)
*
(< ':+8K*KL&*==qAA+
*vo.'  rD   c                 ^    |                                  \  }}|                     ||          S )N)r   r   )r   r   )rb   target_widthtarget_heights      rE   get_max_image_tokensz*Ovis2_5ProcessingInfo.get_max_image_tokens   s:    &*&L&L&N&N#m(($= ) 
 
 	
rD   
max_tokensc                     |                                  \  }}d}	 |dz   }|                     |||d           }||k    rn|}(|S )Nr   Tr#   r   r   r   r   )r   get_num_video_tokens)rb   r   r   r   r   next_num_framesnext_max_tokenss          rE   _get_max_video_framesz+Ovis2_5ProcessingInfo._get_max_video_frames   ss    &*&L&L&N&N#m

	)(1nO"77(** $	 8  O ++(J
	) rD   r   	mm_countsc                     |                     dd          }|                     dd          }|                                 |z  }|                     ||z
            }|t          |d          z  }t          |d          S )Nr   r   r   r#   )r   r   r   r   )rb   r   r   
max_images
max_videosmax_image_tokensmax_total_framesmax_frames_per_videos           rE   !get_num_frames_with_most_featuresz7Ovis2_5ProcessingInfo.get_num_frames_with_most_features   s    
 ]]7A..
]]7A..
4466C55g@P6PQQ/3z13E3EE'+++rD   r   c                6    |                      |||          }|S )N)r   r   r   )r   )rb   r   r   r   r   num_video_tokenss         rE   r   z*Ovis2_5ProcessingInfo.get_num_video_tokens  s.      44#,: 5 
 
  rD   c                     |                                  \  }}|                     |||                     ||          d           S )Nr   )r   r   r   )rb   r   r   r   r   s        rE   get_max_video_tokensz*Ovis2_5ProcessingInfo.get_max_video_tokens  sS    
 '+&L&L&N&N#m(($&==gyQQ 	 ) 
 
 	
rD   )r:   r;   r<   r   r   r   r   r   r   r   rB   r   r   r   tupler   r   r   r   r   r   rC   rD   rE   r   r      s       ( ( (
 
 
8S 8 8 8 8
7%7 7 7 7 7+cDj)A + + + +29 2 2 2 2 ! ! ! ! 	!
 ! 
y#~	! ! ! !*
c 
 
 
 
      
,
, 38$
, 
	
, 
, 
, 
,    	 
   ,d2  
       

 38$
 
	
 
 
 
 
 
rD   r   c            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	Ovis2_5DummyInputsBuilderr   ri   c                     |                     dd          }|                     dd          }t          |z  t          |z  z   S )Nr   r   r   )r   IMAGE_TOKENVIDEO_TOKEN)rb   r   
num_images
num_videoss       rE   get_dummy_textz(Ovis2_5DummyInputsBuilder.get_dummy_text'  s=    ]]7A..
]]7A..
Z'+
*BBBrD   Nr   
mm_optionsc                    |                     dd          }|                     dd          }| j                                        \  }}| j                            ||          }|r|                     d          nd }	|r|                     d          nd }
|                     ||||	          |                     |||||
          d}|S )Nr   r   r   )r   r   r   	overrides)r   r   r   r   r   r   )r   infor   r   _get_dummy_images_get_dummy_videos)rb   r   r   r   r   r   r   r   target_num_framesimage_overridesvideo_overridesmm_datas               rE   get_dummy_mm_dataz+Ovis2_5DummyInputsBuilder.get_dummy_mm_data,  s     ]]7A..
]]7A..
&*i&Q&Q&S&S#m IGGY
 
 6@I*..111T5?I*..111T ++"$%)	 ,   ++"$,%) ,  
 
 rD   rk   )
r:   r;   r<   r   r   rB   r   r   r   r   rC   rD   rE   r   r   &  s        CS(9 Cc C C C C =A	     38$  C!112T9	 
 
           rD   r   c            
           e Zd Zdee         dee         fdZdedeeef         deeef         deeef         de	f
 fdZ
d	ee         dee         fd
Zde	deeef         deeef         fdZdedeeef         dedee         fdZ xZS )Ovis2_5MultiModalProcessorvisual_indicatorsri   c                 `    | j                                         }|j        fd|D             S )z|
        Filter image indicators placeholders and convert them to corresponding
        tokens in visual tokenizer.
        c                 v    g | ]5}|d k     t          t                    z
  t          |dz             z   dz
  6S )ii,  r#   )rZ   r[   abs).0xvte_vocab_sizes     rE   
<listcomp>zQOvis2_5MultiModalProcessor.visual_indicators_to_visual_tokens.<locals>.<listcomp>Z  sK     
 
 
4xx S///#a#g,,>BxxrD   )r   r   rN   )rb   r   r   r   s      @rE   "visual_indicators_to_visual_tokensz=Ovis2_5MultiModalProcessor.visual_indicators_to_visual_tokensP  sK     I++--	"4
 
 
 
&
 
 
 	
rD   promptr   	mm_kwargs
tok_kwargsc                     
 |sP j                                         }|                    |d          }t          t	          |g          d          S t                                          ||||          } j                                         
d|v r9
fd|d	         D             } fd
|D             }	t          j	        |	          |d<   d|v r9
fd|d         D             } fd|D             }	t          j	        |	          |d<   |S )NF)add_special_tokens)	input_idspt)tensor_type)r   r   r   r   videosc                 <    g | ]}                     d d          S )r#   r#   r#   Tconstruct_visual_indicatorsr   gridhf_processors     rE   r   zAOvis2_5MultiModalProcessor._call_hf_processor.<locals>.<listcomp>v  s9     ! ! ! 88DII! ! !rD   video_gridsc                 :    g | ]}                     |          S rC   r   r   	indicatorrb   s     rE   r   zAOvis2_5MultiModalProcessor._call_hf_processor.<locals>.<listcomp>z  7           77	BB     rD   video_indicator_tokensimagesc                 <    g | ]}                     d d          S )r   Fr   r   s     rE   r   zAOvis2_5MultiModalProcessor._call_hf_processor.<locals>.<listcomp>  s9     ! ! ! 88EJJ! ! !rD   r8   c                 :    g | ]}                     |          S rC   r  r  s     rE   r   zAOvis2_5MultiModalProcessor._call_hf_processor.<locals>.<listcomp>  r  rD   r4   )
r   get_tokenizerr   r	   dictrV   _call_hf_processorr   r?   tensor)rb   r   r   r   r   	tokenizer
prompt_idsprocessed_outputsr   r4   r   rd   s   `         @rE   r  z-Ovis2_5MultiModalProcessor._call_hf_processor`  s     	P	//11I"))&U)KKJ
| < < <$OOOO!GG66!	 7 
 
 y1133w! ! ! !-m<! ! !       !2      ;@,GW:X:X67w! ! ! !-g6! ! !       !2     
 5:LAQ4R4R01  rD   prompt_tokensc                     |S rk   rC   )rb   r  s     rE   _apply_hf_processor_tokens_onlyz:Ovis2_5MultiModalProcessor._apply_hf_processor_tokens_only  s
     rD   	hf_inputshf_processor_mm_kwargsc           
         t          t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          t          j        d                    S )Nr   r   )r{   r8   r4   video_pixel_valuesr  r   )r  r   batched)rb   r  r  s      rE   _get_mm_fields_configz0Ovis2_5MultiModalProcessor._get_mm_fields_config  so    
 .6w??'/882:7CC4<WEE#8#@#I#I-5g>>
 
 
 	
rD   mm_itemsout_mm_kwargsc                 @     dt           f fdfddD             S )Nmodalityc                     |dk    rd         |          }|d         j         }n!|dk    rd         |          }|d         j         }j                                        }|                    |d                   S )Nr   r8   r   r   r   )datar   r   construct_visual_placeholders)item_idxr  out_itemr   r   r  rb   s        rE   get_replacement_oviszLOvis2_5MultiModalProcessor._get_prompt_updates.<locals>.get_replacement_ovis  s    7""(1(;(-W$$(1(;.395577L==Q  rD   c           
      v    g | ]5}t          ||d k    rt          nt          t          |                    6S )r   )r  )r  targetreplacement)r   r   r   r   )r   r  r#  s     rE   r   zBOvis2_5MultiModalProcessor._get_prompt_updates.<locals>.<listcomp>  s_     
 
 
  !&.'&9&9{{{#$88LLL  
 
 
rD   r   )r   )rb   r  r  r  r#  s   `  `@rE   _get_prompt_updatesz.Ovis2_5MultiModalProcessor._get_prompt_updates  s\    
	S 
	 
	 
	 
	 
	 
	 
	
 
 
 
 /
 
 
 	
rD   )r:   r;   r<   rA   rB   r   r   r   objectr	   r  r  r   r  r   r   r   r'  r   r   s   @rE   r   r   O  sr       
9
 
c
 
 
 
 *!*! f%*! 3;'	*!
 CK(*! 
*! *! *! *! *! *!XCy 
c   

 !(V 4
 
++	,	
 
 
 

%
 !(V 4
 -	

 
	 
 
 
 
 
 
 
 
rD   r   )r   dummy_inputsc                       e Zd Zededededz  fd            Zddded	ef fd
Zde	de
dz  fdZde	dedz  fdZde
ez  defdZde	defdZde	defdZ	 	 ddej        dej        dedz  dej        dz  de	dej        ez  fdZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Ovis2_5r  iri   Nc                     |                     d          rt          S |                     d          rt          S t          d          )Nr   r   z)Only image or video modality is supported)
startswithr   r   rh   )clsr  r,  s      rE   get_placeholder_strzOvis2_5.get_placeholder_str  sI    w'' 	w'' 	DEEErD   rL   )rP   vllm_configrP   c                   t                                                       |j        j        }|j        }|| _        |                     |          5  t          |                    |j	                  t          |d                    | _        d d d            n# 1 swxY w Y   |                     |ddh          5  t          |j        |j        || d          | _        t#          |j        |j                  | _        d d d            n# 1 swxY w Y   | j                                        j        }t,          |         | _        |                                 j        | _        d S )Nllm)r1  rP   r   r   z.visual_tokenizer)rM   rN   rO   rP   )rV   rW   model_configr   rO   rM   _mark_language_modelr   with_hf_configtext_configr   r3  _mark_tower_modelrK   r   rN   visual_tokenizerr   r^   vter   rg   IMAGE_PAD_TOKEN_ID_MAPimage_pad_token_idget_language_modelmake_empty_intermediate_tensors)rb   r1  rP   rM   rO   r   rd   s         rE   rW   zOvis2_5.__init__  s   )3"/(.&&{33 	 	1'66v7IJJ#FE22  DH	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ##K'71CDD 	U 	U$3("(":) 333	% % %D! 'v'?ASTTDH	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U +5577B"8"I ##%%E 	,,,s%   =BB!B=ADDDr   c           
      T    |                     dd           }|                     dd           }|                     dd           }||d S ||t          |t          j        t          f          st          dt          |                     t          |t          j        t          f          st          dt          |                     t          dt          |d           fd	|D             t          |d          t          |d          
          S t          d          )Nr{   r4   r8   *Incorrect type of pixel values. Got type: .Incorrect type of indicator_tokens. Got type: r.   Tconcatc                 T    g | ]$}|j         d          j        j        j        dz  z  %S r   rS   r~   rM   r   r_   r   r   rb   s     rE   r   z;Ovis2_5._parse_and_validate_image_input.<locals>.<listcomp>   A     " " " GAJ4;#9#G#JK" " "rD   r/   r2   r6   r4   r8    This line should be unreachable.)
pop
isinstancer?   r@   rA   rh   r/   r-   r   AssertionErrorrb   r   r{   r4   r8   s   `    rE   _parse_and_validate_image_inputz'Ovis2_5._parse_and_validate_image_input  sj    zz.$77!::&8$??

7D))$4$<4#(8(DlU\4,@AA  UlASASUU   .t0DEE  :!%&6!7!7: :  
 +$$\$???" " " ")" " " ",,<T!J!J!J t444	 	 	 	 ?@@@rD   c           
      T    |                     dd           }|                     dd           }|                     dd           }||d S ||t          |t          j        t          f          st          dt          |                     t          |t          j        t          f          st          dt          |                     t          dt          |d           fd	|D             t          |d          t          |d          
          S t          d          )Nr  r  r   r@  rA  rI   TrB  c                 T    g | ]$}|j         d          j        j        j        dz  z  %S rE  rF  rG  s     rE   r   z;Ovis2_5._parse_and_validate_video_input.<locals>.<listcomp>"  rH  rD   rI  rJ  )
rK  rL  r?   r@   rA   rh   r/   rH   r   rM  rN  s   `    rE   _parse_and_validate_video_inputz'Ovis2_5._parse_and_validate_video_input
  sk    zz"6==!::&>EE

=$//$4$<4#(8(DlU\4,@AA  UlASASUU   .t0DEE  :!%&6!7!7: :  
 +$$\$???" " " ")" " " ",,<T!J!J!J t444	 	 	 	 ?@@@rD   visual_inputc           
      T   |d         }|d         }|d         }|d         }t          t          d |                    }| j        j        }|                     |                    |          |          }|                     |          }	|                     |          }
|	                    |d          }|
                    |          }g }t          ||          D ]\  }}g }|                    d          }t          |j
        d                   D ]>}|                    t          j        |||dz            ||         gd                     ?|                    ||dz   d                     |                    t          j        |d                     t          |          S )	Nr2   r6   r4   r8   c                     | dk    rdn| dz   S )Nr#   rS   rC   )r   s    rE   <lambda>z/Ovis2_5._process_visual_input.<locals>.<lambda>5  s    q1uu!!!a% rD   r   )ru   r#   )rA   mapr9  rn   rx   r:  splitzip	unsqueezeranger~   appendr?   catr   )rb   rS  image_patches_flatpatches_per_imager4   r|   indicator_per_imagetarget_dtypevisual_tokensvisual_embedsindicator_embedsvisual_embeds_per_imageindicator_embeds_per_imagevision_embeddingsr  visualvision_embeddings_per_imager,  s                     rE   _process_visual_inputzOvis2_5._process_visual_input,  s    *+6();<'(:; )	"//1BCC
 
 ,2--!!,//
 
 //88$455"/"5"56GQ"5"O"O%5%;%;<O%P%P"!$&(?"
 "
 
	T 
	TIv +-'%%a((F6<?++  +22IyQU3VAY?QGGG    (..yQ/ABBB$$UY/JPQ%R%R%RSSSS&'''rD   c                 t    i }|D ]2}|dv rd|vr | j         di ||d<   |dv rd|vr | j        di ||d<   3|S )N)r{   r4   r8   r  )r  r  r   r   rC   )rO  rR  )rb   r   
modalities	input_keys       rE   %_parse_and_validate_multimodal_inputsz-Ovis2_5._parse_and_validate_multimodal_inputsQ  s    
   	V 	VIJJJJ..'Kt'K'U'Uf'U'U
8$RS SJ..'Kt'K'U'Uf'U'U
8$rD   c                 
    | j         di |}|sg S d}|D ]l}|dk    r/|d         }|                     |          }|t          |          z  }|dk    r/|d         }|                     |          }|t          |          z  }m|S )NrC   r  r   )rn  rj  r   )	rb   r   rl  multimodal_embeddingsr  image_inputimage_embeddingsvideo_inputvideo_embeddingss	            rE   embed_multimodalzOvis2_5.embed_multimodale  s    ?T?II&II
 	I:< # 	A 	AH8##(2#'#=#=k#J#J %/?)@)@@%8##(2#'#=#=k#J#J %/?)@)@@%$$rD   r   	positionsintermediate_tensorsinputs_embedsc                 @    |d }|                      ||||          }|S )N)r   rv  rw  rx  )r3  )rb   r   rv  rw  rx  r   hidden_statess          rE   r   zOvis2_5.forwardy  s=      + M !5'	 ! 
 
 rD   rz  c                 6    | j                             |          S rk   )r3  compute_logits)rb   rz  s     rE   r|  zOvis2_5.compute_logits  s     x&&}555rD   weightsc                 J    t          |           }|                    |          S rk   )r   load_weights)rb   r}  loaders      rE   r  zOvis2_5.load_weights  s#    "4((""7+++rD   )NN)r:   r;   r<   classmethodr   rB   r0  r   rW   r(  r-   rO  rH   rR  r$   rj  r  rn  ru  r?   r@   r   r   r|  r   r   setr  r   r   s   @rE   r+  r+    sF        F3 F3 F3: F F F [F BD 
 
 
z 
3 
 
 
 
 
 
: A A	 4	' A  A  A  AD A A	 4	' A  A  A  AD#(36MM#(	#( #( #( #(Jf     (% %4H % % % %0 <@-1 < < 2D8	
 |d*  
+	+   *6|6 
	6 6 6 6,HU33D-E$F ,3s8 , , , , , , , ,rD   r+  )Hr=   collections.abcr   r   	functoolsr   typingr   r   r?   torch.nnr\   transformersr   r	   r
   vllm.configr   vllm.config.multimodalr   !vllm.model_executor.layers.linearr   'vllm.model_executor.layers.quantizationr   vllm.model_executor.models.ovisr   'vllm.model_executor.models.siglip2navitr    vllm.model_executor.models.utilsr   r   r   r   vllm.multimodalr   vllm.multimodal.inputsr   r   r   vllm.multimodal.parser   r   vllm.multimodal.processingr   r   r   r   vllm.sequencer   *vllm.transformers_utils.processors.ovis2_5r    vllm.utils.tensor_schemar!   r"   
interfacesr$   r%   r&   r   r   r[   r   r;  r-   rH   ModulerK   r   r   r   register_processorr+  rC   rD   rE   <module>r     s     - - - - - - - -       % % % % % % % %        K K K K K K K K K K " " " " " " 3 3 3 3 3 3 > > > > > > F F F F F F ; ; ; ; ; ; E E E E E E            0 / / / / /         
 A @ @ @ @ @ @ @            . - - - - - G G G G G G > > > > > > > > L L L L L L L L L L((( +	   	  9 9 9 9 9l 9 9 9"9 9 9 9 9l 9 9 9"S S S S Seho S S Slm
 m
 m
 m
 m
. m
 m
 m
`& & & & & 67L M & & &Rj
 j
 j
 j
 j
!89N!O j
 j
 j
Z ('	*  
U, U, U, U, U,bi+Z U, U, 
U, U, U,rD   