
    .`i                        d dl mZmZmZ d dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+  ede          Z, G d de(e                   Z-ddde,de'e,         dedz  fdZ. ej/        e.ee           G d de                      Z0dS )    )IterableMappingSequence)TypeVarN)BatchFeaturePixtralVisionConfig)
VllmConfig)Mistral3DummyInputsBuilder Mistral3ForConditionalGenerationMistral3MultiModalProjectorMistral3ProcessingInfo_build_mistral3_infoinit_vision_tower_for_llava)PixtralHFEncoderInfo)AutoWeightsLoaderWeightsMapperinit_vllm_registered_modelmaybe_prefix)MULTIMODAL_REGISTRY)BaseMultiModalProcessorCache)MultiModalFieldConfigMultiModalKwargsItems)ImageProcessorItemsMultiModalDataItems)BaseDummyInputsBuilderBaseMultiModalProcessorPromptReplacementPromptUpdatePromptUpdateDetails_I)boundc            
            e Zd Zdedeeef         deeef         deeef         def
 fdZdedeeef         deeef         fd	Z	d
e
deeef         dedee         fdZ xZS )LightOnOCRMultiModalProcessorpromptmm_data	mm_kwargs
tok_kwargsreturnc                    t                                          ||||          }|                    d          }|| j                                        }| j                                        }|                                }	|	                    |j                  }
|	                    |j                  }t          j
        |t          j        |
|g                     }||                             d          |d<   d|v r$|d         |                             d          |d<   |                    d          }|G|d         }t          |          t          |          k    sJ d t          ||          D             |d<   |S )N)r$   r%   r&   r'   	input_idsr   attention_maskpixel_valuesimage_sizesc                 <    g | ]\  }\  }}|d d d |d |f         S )N ).0phws       y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/lightonocr.py
<listcomp>zDLightOnOCRMultiModalProcessor._call_hf_processor.<locals>.<listcomp>V   sC     1 1 1!*FQ!!!RaR!)1 1 1    )super_call_hf_processorgetinfoget_hf_processorget_tokenizer	get_vocabimage_break_tokenimage_end_tokentorchisintensor	unsqueezelenzip)selfr$   r%   r&   r'   processed_outputsr*   	processor	tokenizervocabbreak_idend_id	keep_maskr,   r-   	__class__s                  r4   r8   z0LightOnOCRMultiModalProcessor._call_hf_processor-   s    "GG66!	 7 
 
 &))+66	 	2244I	//11I''))Eyy!<==HYYy899F h/00  I
 .7y-A-K-KA-N-Nk*#4446G$77&Yq\\ ""23
 ),,^<<#+M:K|$$K(8(888881 1.1,.L.L1 1 1n- ! r6   	hf_inputshf_processor_mm_kwargsc                 l    t          t          j        d          t          j        d                    S )Nimage)r,   image_embeds)dictr   batched)rF   rO   rP   s      r4   _get_mm_fields_configz3LightOnOCRMultiModalProcessor._get_mm_fields_config\   s7    
 .6w??.6w??
 
 
 	
r6   mm_itemsout_mm_kwargsc                     | j                                         }|j        t          |j        t
                    sJ t          |          dt          ffd}t          dg|          gS )Nitem_idxc                                          dt                    }|                    |           }                    |j        |j                  \  }}g||z  z  }t          j        |          S )NrR   )image_widthimage_height)	get_itemsr   get_image_sizeget_patch_grid_sizewidthheightr   select_token_id)	rZ   imagessizencolsnrowstokensencoder_infoimage_token_idrW   s	         r4   replacezBLightOnOCRMultiModalProcessor._get_prompt_updates.<locals>.replacer   sy    ''1DEEF((22D';; JT[ <  LE5 %%7F&6v~NNNr6   rR   )modalitytargetreplacement)	r:   get_hf_configimage_token_index
isinstancevision_configr   r   intr   )rF   rW   rP   rX   	hf_configrk   ri   rj   s    `    @@r4   _get_prompt_updatesz1LightOnOCRMultiModalProcessor._get_prompt_updatesf   s     I++--	"4)13FGGGGG+I66	Oc 	O 	O 	O 	O 	O 	O 	O 	O  .)9w  
 	
r6   )__name__
__module____qualname__strr   objectr   r8   r   rV   r   r   r   r   ru   __classcell__)rN   s   @r4   r#   r#   ,   s       -!-! f%-! 3;'	-!
 CK(-! 
-! -! -! -! -! -!^

 !(V 4
 
++	,	
 
 
 

%
 !(V 4
 -	

 
,	
 
 
 
 
 
 
 
r6   r#   cacher:   dummy_inputsr}   c                T    t          | t                    sJ t          | ||          S )Nr|   )rq   r   r#   )r:   r~   r}   s      r4   _build_LightOnOCR_processorr      s0     d233333(|5IIIIr6   )r:   r~   c                       e Zd Z eddddd          Zddd	ed
eddfdZdee	ee
j        f                  dee         fdZdS )"LightOnOCRForConditionalGenerationzvision_tower.zmulti_modal_projector.zlanguage_model.lm_head.zlanguage_model.model.)zmodel.vision_encoder.zmodel.vision_projection.zlm_head.zmodel.language_model.)orig_to_new_prefix )prefixvllm_configr   r(   Nc                   t           j                            |            |j        j        }|j        }|j        j        }|| _        || _        t          ||dt          |d                    | _
        t          |j        j        |j        j        |j        |j        |j        j        |j        |t          |d                    | _        t)          ||j        t          |d                    | _        | j        j        | _        d S )NFvision_tower)quant_configrequire_post_normr   multi_modal_projector)vision_hidden_sizetext_hidden_sizeprojector_hidden_actspatial_merge_size
patch_sizemultimodal_projector_biasr   r   language_model)r   rt   r   )nnModule__init__model_configrt   r   multimodal_configconfigr   r   r   r   rr   hidden_sizetext_configr   r   r   r   r   r   r   make_empty_intermediate_tensors)rF   r   r   r   r   r   s         r4   r   z+LightOnOCRForConditionalGeneration.__init__   s   
	4   )3"/'4F!27%#77	
 
 
 &A%3?#/;!'!<%8+6&,&F%(?@@	&
 	&
 	&
" 9#((899
 
 
 ? 	,,,r6   weightsc                 X    t          |           }|                    || j                  S )N)mapper)r   load_weightshf_to_vllm_mapper)rF   r   loaders      r4   r   z/LightOnOCRForConditionalGeneration.load_weights   s+    "4((""743I"JJJr6   )rv   rw   rx   r   r   r	   ry   r   r   tupler@   Tensorsetr   r/   r6   r4   r   r      s         &%4(@1%<	
 
   BD $
 $
 $
z $
3 $
 $
 $
 $
 $
LKHU33D-E$F K3s8 K K K K K Kr6   r   )1collections.abcr   r   r   typingr   r@   torch.nnr   transformersr   r   vllm.configr	   #vllm.model_executor.models.mistral3r
   r   r   r   r   r   "vllm.model_executor.models.pixtralr    vllm.model_executor.models.utilsr   r   r   r   vllm.multimodalr   vllm.multimodal.cacher   vllm.multimodal.inputsr   r   vllm.multimodal.parser   r   vllm.multimodal.processingr   r   r   r   r   r    r#   r   register_processorr   r/   r6   r4   <module>r      s   8 7 7 7 7 7 7 7 7 7                    
 # " " " " "                D C C C C C            0 / / / / / > > > > > > O O O O O O O O J J J J J J J J              WT/000T
 T
 T
 T
 T
$;<R$S T
 T
 T
v 26	J J J
J(,J ($.	J J J J ('	+  
2K 2K 2K 2K 2K)I 2K 2K 
2K 2K 2Kr6   