
    .`iJ                     F   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dd	lmZmZmZ dd
lmZ ddlmZ  G d de          Z G d dee                   Z G d dej                  Z ej        eee           G d de                      ZdS )    )MappingN)GELUActivation)
VllmConfig)BaseDummyOptions)MULTIMODAL_REGISTRY)MultiModalDataDict   )LlavaDummyInputsBuilderLlavaNextMultiModalProcessorLlavaNextProcessingInfo)&LlavaOnevisionForConditionalGeneration)WeightsMapperc                   P    e Zd Zd ZdefdZdededededed	eeef         fd
ZdS )BeeProcessingInfoc                 4    | j                                         S N)ctxget_hf_config)selfs    r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/bee.pyr   zBeeProcessingInfo.get_hf_config   s    x%%'''    kwargsc                 &     | j         j        di |S )N )r   get_hf_processor)r   r   s     r   r   z"BeeProcessingInfo.get_hf_processor   s    (tx(226222r   original_heightoriginal_widthnpatchesnum_patch_heightnum_patch_widthreturnc                V   ddl }||z  }||z  }||z  }	||z  }
|	|
k    r4t          t          |||z  z  d                    }||z
  dz  }|d|z  z
  }n3t          t          |||z  z  d                    }||z
  dz  }|d|z  z
  }||z  }|}|                                 }t	          |dd          }t          |                    dd                    }|                    ||z  ||dz  z  z            }|d	k    r+t          ||z            }t          ||z            }||z  }|}||fS )
zAOverride to use correct max_num_patches from vision_aspect_ratio.r   N      vision_aspect_ratioanyres_max_9anyres_max_ g?)mathintroundr   getattrreplacesqrt)r   r   r   r   r   r    r)   current_heightcurrent_widthaspect_ratiocurrent_aspect_ratio
new_heightpadding	new_widthunpadded_featuresnewline_features	hf_configr%   max_num_patchesratioheight_factorwidth_factors                         r   _get_num_unpadded_featuresz,BeeProcessingInfo._get_num_unpadded_features   s    	!$44 ?2%7,~=...o)GH!LL J &
2q8G+q7{;NNn(HI1MM I %y0Q6G)Q[9M*]:) &&((	%i1FWW199-LLMM		]*o!.KL
 
 3;;% 788M}566L - <,!#344r   N)	__name__
__module____qualname__r   objectr   r*   tupler=   r   r   r   r   r      s        ( ( (3 3 3 3 305 05 	05
 05 05 05 
sCx05 05 05 05 05 05r   r   c            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	BeeDummyInputsBuilder	mm_countsr!   c                 <    |                     dd          }d}||z  S )Nimager   z<image>)get)r   rE   
num_imagesimage_tokens       r   get_dummy_textz$BeeDummyInputsBuilder.get_dummy_textS   s%    ]]7A..
Z''r   Nseq_len
mm_optionsc                     |                     dd          }| j                                        \  }}|r|                     d          nd }d|                     ||||          iS )NrG   r   )widthheightrI   	overrides)rH   info!get_image_size_with_most_features_get_dummy_images)r   rL   rE   rM   rI   target_widthtarget_heightimage_overridess           r   get_dummy_mm_dataz'BeeDummyInputsBuilder.get_dummy_mm_dataY   s|     ]]7A..
&*i&Q&Q&S&S#m5?I*..111T T++"$%)	 ,  
 	
r   r   )
r>   r?   r@   r   strr*   rK   r   r   rX   r   r   r   rD   rD   R   s        (S(9 (c ( ( ( ( =A	
 

 38$
 C!112T9	

 

 
 
 
 
 
r   rD   c                   B     e Zd Z fdZdej        dej        fdZ xZS )BeeMultiModalProjectorc                    t                                                       t          j        |j        j        d          | _        t          j        |j        j        |j        j        dz  d          | _	        t                      | _        t          j        |j        j        dz  |j        j        d          | _        d S )Ngư>)eps   T)bias)super__init__nn	LayerNormvision_confighidden_sizepre_normLineartext_configlinear_1r   actlinear_2)r   config	__class__s     r   ra   zBeeMultiModalProjector.__init__p   s    V%9%E5QQQ	 ,*Q.
 
 

 "##	*Q.*
 
 
r   image_featurer!   c                     |                      |          }|                     |          }|                     |          }|                     |          }|S r   )rf   ri   rj   rk   )r   rn   hidden_statess      r   forwardzBeeMultiModalProjector.forward   sL    m44m44//m44r   )r>   r?   r@   ra   torchTensorrq   __classcell__rm   s   @r   r[   r[   o   s^        
 
 
 
 
U\ el        r   r[   )rR   dummy_inputsc                   V     e Zd Z edddddd          Zdd	d
ededdf fdZ xZS )BeeForConditionalGenerationzlanguage_model.model.zvision_tower.zmulti_modal_projector.image_newlinezlanguage_model.lm_head.)zmodel.language_model.zmodel.vision_tower.zmodel.multi_modal_projector.zmodel.image_newlinezlm_head.)orig_to_new_prefixr(   )prefixvllm_configr{   r!   Nc                    t                                          ||           |j        j        }t	          |          | _        d S )N)r|   r{   )r`   ra   model_configr8   r[   multi_modal_projector)r   r|   r{   rl   rm   s       r   ra   z$BeeForConditionalGeneration.__init__   sA    [@@@)3%;F%C%C"""r   )	r>   r?   r@   r   hf_to_vllm_mapperr   rY   ra   rt   ru   s   @r   rx   rx      s         & &=#2,D#21
 

 
 
 BD D D Dz D3 D D D D D D D D D D Dr   rx   )collections.abcr   rr   torch.nnrb   transformers.activationsr   vllm.configr   vllm.config.multimodalr   vllm.multimodalr   vllm.multimodal.inputsr   
llava_nextr
   r   r   llava_onevisionr   utilsr   r   rD   Moduler[   register_processorrx   r   r   r   <module>r      s   $ # # # # #        3 3 3 3 3 3 " " " " " " 3 3 3 3 3 3 / / / / / / 5 5 5 5 5 5         
 D C C C C C            75 75 75 75 75/ 75 75 75t
 
 
 
 
34EF 
 
 
:    RY   2 (' 	&  
D D D D D"H D D 
D D Dr   