
    .`i                     Z    d Z ddlmZ ddlmZ  G d de          Z G d de          ZdS )	z
Kimi-K2.5 Model Configuration.

This configuration supports video-chunk as an internal modality type.
A video-chunk is the smallest independently processable unit of video.
    )DeepseekV3Config)PretrainedConfigc            !            e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"dedededededededededeeef         dededededz  ded ef  fd!Z xZ	S )#KimiK25VisionConfigkimi_k25_vision   @      divided_fixed             r   spatial_temporal	sd2_tpoolpatchmergerNgeluh㈵>
patch_sizeinit_pos_emb_heightinit_pos_emb_widthinit_pos_emb_timepos_emb_typenum_attention_headsnum_hidden_layershidden_sizeintermediate_sizemerge_kernel_sizevideo_attn_type
merge_typemm_projector_typemm_hidden_sizeprojector_hidden_actprojector_ln_epsc                 .    t                      j        di | || _        || _        || _        || _        || _        || _        || _        || _	        |	| _
        |
| _        || _        || _        || _        ||| _        n|| _        || _        || _        d S )N )super__init__r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   )selfr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   kwargs	__class__s                     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/transformers_utils/configs/kimi_k25.pyr*   zKimiK25VisionConfig.__init__   s    , 	""6"""$#6 "4!2(#6 !2&!2!2.$!2%"0D"-D$8! 0    )r   r	   r	   r
   r   r   r   r   r   r   r   r   r   Nr   r   )
__name__
__module____qualname__
model_typeintstrtuplefloatr*   __classcell__r-   s   @r.   r   r      s%       "J
 #%"$!"+#%!#!%-31%!.%)$*"&'+1 +1 +1 !	+1
  +1 +1 +1 !+1 +1 +1 +1 !c?+1 +1 +1  !+1" d
#+1$ "%+1&  '+1 +1 +1 +1 +1 +1 +1 +1 +1 +1r/   r   c                        e Zd ZdZdZ	 	 	 	 	 	 	 dd	eez  dz  d
eez  dz  dededede	de
f fdZedefd            Zedefd            Z xZS )KimiK25Configa*  Kimi-K2.5 model configuration.

    Kimi-K2.5 extends Kimi-K2 with vision support using video-chunks.
    A video-chunk consists of multiple consecutive frames
    that are processed together with temporal pooling.

    Args:
        vision_config: Configuration for the vision tower and projector.
        text_config: Configuration for the text model (DeepseekV3).
        ignore_index: The ignore index for the loss function.
        media_placeholder_token_id: The token ID for media placeholders.
        pad_token_id: The token ID for padding.
    kimi_k25N r   F<|kimi_k25_video_placeholder|>vision_configtext_configignore_indexmedia_placeholder_token_idpad_token_iduse_unified_vision_chunkvideo_placeholderc                    |t                      }n!t          |t                    rt          di |}|| _        |t	                      }n!t          |t                    rt	          di |}|| _        | j        j        | j        j        k    r| j        j        | j        _        || _        || _	        || _
        || _        t          | j        dd           | j        j        | _         t                      j        dd|i| d S )Nquantization_configrD   r(   )r   
isinstancedictr@   r   rA   r$   r   rB   rC   rE   rF   getattrrH   r)   r*   )
r+   r@   rA   rB   rC   rD   rE   rF   r,   r-   s
            r.   r*   zKimiK25Config.__init__P   s"     /11MMt,, 	A/@@-@@M2? *,,KKT** 	:*99[99K-8 ,0B0NNN040@0LD- )*D'(@%!2 4#%:DAAM'+'7'KD$==l=f=====r/   returnc                     | j         j        S )z3Get hidden size from text config for compatibility.)rA   r   r+   s    r.   r   zKimiK25Config.hidden_sizey   s     ++r/   c                     | j         j        S )z2Get vocab size from text config for compatibility.)rA   
vocab_sizerN   s    r.   rP   zKimiK25Config.vocab_size~   s     **r/   )NNr=   r>   r   Fr?   )r0   r1   r2   __doc__r3   rJ   r   r   r4   boolr5   r*   propertyr   rP   r8   r9   s   @r.   r;   r;   ?   s         J <@6: *0).!A'> '>11D8'> ,,t3'> 	'>
 %('> '> #''> '> '> '> '> '> '>R ,S , , , X, +C + + + X+ + + + +r/   r;   N)rQ   transformersr    transformers.configuration_utilsr   r   r;   r(   r/   r.   <module>rV      s     * ) ) ) ) ) = = = = = =.1 .1 .1 .1 .1* .1 .1 .1bB+ B+ B+ B+ B+$ B+ B+ B+ B+ B+r/   