
    .`io                         d dl mZ d dlmZmZ  G d de          Z G d de          Z G d de          Z G d	 d
e          Z ej	        de            ej	        de            G d de          Z
dS )    )Any)
AutoConfigPretrainedConfigc                        e Zd ZU dZdZeed<   	 	 	 	 	 	 	 	 	 	 	 	 ddedededededededededede	de	de
f fdZ xZS )AIMv2Configa  This is the configuration class to store the configuration of an [`AIMv2Model`].
    Instantiating a configuration with the defaults will yield a similar configuration
    to that of the [apple/aimv2-large-patch14-224](https://huggingface.co/apple/aimv2-large-patch14-224).
    Args:
        hidden_size: Dimension of the hidden representations.
        intermediate_size: Dimension of the SwiGLU representations.
        num_hidden_layers: Number of hidden layers in the Transformer.
        num_attention_heads: Number of attention heads for each attention layer
            in the Transformer.
        num_channels: Number of input channels.
        image_size: Image size.
        patch_size: Patch size.
        rms_norm_eps: Epsilon value used for the RMS normalization layer.
        attention_dropout: Dropout ratio for attention probabilities.
        projection_dropout: Dropout ratio for the projection layer after the attention.
        qkv_bias: Whether to add a bias to the queries, keys and values.
        use_bias: Whether to add a bias in the feed-forward and projection layers.
        kwargs: Keyword arguments for the [`PretrainedConfig`].
    aimv2
model_type                     h㈵>        Fhidden_sizeintermediate_sizenum_hidden_layersnum_attention_headsnum_channels
image_size
patch_sizerms_norm_epsattention_dropoutprojection_dropoutqkv_biasuse_biaskwargsc                      t                      j        di | || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        |
| _        || _        || _        d S )N )super__init__r   r   r   r   r   r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                 x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/transformers_utils/configs/ovis.pyr#   zAIMv2Config.__init__$   s      	""6"""&!2!2#6 ($$!2("4      )r
   r   r   r   r   r   r   r   r   r   FF)__name__
__module____qualname____doc__r	   str__annotations__intfloatboolr   r#   __classcell__r%   s   @r&   r   r      s         ( J  !%!##$"#&$'! !! ! 	!
 !! ! ! ! ! !! "! ! ! ! ! ! ! ! ! ! ! ! !r'   r   c                   D     e Zd Z	 	 	 	 	 	 	 d
deez  dz  def fd	Z xZS )BaseVisualTokenizerConfig @  softmax      ?NF   backbone_confighidden_stridec                 n    t                      j        di | || _        || _        || _        t          |t                    rd |                    d          D             }|| _        t          t          t          f                     | _        || _        |t          |t          t          f          sJ dt          |           d            t          |t                    sB|d         }	|	dk    r(|                    d           t!          j        |	fi |}nt%          di |}|| _        || _        d S )Nc                 ,    g | ]}t          |          S r!   )r.   ).0xs     r&   
<listcomp>z6BaseVisualTokenizerConfig.__init__.<locals>.<listcomp>X   s    888c!ff888r'   |zMexpect `backbone_config` to be instance of PretrainedConfig or dict, but got  typer	   r   r!   )r"   r#   
vocab_sizetokenize_functiontau
isinstancer,   splitdepthsdictr   backbone_kwargsdrop_cls_tokenr   typepopr   	for_modelr   r9   r:   )r$   rB   rC   rD   rG   rJ   r9   r:   r   r	   r%   s             r&   r#   z"BaseVisualTokenizerConfig.__init__H   sd    	""6"""$!2fc"" 	988fll3&7&7888F#CH~//,&o0@$/GHH  |`det`u`u||| H o/?@@ E,\:
((#''555&0&:"' '&5' 'OO '2&D&DO&D&DO.*r'   )r5   r6   r7   NFNr8   )r(   r)   r*   r   rH   r.   r#   r1   r2   s   @r&   r4   r4   G   sw         #:>"+ "+ *D047"+ "+ "+ "+ "+ "+ "+ "+ "+ "+ "+r'   r4   c                   "     e Zd ZdZ fdZ xZS )Aimv2VisualTokenizerConfigaimv2_visual_tokenizerc                      t                      j        di | | j        rd| _        | j        r1t	          | j                  dk    sJ | j        d         | j        d<   d S d S NFr8   r   r   r!   r"   r#   rJ   rG   lenrI   r$   r   r%   s     r&   r#   z#Aimv2VisualTokenizerConfig.__init__p       ""6""" 	("'D; 	Gt{##q((((8<AD !4555	G 	Gr'   r(   r)   r*   r	   r#   r1   r2   s   @r&   rO   rO   m   sF        )JG G G G G G G G Gr'   rO   c                   "     e Zd ZdZ fdZ xZS )SiglipVisualTokenizerConfigsiglip_visual_tokenizerc                      t                      j        di | | j        rd| _        | j        r1t	          | j                  dk    sJ | j        d         | j        d<   d S d S rR   rS   rU   s     r&   r#   z$SiglipVisualTokenizerConfig.__init__|   rV   r'   rW   r2   s   @r&   rY   rY   y   sF        *JG G G G G G G G Gr'   rY   rZ   rP   c                   T     e Zd ZdZ	 	 	 	 	 	 	 ddeez  dz  deez  dz  f fdZ xZS )	
OvisConfigovisN    F
llm_configvisual_tokenizer_configc                     t                      j        di | |{t          |t          t          f          sJ dt          |           d            t          |t                    s/|d         }	|                    d           t          j        |	fi |}|| _	        |{t          |t          t          f          sJ dt          |           d            t          |t                    s/|d         }	|                    d           t          j        |	fi |}|| _
        || _        || _        || _        || _        || _        d S )NzHexpect `llm_config` to be instance of PretrainedConfig or dict, but got rA   r	   zUexpect `visual_tokenizer_config` to be instance of PretrainedConfig or dict, but got r!   )r"   r#   rE   r   rH   rK   rL   r   rM   text_configra   multimodal_max_lengthr   conversation_formatter_classllm_attn_implementationdisable_tie_weight)r$   r`   ra   rd   r   re   rf   rg   r   r	   r%   s             r&   r#   zOvisConfig.__init__   s    	""6"""!j+;T*BCC  r[_`j[k[krrr C j*:;; L'5
|,,,'1*KK
KK
 &".58H$7OPP   Mhl  nE  iF  iF  M  M  M P 57GHH 4\B
'++L999*4*>+ +"9+ +' (?$%:"&,H)'>$"4r'   )NNr_   NNNF)r(   r)   r*   r	   r   rH   r#   r1   r2   s   @r&   r]   r]      s        J 6:BF"%) $ '5 '5$t+d2'5 "2D!84!?'5 '5 '5 '5 '5 '5 '5 '5 '5 '5r'   r]   N)typingr   transformersr   r   r   r4   rO   rY   registerr]   r!   r'   r&   <module>rk      sT         5 5 5 5 5 5 5 54! 4! 4! 4! 4!" 4! 4! 4!t#+ #+ #+ #+ #+ 0 #+ #+ #+L	G 	G 	G 	G 	G!: 	G 	G 	G	G 	G 	G 	G 	G"; 	G 	G 	G 
 -/J K K K 
 ,.H I I I*5 *5 *5 *5 *5! *5 *5 *5 *5 *5r'   