§
     `ƒiò  ã                   ót   — d Z ddlmZ ddlmZ ddlmZmZ  ej        e	¦  «        Z
 G d„ de¦  «        ZdgZdS )	zPaliGemmamodel configurationé   )ÚPretrainedConfig)Úloggingé   )ÚCONFIG_MAPPINGÚ
AutoConfigc                   óL   ‡ — e Zd ZdZdZddiZeedœZdgZ	 	 	 	 	 	 dˆ fd„	Z	ˆ xZ
S )ÚPaliGemmaConfiga­  
    This is the configuration class to store the configuration of a [`PaliGemmaForConditionalGeneration`]. It is used to instantiate an
    PaliGemmamodel according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the PaliGemma-2B.

    e.g. [paligemma-hf/paligemma-2b](https://huggingface.co/paligemma-hf/paligemma-2b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (`PaliGemmaVisionConfig`,  *optional*):
            Custom vision config or dict
        text_config (`Union[AutoConfig, dict]`, *optional*):
            The config object of the text backbone. Can be any of `LlamaConfig` or `MistralConfig`.
        image_token_index (`int`, *optional*, defaults to 256000):
            The image token index to encode the image prompt.
        vocab_size (`int`, *optional*, defaults to 257152):
            Vocabulary size of the PaliGemmamodel. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`~PaliGemmaForConditionalGeneration`]
        projection_dim (`int`, *optional*, defaults to 2048):
            Dimension of the multimodal projection space.
        hidden_size (`int`, *optional*, defaults to 2048):
            Dimension of the hidden layer of the Language model.

    Example:

    ```python
    >>> from transformers import PaliGemmaForConditionalGeneration, PaliGemmaConfig, SiglipVisionConfig, GemmaConfig

    >>> # Initializing a Siglip-like vision config
    >>> vision_config = SiglipVisionConfig()

    >>> # Initializing a PaliGemma config
    >>> text_config = GemmaConfig()

    >>> # Initializing a PaliGemma paligemma-3b-224 style configuration
    >>> configuration = PaliGemmaConfig(vision_config, text_config)

    >>> # Initializing a model from the paligemma-3b-224 style configuration
    >>> model = PaliGemmaForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```Ú	paligemmaÚimage_token_idÚimage_token_index)Útext_configÚvision_configÚpast_key_valuesNé è é€ì é   c           
      óÄ  •— || _         || _        || _        || _        d| _        t          | j        t          ¦  «        r7|                     dd¦  «        |d<   t          |d                  di |¤Ž| _        n$|€"t          d         dddddd	d
d¬¦  «        | _        || _	        t          | j	        t          ¦  «        r7|                     dd¦  «        |d<   t          |d                  di |¤Ž| _	        n#|€!t          d         dddddd|¬¦  «        | _	        | j        j
        | j        j        z  dz  | j	        _        || j        _         t          ¦   «         j        di |¤Ž d S )NFÚ
model_typeÚsiglip_vision_modeli   i€  é   éà   é   é   r   )Úintermediate_sizeÚhidden_sizeÚ
patch_sizeÚ
image_sizeÚnum_hidden_layersÚnum_attention_headsÚ
vocab_sizeÚvision_use_headÚgemmar   é   i @  é   é   )r   r   r   r   Únum_key_value_headsÚis_encoder_decoderr    r   © )r   Úprojection_dimr   r   r'   Ú
isinstanceÚdictÚgetr   r   r   r   Únum_image_tokensÚsuperÚ__init__)	Úselfr   r   r   r    r)   r   ÚkwargsÚ	__class__s	           €ú‰/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/paligemma/configuration_paligemma.pyr/   zPaliGemmaConfig.__init__N   sž  ø€ ð "3ˆÔØ,ˆÔØ&ˆÔØ*ˆÔØ"'ˆÔådÔ(­$Ñ/Ô/ð 	Ø*7×*;Ò*;¸LÐJ_Ñ*`Ô*`ˆM˜,Ñ'Ý!/°¸lÔ0KÔ!LÐ!]Ð!]È}Ð!]Ð!]ˆDÔÐØÐ"Ý!/Ð0EÔ!FØ"&Ø ØØØ"$Ø$&Ø!Ø %ð	"ñ 	"ô 	"ˆDÔð 'ˆÔÝdÔ&­Ñ-Ô-ð 	Ø(3¯ª¸ÀgÑ(NÔ(NˆK˜Ñ%Ý-¨k¸,Ô.GÔHÐWÐWÈ;ÐWÐWˆDÔÐØÐ Ý-¨gÔ6Ø Ø"$Ø"'Ø$%Ø$%Ø#(Ø%ð ñ  ô  ˆDÔð .2Ô-?Ô-JÈdÔN`ÔNkÑ-kÐpqÑ,qˆÔÔ)Ø,:ˆÔÔ)Ø‰ŒÔÐ"Ð"˜6Ð"Ð"Ð"Ð"Ð"ó    )NNr   r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Úattribute_mapr   Úsub_configsÚkeys_to_ignore_at_inferencer/   Ú__classcell__)r2   s   @r3   r	   r	      sˆ   ø€ € € € € ð,ð ,ð\ €JàÐ-ð€Mð #-¸zÐJÐJ€KØ#4Ð"5Ðð ØØ ØØØð/#ð /#ð /#ð /#ð /#ð /#ð /#ð /#ð /#ð /#r4   r	   N)r8   Úconfiguration_utilsr   Úutilsr   Úautor   r   Ú
get_loggerr5   Úloggerr	   Ú__all__r(   r4   r3   ú<module>rC      sŸ   ðð #Ð "à 3Ð 3Ð 3Ð 3Ð 3Ð 3Ø Ð Ð Ð Ð Ð Ø -Ð -Ð -Ð -Ð -Ð -Ð -Ð -ð 
ˆÔ	˜HÑ	%Ô	%€ðe#ð e#ð e#ð e#ð e#Ð&ñ e#ô e#ð e#ðP Ð
€€€r4   