
     `i                     4    d dl mZ  G d de          ZdgZdS )   )PretrainedConfigc                   z     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddededededededef fdZ xZS ) 
EomtConfiga  
    This is the configuration class to store the configuration of a [`EomtForUniversalSegmentation`]. It is used to instantiate an EoMT model
    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the EoMT
    [tue-mps/coco_panoptic_eomt_large_640](https://huggingface.co/tue-mps/coco_panoptic_eomt_large_640)
    architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 1024):
            Dimensionality of the hidden representations.
        num_hidden_layers (`int`, *optional*, defaults to 24):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads in each attention layer.
        mlp_ratio (`int`, *optional*, defaults to 4):
            Ratio of the MLP hidden dimensionality to the hidden size.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings and encoder.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 640):
            The size (resolution) of each input image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        layerscale_value (`float`, *optional*, defaults to 1.0):
            Initial value for the LayerScale parameter.
        drop_path_rate (`float`, *optional*, defaults to 0.0):
            The stochastic depth rate (drop path) used during training.
        num_upscale_blocks (`int`, *optional*, defaults to 2):
            Number of upsampling blocks used in the decoder or segmentation head.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            Dropout probability applied after attention projection.
        use_swiglu_ffn (`bool`, *optional*, defaults to `False`):
            Whether to use the SwiGLU feedforward neural network.
        num_blocks (`int`, *optional*, defaults to 4):
            Number of feature blocks or stages in the architecture.
        no_object_weight (`float`, *optional*, defaults to 0.1):
            Loss weight for the 'no object' class in panoptic/instance segmentation.
        class_weight (`float`, *optional*, defaults to 2.0):
            Loss weight for classification targets.
        mask_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for mask prediction.
        dice_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for the dice loss component.
        train_num_points (`int`, *optional*, defaults to 12544):
            Number of points to sample for mask loss computation during training.
        oversample_ratio (`float`, *optional*, defaults to 3.0):
            Oversampling ratio used in point sampling for mask training.
        importance_sample_ratio (`float`, *optional*, defaults to 0.75):
            Ratio of points to sample based on importance during training.
        num_queries (`int`, *optional*, defaults to 200):
            Number of object queries in the Transformer.
        num_register_tokens (`int`, *optional*, defaults to 4):
            Number of learnable register tokens added to the transformer input.

    Example:

    ```python
    >>> from transformers import EomtConfig, EomtForUniversalSegmentation

    >>> # Initialize configuration
    >>> config = EomtConfig()

    >>> # Initialize model
    >>> model = EomtForUniversalSegmentation(config)

    >>> # Access config
    >>> config = model.config
    ```eomt            gelu        {Gz?ư>  r         ?   F皙?       @      @ 1        @      ?   no_object_weightclass_weightmask_weightdice_weighttrain_num_pointsoversample_ratioimportance_sample_ratioc                     t                      j        di | || _        || _        || _        || _        || _        || _        || _        |	| _	        |
| _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        d S )N )super__init__hidden_sizenum_hidden_layersnum_attention_heads
hidden_acthidden_dropout_probinitializer_rangelayer_norm_eps
image_size
patch_sizenum_channels	mlp_ratioattention_dropoutlayerscale_valuedrop_path_ratenum_upscale_blocksuse_swiglu_ffn
num_blocksr   r   r   r   r   r   r   num_queriesnum_register_tokens)selfr$   r%   r&   r.   r'   r(   r)   r*   r+   r,   r-   r0   r1   r2   r/   r3   r4   r   r   r   r   r   r   r   r5   r6   kwargs	__class__s                               /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/eomt/configuration_eomt.pyr#   zEomtConfig.__init__k   s    < 	""6"""&!2#6 $#6 !2,$$("!2 0,"4,$ 0(&& 0 0'>$&#6       )r   r   r	   r
   r   r   r   r   r   r	   r   r   r   r   r   Fr
   r   r   r   r   r   r   r   r   r
   )	__name__
__module____qualname____doc__
model_typefloatintr#   __classcell__)r9   s   @r:   r   r      s        M M^ J "%!   %"%)-7:7 :7&  ':7( ):7* +:7, -:7. /:70  1:72 "'3:7 :7 :7 :7 :7 :7 :7 :7 :7 :7r;   r   N)configuration_utilsr   r   __all__r!   r;   r:   <module>rF      sZ   , 4 3 3 3 3 3L7 L7 L7 L7 L7! L7 L7 L7^ .r;   