
     `iD                     v    d Z ddlmZ ddlmZ ddlmZmZ  ej        e	          Z
 G d dee          ZdgZdS )zTextNet model configuration    )PretrainedConfig)logging)BackboneConfigMixin*get_aligned_output_features_output_indicesc                   J     e Zd ZdZdZdddddddgddg d	d
dddf fd	Z xZS )TextNetConfiga6  
    This is the configuration class to store the configuration of a [`TextNextModel`]. It is used to instantiate a
    TextNext model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the
    [czczup/textnet-base](https://huggingface.co/czczup/textnet-base). Configuration objects inherit from
    [`PretrainedConfig`] and can be used to control the model outputs.Read the documentation from [`PretrainedConfig`]
    for more information.

    Args:
        stem_kernel_size (`int`, *optional*, defaults to 3):
            The kernel size for the initial convolution layer.
        stem_stride (`int`, *optional*, defaults to 2):
            The stride for the initial convolution layer.
        stem_num_channels (`int`, *optional*, defaults to 3):
            The num of channels in input for the initial convolution layer.
        stem_out_channels (`int`, *optional*, defaults to 64):
            The num of channels in out for the initial convolution layer.
        stem_act_func (`str`, *optional*, defaults to `"relu"`):
            The activation function for the initial convolution layer.
        image_size (`tuple[int, int]`, *optional*, defaults to `[640, 640]`):
            The size (resolution) of each image.
        conv_layer_kernel_sizes (`list[list[list[int]]]`, *optional*):
            A list of stage-wise kernel sizes. If `None`, defaults to:
            `[[[3, 3], [3, 3], [3, 3]], [[3, 3], [1, 3], [3, 3], [3, 1]], [[3, 3], [3, 3], [3, 1], [1, 3]], [[3, 3], [3, 1], [1, 3], [3, 3]]]`.
        conv_layer_strides (`list[list[int]]`, *optional*):
            A list of stage-wise strides. If `None`, defaults to:
            `[[1, 2, 1], [2, 1, 1, 1], [2, 1, 1, 1], [2, 1, 1, 1]]`.
        hidden_sizes (`list[int]`, *optional*, defaults to `[64, 64, 128, 256, 512]`):
            Dimensionality (hidden size) at each stage.
        batch_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the batch normalization layers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        out_features (`list[str]`, *optional*):
            If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc.
            (depending on how many stages the model has). If unset and `out_indices` is set, will default to the
            corresponding stages. If unset and `out_indices` is unset, will default to the last stage.
        out_indices (`list[int]`, *optional*):
            If used as backbone, list of indices of features to output. Can be any of 0, 1, 2, etc. (depending on how
            many stages the model has). If unset and `out_features` is set, will default to the corresponding stages.
            If unset and `out_features` is unset, will default to the last stage.

    Examples:

    ```python
    >>> from transformers import TextNetConfig, TextNetBackbone

    >>> # Initializing a TextNetConfig
    >>> configuration = TextNetConfig()

    >>> # Initializing a model (with random weights)
    >>> model = TextNetBackbone(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```textnet      @   relui  N)r   r         i   gh㈵>g{Gz?c                     t                      j        d
i | |3ddgddgddggddgddgddgddggddgddgddgddggddgddgddgddggg}|g dg dg dg dg}|| _        || _        || _        || _        || _        || _        || _        || _	        || _
        |	| _        |
| _        d | j        D             | _        dgd t          dd          D             z   | _        t!          ||| j        	          \  | _        | _        d S )Nr
      )r   r   r   )r   r   r   r   c                 ,    g | ]}t          |          S  )len).0layers     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/textnet/configuration_textnet.py
<listcomp>z*TextNetConfig.__init__.<locals>.<listcomp>   s    LLLes5zzLLL    stemc                     g | ]}d | S )stager   )r   idxs     r   r   z*TextNetConfig.__init__.<locals>.<listcomp>   s    &L&L&L}s}}&L&L&Lr      )out_featuresout_indicesstage_namesr   )super__init__stem_kernel_sizestem_stridestem_num_channelsstem_out_channelsstem_act_func
image_sizeconv_layer_kernel_sizesconv_layer_stridesinitializer_rangehidden_sizesbatch_norm_epsdepthsranger!   r   _out_features_out_indices)selfr$   r%   r&   r'   r(   r)   r*   r+   r-   r.   r,   r   r    kwargs	__class__s                  r   r#   zTextNetConfig.__init__U   s   " 	""6""""*Q!Q!Q(Q!Q!Q!Q0Q!Q!Q!Q0Q!Q!Q!Q0	'# %"+))\\\<<<!V 0&!2!2*$'>$"4!2(,LLt/KLLL"8&L&La&L&L&LL0Z%;DL\1
 1
 1
-D---r   )__name__
__module____qualname____doc__
model_typer#   __classcell__)r5   s   @r   r   r      s        7 7r J : $,,,/
 /
 /
 /
 /
 /
 /
 /
 /
 /
r   r   N)r9   transformersr   transformers.utilsr   !transformers.utils.backbone_utilsr   r   
get_loggerr6   loggerr   __all__r   r   r   <module>rB      s    " ! ) ) ) ) ) ) & & & & & & m m m m m m m m 
	H	%	%k
 k
 k
 k
 k
')9 k
 k
 k
\ 
r   