
     `ib                     $   d Z ddlZddlmZ ddlmZ ddlZddlmc m	Z
 ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZmZmZmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z%  ej&        e'          Z( G d de%          Z)e ed           G d de                                  Z* G d de!          Z+ G d de          Z, G d de          Z- G d de#          Z. G d  d!e          Z/ G d" d#e          Z0 G d$ d%ej1                  Z2 G d& d'ej3                  Z4 G d( d)ej3                  Z5 G d* d+ej3                  Z6e G d, d-e                      Z7 ed.           G d/ d0e                       Z8g d1Z9dS )2zPyTorch EoMT model.    N)	dataclass)Optional)Tensornn   )ACT2FN)ModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)check_model_inputs   )Dinov2EmbeddingsDinov2LayerDinov2LayerScaleDinov2PatchEmbeddings)#Mask2FormerForUniversalSegmentationMask2FormerLoss)SiglipAttention)	ViTConfigc                   z     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddededededededef fdZ xZS ) 
EomtConfiga  
    This is the configuration class to store the configuration of a [`EomtForUniversalSegmentation`]. It is used to instantiate an EoMT model
    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the EoMT
    [tue-mps/coco_panoptic_eomt_large_640](https://huggingface.co/tue-mps/coco_panoptic_eomt_large_640)
    architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 1024):
            Dimensionality of the hidden representations.
        num_hidden_layers (`int`, *optional*, defaults to 24):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads in each attention layer.
        mlp_ratio (`int`, *optional*, defaults to 4):
            Ratio of the MLP hidden dimensionality to the hidden size.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings and encoder.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 640):
            The size (resolution) of each input image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        layerscale_value (`float`, *optional*, defaults to 1.0):
            Initial value for the LayerScale parameter.
        drop_path_rate (`float`, *optional*, defaults to 0.0):
            The stochastic depth rate (drop path) used during training.
        num_upscale_blocks (`int`, *optional*, defaults to 2):
            Number of upsampling blocks used in the decoder or segmentation head.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            Dropout probability applied after attention projection.
        use_swiglu_ffn (`bool`, *optional*, defaults to `False`):
            Whether to use the SwiGLU feedforward neural network.
        num_blocks (`int`, *optional*, defaults to 4):
            Number of feature blocks or stages in the architecture.
        no_object_weight (`float`, *optional*, defaults to 0.1):
            Loss weight for the 'no object' class in panoptic/instance segmentation.
        class_weight (`float`, *optional*, defaults to 2.0):
            Loss weight for classification targets.
        mask_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for mask prediction.
        dice_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for the dice loss component.
        train_num_points (`int`, *optional*, defaults to 12544):
            Number of points to sample for mask loss computation during training.
        oversample_ratio (`float`, *optional*, defaults to 3.0):
            Oversampling ratio used in point sampling for mask training.
        importance_sample_ratio (`float`, *optional*, defaults to 0.75):
            Ratio of points to sample based on importance during training.
        num_queries (`int`, *optional*, defaults to 200):
            Number of object queries in the Transformer.
        num_register_tokens (`int`, *optional*, defaults to 4):
            Number of learnable register tokens added to the transformer input.

    Example:

    ```python
    >>> from transformers import EomtConfig, EomtForUniversalSegmentation

    >>> # Initialize configuration
    >>> config = EomtConfig()

    >>> # Initialize model
    >>> model = EomtForUniversalSegmentation(config)

    >>> # Access config
    >>> config = model.config
    ```eomt            gelu        {Gz?ư>  r         ?r   F皙?       @      @ 1        @      ?   no_object_weightclass_weightmask_weightdice_weighttrain_num_pointsoversample_ratioimportance_sample_ratioc                 H    t                      j        d||||||||	|
|d
| | `| `| `| `| `| `|| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        d S )N)
hidden_sizenum_hidden_layersnum_attention_headshidden_dropout_prob
hidden_actinitializer_rangelayer_norm_eps
image_size
patch_sizenum_channels )super__init__intermediate_sizeqkv_bias
pooler_actpooler_output_sizeencoder_strideattention_probs_dropout_prob	mlp_ratioattention_dropoutlayerscale_valuedrop_path_ratenum_upscale_blocksuse_swiglu_ffn
num_blocksr-   r.   r/   r0   r1   r2   r3   num_queriesnum_register_tokens)selfr5   r6   r7   rH   r9   r8   r:   r;   r<   r=   r>   rJ   rK   rL   rI   rM   rN   r-   r.   r/   r0   r1   r2   r3   rO   rP   kwargs	__class__s                               y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/eomt/modular_eomt.pyrA   zEomtConfig.__init__   s    < 	 	
#/ 3 3!/)!!%	
 	
 	
 	
 	
 "MO#-"!2 0,"4,$ 0(&& 0 0'>$&#6       )r   r   r   r   r    r!   r"   r#   r$   r   r   r%   r!   r   r!   Fr   r&   r'   r(   r(   r)   r*   r+   r,   r   )	__name__
__module____qualname____doc__
model_typefloatintrA   __classcell__rS   s   @rT   r   r   3   s        M M^ J "%!   %"%)-7B7 B7&  'B7( )B7* +B7, -B7. /B70  1B72 "'3B7 B7 B7 B7 B7 B7 B7 B7 B7 B7rU   r   a  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   4   e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eej                 ed<   dZeej                 ed<   dZeeej                          ed<   dZeeej                          ed<   dZeeej                          ed	<   dS )
"EomtForUniversalSegmentationOutputa*  
    loss (`torch.Tensor`, *optional*):
        The computed loss, returned when labels are present.
    class_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
        query. Note the `+ 1` is needed because we incorporate the null class.
    masks_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
        query.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
        Last hidden states (final feature map) of the last layer.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Self and Cross Attentions weights from transformer decoder.
    patch_offsets (`list[torch.Tensor]`, *optional*):
        list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
    Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsets)rV   rW   rX   rY   rb   r   torchFloatTensor__annotations__rc   rd   re   rf   tuplerg   rh   listr   r?   rU   rT   ra   ra      s          * )-D(5$
%,,,8<(5#45<<<8<(5#45<<<59x 129998<M8E%"345<<<59Ju01299926M8D./66666rU   ra   c                       e Zd ZdS )EomtLossNrV   rW   rX   r?   rU   rT   ro   ro              DrU   ro   c                       e Zd ZdS )EomtPatchEmbeddingsNrp   r?   rU   rT   rs   rs      rq   rU   rs   c                   H    e Zd ZdeddfdZd Zdej        dej        fdZdS )EomtEmbeddingsconfigreturnNc                    t           j                            |            || _        |j        | _        t          j        t          j        dd|j                            | _	        t          j        t          j
        d|j        |j                            | _        t          |          | _        | j        j        }t          j        |j                  | _        d|j        z   | _        t          j        ||j                  | _        |                     dt          j        |                              d          d           d S )N   position_ids)ry   F)
persistent)r   ModulerA   rv   r=   	Parameterri   randnr5   	cls_tokenzerosrP   register_tokensrs   patch_embeddingsnum_patchesDropoutr8   dropoutnum_prefix_tokens	Embeddingposition_embeddingsregister_bufferarangeexpand)rQ   rv   r   s      rT   rA   zEomtEmbeddings.__init__   s   
	4    +ek!Q8J&K&KLL!|EK6;UW]Wi,j,jkk 3F ; ;+7z&"<==!"V%?!?#%<V=O#P#P ^U\+-F-F-M-Mg-V-VchiiiiirU   c                      t          d          )NzNot needed for Eomt ModelAttributeErrorrQ   s    rT   interpolate_pos_encodingz'EomtEmbeddings.interpolate_pos_encoding  s    8999rU   pixel_valuesc                    |j         \  }}}}| j        j        j        j        }|                     |                    |                    }| j                            |dd          }| j                            |dd          }|| 	                    | j
                  z   }t          j        |||gd          }|                     |          }|S )N)dtyper{   ry   dim)shaper   
projectionweightr   tor   r   r   r   rz   ri   catr   )rQ   r   
batch_size_target_dtype
embeddings
cls_tokensr   s           rT   forwardzEomtEmbeddings.forward  s    *0
Aq!,7>D**<???+N+NOO
^**:r2>>
.55j"bII$":":4;L"M"MM
Y
OZHaPPP
\\*--
rU   )	rV   rW   rX   r   rA   r   ri   r   r   r?   rU   rT   ru   ru      sq        jz jd j j j j : : :EL U\      rU   ru   c                       e Zd ZdS )EomtAttentionNrp   r?   rU   rT   r   r      rq   rU   r   c                       e Zd ZdS )EomtLayerScaleNrp   r?   rU   rT   r   r   $  rq   rU   r   c                   P    e Zd Z	 ddej        deej                 dej        fdZdS )	EomtLayerNrf   	head_maskrw   c                 j   |                      |          }|                     ||          \  }}|                     |          }|                     |          |z   }|                     |          }|                     |          }|                     |          }|                     |          |z   }|S N)norm1	attentionlayer_scale1	drop_pathnorm2mlplayer_scale2)rQ   rf   r   hidden_states_normself_attention_outputr   layer_outputs          rT   r   zEomtLayer.forward)  s    
 "ZZ66#'>>2Di#P#P q $ 1 12G H H '<==M zz-00xx--((66 ~~l33mCrU   r   )rV   rW   rX   ri   r   r   r   r?   rU   rT   r   r   (  sY         -1 | EL) 
	     rU   r   c                   D     e Zd Zd fd	Zdej        dej        fdZ xZS )EomtLayerNorm2dr#   Tc                 P    t                                          |||           d S )N)epselementwise_affine)r@   rA   )rQ   r>   r   affinerS   s       rT   rA   zEomtLayerNorm2d.__init__A  s(    36JJJJJrU   hidden_staterw   c                     |                     dddd          }t          j        || j        | j        | j        | j                  }|                     dddd          }|S )Nr   r   r   ry   )permuteF
layer_normnormalized_shaper   biasr   )rQ   r   s     rT   r   zEomtLayerNorm2d.forwardD  s^    #++Aq!Q77|L$2GVZV_aeaijj#++Aq!Q77rU   )r#   T)rV   rW   rX   rA   ri   r   r   r]   r^   s   @rT   r   r   @  si        K K K K K KEL U\        rU   r   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )EomtScaleLayerrv   c                 $   t                                                       |j        }t          j        ||dd          | _        t          |j                 | _        t          j	        ||dd|d          | _
        t          |          | _        d S )Nr   )kernel_sizestrider   ry   F)r   paddinggroupsr   )r@   rA   r5   r   ConvTranspose2dconv1r   r9   
activationConv2dconv2r   layernorm2drQ   rv   r5   rS   s      rT   rA   zEomtScaleLayer.__init__L  s    ('[aXYZZZ
 !23Y
 
 

 +;77rU   rf   rw   c                     |                      |          }|                     |          }|                     |          }|                     |          }|S r   )r   r   r   r   rQ   rf   s     rT   r   zEomtScaleLayer.forward\  sN    

=1166

=11((77rU   	rV   rW   rX   r   rA   ri   r   r   r]   r^   s   @rT   r   r   K  sj        8z 8 8 8 8 8 8 U\ el        rU   r   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )EomtScaleBlockrv   c                     t                                                       j        | _        t	          j        fdt          | j                  D                       | _        d S )Nc                 .    g | ]}t                    S r?   )r   .0r   rv   s     rT   
<listcomp>z+EomtScaleBlock.__init__.<locals>.<listcomp>h  s!    #[#[#[qN6$:$:#[#[#[rU   )r@   rA   rL   rN   r   
ModuleListrangeblock)rQ   rv   rS   s    `rT   rA   zEomtScaleBlock.__init__e  sX     3]#[#[#[#[E$/DZDZ#[#[#[\\


rU   rf   rw   c                 0    | j         D ]} ||          }|S r   )r   )rQ   rf   r   s      rT   r   zEomtScaleBlock.forwardj  s*    Z 	1 	1E!E-00MMrU   r   r^   s   @rT   r   r   d  sq        ]z ] ] ] ] ] ]
U\ el        rU   r   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )EomtMaskHeadrv   c                     t                                                       |j        }t          j        ||          | _        t          j        ||          | _        t          j        ||          | _        t          |j	                 | _
        d S r   )r@   rA   r5   r   Linearfc1fc2fc3r   r9   r   r   s      rT   rA   zEomtMaskHead.__init__q  sm    (9[+669[+669[+66 !23rU   rf   rw   c                     |                      |                     |                    }|                      |                     |                    }|                     |          }|S r   )r   r   r   r   r   s     rT   r   zEomtMaskHead.forwardz  sS    (?(?@@(?(?@@//rU   r   r^   s   @rT   r   r   p  sj        4z 4 4 4 4 4 4U\ el        rU   r   c                   X    e Zd ZU dZeed<   dZdZdZdgZ	dZ
eedZd	ej        d
dfdZdS )EomtPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    rv   r   r   Fr   T)rf   rg   modulerw   Nc                 ^   | j         j        }t          |t          j        t          j        t          j        f          rt          j                            |j	        t          j        d                     |j        ot          j                            |j	                  \  }}|dk    rdt          j        |          z  nd}t          j                            |j        | |           d S d S t          |t          j                  r?|j	        j                            d           |j        j                                         d S t          |t          j                  rU|j	        j                            dd           |j        +|j	        j        |j                                                  d S d S t          |t,                    r=t/          |d          r+|j        j                            | j         j                   d S d S t          |t4                    rt          j                            |j        j                            t<          j                  d|                              |j        j                   |j        _        |j!        j                                         d S d S )	N   )ar   ry   r%   r!   )meanstdlambda1)"rv   r:   
isinstancer   r   r   r   initkaiming_uniform_r   mathsqrtr   _calculate_fan_in_and_fan_outuniform_	LayerNormdatafill_zero_r   normal_padding_idxr   hasattrr   rJ   ru   trunc_normal_r   r   ri   float32r   r   )rQ   r   r   fan_inr   bounds         rT   _init_weightsz!EomtPreTrainedModel._init_weights  s[   k+fry")R5GHII 	0G$$V]dill$CCC{&GAA&-PP	17!DIf----  ufe<<<<< '& -- 	0M$$S)))K""$$$$$-- 	0M&&CQ&777!-"6#56<<>>>>> .-// 	0vy)) H#))$+*FGGGGGH H// 	0$&G$9$9 %((77cs %: % %b!'(( ! "'--/////		0 	0rU   )rV   rW   rX   rY   r   rk   base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpar   r   _can_record_outputsr   r}   r  r?   rU   rT   r   r     s          
 $O&+#$N"# 
0BI 0$ 0 0 0 0 0 0rU   r   zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                       e Zd ZdefdZd Zd Zdej        fdZ	e
d             Zee	 	 	 dd	ed
eee                  deee                  deee                  dee         defd                        ZdS )EomtForUniversalSegmentationrv   c                 P   t          j        |            | _        j        | _        t	                    | _        t          j        j        j	                  | _
        t          j        j        j                  | _        t          j        fdt          j                  D                       | _        t#                    | _        t'                    | _        t          j        j        j        dz             | _        j        j        z  j        j        z  f| _        j        j        j        d| _        t?          | j                  | _         | !                    dtE          j#        j$                             | %                                 d S )N)r   c                 .    g | ]}t                    S r?   )r   r   s     rT   r   z9EomtForUniversalSegmentation.__init__.<locals>.<listcomp>  s!    $`$`$`1Yv%6%6$`$`$`rU   ry   )loss_cross_entropy	loss_mask	loss_dice)rv   weight_dictattn_mask_probs)&r
   rA   rv   r6   ru   r   r   r   r5   r;   	layernormr   rO   queryr   r   layersr   upscale_blockr   	mask_headr   
num_labelsclass_predictorr<   r=   	grid_sizer.   r/   r0   r  ro   	criterionr   ri   onesrN   	post_init)rQ   rv   s    `rT   rA   z%EomtForUniversalSegmentation.__init__  ss    v...!'!9(00f&8f>STTT\&"4f6HII
m$`$`$`$`fF^@_@_$`$`$`aa+F33%f--!y);V=NQR=RSS +v/@@&BSW]WhBhi"("5++.
 .
 "T=MNNN.
6;L0M0MNNNrU   c                     | j         j        S r   )r   r   r   s    rT   get_input_embeddingsz1EomtForUniversalSegmentation.get_input_embeddings  s    //rU   c                      t          d          )NzNote needed for Eomt Model.r   r   s    rT   get_auxiliary_logitsz1EomtForUniversalSegmentation.get_auxiliary_logits  s    :;;;rU   logitsc                    |d d d | j         j        d d f         }|                     |          }|d d | j         j        | j        j        z   d d d f         }|                    dd          } |j        |j        d         dg| j        R  }| 	                    |          }| 
                    |          }t          j        d||          }||fS )Nry   r   r   r{   zbqc, bchw -> bqhw)rv   rO   r  r   r   	transposereshaper   r  r  r  ri   einsum)rQ   r   query_tokensclass_logitsprefix_tokensmask_logitss         rT   predictz$EomtForUniversalSegmentation.predict  s    aaa!:4;#:!:AAA=>++L99qqq$+"9DO<]"]"_"_abababbc%//155--m.A!.DbZ4>ZZZ~~l33**=99l#6mTTL((rU   c                     |dk     r:t          j        | j        d         ||          |k    }d| d d d ||d f         |<   | S )Nry   r   )device)ri   randr   )	attn_maskprobnum_query_tokensencoder_start_tokensr+  random_queriess         rT   _disable_attention_maskz4EomtForUniversalSegmentation._disable_attention_mask  sb    !88"Z	(:<LU[\\\_ccN VWIaaa***,@,A,AAB>RrU   Nr   mask_labelsclass_labelsrh   rR   rw   c                    d\  }}d}|t          d          |                     |          }	t          | j                  D ]w\  }
}|
| j        | j        j        z
  k    ri| j        j        dddddf         	                    |	j
        d         dd                              |	j                  }t          j        ||	fd          }	|
| j        | j        j        z
  k    r| j        s'| j        |
| j        z
  | j        j        z            dk    r|                     |	          }|                     |          \  }}||fz  }||fz  }t          j        |	j
        d         |	j
        d         |	j
        d         |	j        t          j                  }t+          j        || j        d	
          }|                    |                    d          |                    d          d          }| j        j        }|| j        j        z   }|dk    |ddd||df<   |                     || j        |
| j        z
  | j        j        z            |||j                  }|ddddf         	                    d| j        j        dd          }|                                                    | d          } ||	|          }	y|                     |	          }|                     |          \  }}||fz  }||fz  }d}|L|Jd}tA          ||          D ]7\  }}| !                    ||||d          }|| "                    |          z  }8tG          |||||          S )ag  
        mask_labels (`list[torch.Tensor]`, *optional*):
            list of mask labels of shape `(num_labels, height, width)` to be fed to a model
        class_labels (`list[torch.LongTensor]`, *optional*):
            list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
            labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
        patch_offsets (`list[torch.Tensor]`, *optional*):
            list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
        )r?   r?   Nz You have to specify pixel_valuesr   r{   ry   r   )r+  r   bilinear)sizemode)r.  r/  r0  r+  .g    er!   )rd   rc   r3  r4  auxiliary_predictions)rb   rd   rc   re   rh   )$
ValueErrorr   	enumerater  r6   rv   rN   r  r   r   r   r   r+  ri   r   trainingr  r  r)  r  boolr   interpolater  viewr7  rO   r   r2  r7   r[   masked_fillzipget_loss_dictget_lossra   )rQ   r   r3  r4  rh   rR   masks_queries_logits_per_layerclass_queries_logits_per_layerattention_maskrf   idxlayer_moduler  norm_hidden_statesrd   rc   interpolated_logitsr/  r0  sequence_outputrb   	loss_dicts                         rT   r   z$EomtForUniversalSegmentation.forward  s   ( JPF&(F?@@@55!*4;!7!7 .	H .	HCd,t{/EEEE
)$111*5<<]=PQR=SUWY[\\__`m`tuu %	5-*@a H H Hd,t{/EEEE F!%!5cD<R6RUYU`Uk6k!lop!p!p%)^^M%B%B"=A\\J\=]=]:$&:.3G2II..3G2II.!&!'*!'*!'*(/*" " " '(m4Ht~dn&o&o&o#&9&>&>',,Q//1D1I1I!1L1Lb' '# $(;#: '7$/:['[$ ObdeNeqqq"3#3"35I5J5JJK "&!=!="-cD4J.JT[Mc.cd%5)=)0 "> " " "04!=!D!DRIhjlnp!q!q!/!5!5!7!7!C!C^OUY!Z!Z(LGGMM..7759\\/5R5R22&+?*AA&&+?*AA&"|'?D>A.0N? ? 
1 
1:$&: !..)=)= +!-*. /  	 i0001!5!5-'
 
 
 	
rU   )NNN)rV   rW   rX   r   rA   r  r  ri   r   r)  staticmethodr2  r   r   r   rm   r   r   ra   r   r?   rU   rT   r	  r	    s(       z    80 0 0< < <)el ) ) ) )    \  /3/304e
 e
e
 d6l+e
 tF|,	e

  V-e
 +,e
 
,e
 e
 e
 ^ e
 e
 e
rU   r	  )r   r   r	  ):rY   r   dataclassesr   typingr   ri   torch.nn.functionalr   
functionalr   r   activationsr   
file_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   r   r   utils.genericr   dinov2.modeling_dinov2r   r   r   r    mask2former.modeling_mask2formerr   r   siglip.modeling_siglipr   vit.configuration_vitr   
get_loggerrV   loggerr   ra   ro   rs   ru   r   r   r   r   r   r}   r   r   r   r   r	  __all__r?   rU   rT   <module>r_     s      ! ! ! ! ! !                         ! ! ! ! ! !      . - - - - - & & & & & &         
 0 / / / / /            d c c c c c c c 4 4 4 4 4 4 - - - - - - 
	H	%	%T7 T7 T7 T7 T7 T7 T7 T7n 	 	 	7 7 7 7 7 7 7	 	 7>	 	 	 	 	 	 	 		 	 	 	 	/ 	 	 	! ! ! ! !% ! ! !H	 	 	 	 	O 	 	 		 	 	 	 	% 	 	 	       0    bl       RY   2	 	 	 	 	RY 	 	 	    29   " '0 '0 '0 '0 '0/ '0 '0 '0T   
e
 e
 e
 e
 e
#F e
 e
 
e
P P
O
OrU   