
     `i                        d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZmZ ddlmZmZmZmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%  ej&        e'          Z(e ed           G d de                                  Z)e ed           G d de                                  Z* G d de	j+                  Z, G d de	j+                  Z- G d de	j+                  Z.	 dUd e	j+        d!ej/        d"ej/        d#ej/        d$eej/                 d%e0d&e0fd'Z1 G d( d)e	j+                  Z2 G d* d+e	j+                  Z3 G d, d-e	j+                  Z4 G d. d/e	j+                  Z5 G d0 d1e	j+                  Z6 G d2 d3e          Z7 G d4 d5e	j+                  Z8 G d6 d7e	j+                  Z9d8 Z: G d9 d:e	j+                  Z; G d; d<e	j+                  Z< G d= d>e	j+                  Z= G d? d@e	j+                  Z>e G dA dBe                      Z?e G dC dDe?                      Z@ G dE dFe	j+                  ZA G dG dHe	j+                  ZB G dI dJe	j+                  ZC edK           G dL dMe?                      ZD G dN dOe	j+                  ZE G dP dQe	j+                  ZFe G dR dSe?                      ZGg dTZHdS )VzPyTorch DPT (Dense Prediction Transformers) model.

This implementation is heavily inspired by OpenMMLab's implementation, found here:
https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/dpt_head.py.

    N)	dataclass)CallableOptional)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputDepthEstimatorOutputSemanticSegmenterOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringlogging	torch_int)load_backbone)can_return_tuplecheck_model_inputs   )	DPTConfigz
    Base class for model's outputs that also contains intermediate activations that can be used at later stages. Useful
    in the context of Vision models.:
    )custom_introc                   l    e Zd ZU dZdZeej                 ed<   dZ	ee
ej        df                  ed<   dS )*BaseModelOutputWithIntermediateActivationsak  
    last_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    intermediate_activations (`tuple(torch.FloatTensor)`, *optional*):
        Intermediate activations that can be used to compute hidden states of the model at various layers.
    Nlast_hidden_states.intermediate_activations)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   tuple     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/dpt/modeling_dpt.pyr   r   ,   sZ           7;!23:::HLhuU->-C'DELLLLLr)   r   z
    Base class for model's outputs that also contains a pooling of the last hidden states as well as intermediate
    activations that can be used by the model at later stages.
    c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dZeeej        df                  ed<   dS )	4BaseModelOutputWithPoolingAndIntermediateActivationsa  
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state of the first token of the sequence (classification token) after further processing
        through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns
        the classification token after processing through a linear layer and a tanh activation function. The linear
        layer weights are trained from the next sentence prediction (classification) objective during pretraining.
    intermediate_activations (`tuple(torch.FloatTensor)`, *optional*):
        Intermediate activations that can be used to compute hidden states of the model at various layers.
    Nlast_hidden_statepooler_output.hidden_states
attentionsr   )r    r!   r"   r#   r-   r   r$   r%   r&   r.   r/   r'   r0   r   r(   r)   r*   r,   r,   ?   s           6:x 1299915M8E-.555=AM8E%"3S"89:AAA:>Ju0#567>>>HLhuU->-C'DELLLLLr)   r,   c                   t     e Zd ZdZddedeeeef                  f fdZddZ		 dd	e
j        d
edefdZ xZS )DPTViTHybridEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    Nconfigfeature_sizec                 &   t                                                       |j        |j        }}|j        |j        }}t          |t          j        j	                  r|n||f}t          |t          j        j	                  r|n||f}|d         |d         z  |d         |d         z  z  }t          |          | _        | j        j        d         }t          | j        j                  dk    r)t          dt          | j        j                             ddg| _        ||j        }	|	dd          }|	d         }n7t          |t          j        j	                  r|n||f}| j        j        d         }|| _        |d         | _        || _        t#          j        ||d          | _        t#          j        t+          j        dd|j                            | _        t#          j        t+          j        d|dz   |j                            | _        d S )Nr   r   r   z1Expected backbone to have 3 output features, got kernel_size)super__init__
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterabler   backbonechannelslen
ValueErrorresidual_feature_map_indexbackbone_featmap_shaper   Conv2d
projection	Parameterr$   zeros	cls_tokenposition_embeddings)selfr3   r4   r<   r=   r>   r?   num_patchesfeature_dimfeat_map_shape	__class__s             r*   r;   zDPTViTHybridEmbeddings.__init___   s   !'!2F4EJ
$*$79Kk#-j+/:R#S#SqZZZdfpYq
#-j+/:R#S#SqZZZdfpYq
!!}
15*Q-:VW=:XY%f--m,R0t}%&&!++nQTUYUbUkQlQlnnooo+,a&'#:N)"##.L(+KK !+<9Q R RtYegsXt  -04K$$Q-()K!LLLek!Q8J&K&KLL#%<A{QPVPb0c0c#d#d   r)   r   c                    |d d d |f         }|d|d f         }t          t          |          dz            }|                    d||d                              dddd          }t          j                            |||fd          }|                    dddd                              d||z  d          }t          j        ||gd	          }|S 
Nr         ?r   r6   r      bilinear)sizemodedim)	r   rF   reshapepermuter   
functionalinterpolater$   catrP   posembgrid_size_heightgrid_size_widthstart_index
posemb_tokposemb_gridold_grid_sizes           r*   _resize_pos_embedz(DPTViTHybridEmbeddings._resize_pos_embed   s    AAA||O,
Q_-!#k"2"2c"9::!))!]M2NNVVWXZ[]^`abbm//CSUdBelv/ww!))!Q155==aAQTcAceghhJ4!<<<r)   Fpixel_valuesinterpolate_pos_encodingreturnc                    |j         \  }}}}|| j        k    rt          d          |sT|| j        d         k    s|| j        d         k    r2t          d| d| d| j        d          d| j        d          d	          |                     | j        || j        z  || j        z            }|                     |          j        d         }fd	| j	        D             }	| 
                    |                              d
                              dd
          }
| j                            |dd          }t          j        ||
fd          }
|
|z   }
t#          |
|	          S )NeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   r   zInput image size (*z) doesn't match model (z).r6   c                 *    g | ]}j         |         S r(   )feature_maps).0indexbackbone_outputs     r*   
<listcomp>z2DPTViTHybridEmbeddings.forward.<locals>.<listcomp>   s!    qqq <U Cqqqr)   rX   r\   )r   r   )shaper>   rG   r<   rk   rO   r=   rD   rs   rH   rK   flatten	transposerN   expandr$   rb   r   )rP   rl   rm   
batch_sizer>   heightwidthrO   featuresoutput_hidden_states
embeddings
cls_tokensrv   s               @r*   forwardzDPTViTHybridEmbeddings.forward   s    3?2D/
L&%4,,,w   ( 	+++u8J/J/J E E E% E E+E E.2oa.@E E E  
 #44$f&?$/AY
 
 --55"/3  rqqqQUQpqqq__X..66q99CCAqII
^**:r2>>
Y
J7Q???
  "55
 :)%9
 
 
 	
r)   Nr   )F)r    r!   r"   r#   r   r   r'   intr;   rk   r$   Tensorboolr   r   __classcell__rT   s   @r*   r2   r2   X   s          e  ey  esCx8Q  e  e  e  e  e  eD    LQ&
 &
!L&
DH&
	3&
 &
 &
 &
 &
 &
 &
 &
r)   r2   c                   D     e Zd ZdZ fdZddZdej        defdZ	 xZ
S )	DPTViTEmbeddingszB
    Construct the CLS token, position and patch embeddings.

    c                    t                                                       t          j        t	          j        dd|j                            | _        t          |          | _	        | j	        j
        }t          j        t	          j        d|dz   |j                            | _        t          j        |j                  | _        || _        d S )Nr   )r:   r;   r   rL   r$   rM   r?   rN   DPTViTPatchEmbeddingspatch_embeddingsrQ   rO   Dropouthidden_dropout_probdropoutr3   )rP   r3   rQ   rT   s      r*   r;   zDPTViTEmbeddings.__init__   s    ek!Q8J&K&KLL 5f = =+7#%<A{QPVPb0c0c#d#d z&"<==r)   r   c                    |d d d |f         }|d|d f         }t          |                    d          dz            }|                    d||d                              dddd          }t          j                            |||fd          }|                    dddd                              d||z  d          }t          j        ||gd	          }|S rV   )	r   rZ   r^   r_   r   r`   ra   r$   rb   rc   s           r*   rk   z"DPTViTEmbeddings._resize_pos_embed   s    AAA||O,
Q_-!+"2"21"5"5"<==!))!]M2NNVVWXZ[]^`abbm//CSUdBelv/ww!))!Q155==aAQTcAceghhJ4!<<<r)   rl   rn   c                    |j         \  }}}}| j        j        }|                     | j        ||z  ||z            }|                     |          }|                                \  }}	}
| j                            |dd          }t          j
        ||fd          }||z   }|                     |          }t          |          S )Nr6   r   r\   )r   )rx   r3   r=   rk   rO   r   rZ   rN   r{   r$   rb   r   r   )rP   rl   r|   r>   r}   r~   r=   rO   r   seq_len_r   s               r*   r   zDPTViTEmbeddings.forward   s    2>2D/
L&% [+
"44$f
&:EZ<O
 
 **<88
!+!2!2
GQ ^**:r2>>
Y
J7Q???
  "55
\\*--
9ZXXXXr)   r   )r    r!   r"   r#   r;   rk   r$   r   r   r   r   r   s   @r*   r   r      s         
       YEL Y5_ Y Y Y Y Y Y Y Yr)   r   c                   L     e Zd ZdZdef fdZdej        dej        fdZ xZ	S )r   z$
    Image to Patch Embedding.

    r3   c                    t                                                       |j        |j        }}|j        |j        }}t          |t          j        j	                  r|n||f}t          |t          j        j	                  r|n||f}|d         |d         z  |d         |d         z  z  }|| _        || _        || _        || _
        t          j        ||||          | _        d S )Nr   r   )r9   stride)r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rQ   r   rJ   rK   )rP   r3   r<   r=   r>   r?   rQ   rT   s          r*   r;   zDPTViTPatchEmbeddings.__init__   s    !'!2F4EJ
$*$79Kk#-j+/:R#S#SqZZZdfpYq
#-j+/:R#S#SqZZZdfpYq
!!}
15*Q-:VW=:XY$$(&)L+:^hiiir)   rl   rn   c                     |j         \  }}}}|| j        k    rt          d          |                     |                              d                              dd          }|S )Nrp   rX   r   )rx   r>   rG   rK   ry   rz   )rP   rl   r|   r>   r}   r~   r   s          r*   r   zDPTViTPatchEmbeddings.forward  sm    2>2D/
L&%4,,,w   __\22::1==GG1MM
r)   
r    r!   r"   r#   r   r;   r$   r   r   r   r   s   @r*   r   r      s{         
jy j j j j j jEL U\        r)   r           modulequerykeyvalueattention_maskscalingr   c                    t          j        ||                    dd                    |z  }t          j                            |dt           j                                      |j                  }t          j        	                    ||| j
                  }|||z  }t          j        ||          }	|	                    dd                                          }	|	|fS )Nr6   r7   )r]   dtype)ptrainingr   rX   )r$   matmulrz   r   r`   softmaxfloat32tor   r   r   
contiguous)
r   r   r   r   r   r   r   kwargsattn_weightsattn_outputs
             r*   eager_attention_forwardr     s     <s}}R'<'<==GL =((2U](SSVVW\WbccL =((6?([[L !#n4,|U33K''1--88::K$$r)   c            	            e Zd Zdef fdZ	 ddej        deej                 deej        ej        f         fdZ	 xZ
S )	DPTSelfAttentionr3   c                    t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          || _        |j        | _        t          |j        |j        z            | _        | j        | j        z  | _	        |j
        | _        | j        dz  | _        d| _        t          j        |j        | j	        |j                  | _        t          j        |j        | j	        |j                  | _        t          j        |j        | j	        |j                  | _        d S )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .g      F)bias)r:   r;   r?   num_attention_headshasattrrG   r3   r   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr   Linearqkv_biasr   r   r   rP   r3   rT   s     r*   r;   zDPTSelfAttention.__init__/  sB    ::a??PVXhHiHi?76#5 7 737 7 7  
 #)#= #&v'9F<V'V#W#W !58PP"?/5Yv143EFO\\\
9V/1C&/ZZZYv143EFO\\\


r)   Nr/   	head_maskrn   c           
         |j         d         }|d| j        | j        f} |                     |          j        |                     dd          } |                     |          j        |                     dd          } |                     |          j        |                     dd          }t          }| j	        j
        dk    rt          | j	        j
                 } || ||||| j        | j        | j        sdn| j                  \  }	}
|	                                d d         | j        fz   }|	                    |          }	|	|
fS )	Nr   r6   r   rX   eagerr   )r   r   r   r7   )rx   r   r   r   viewrz   r   r   r   r3   _attn_implementationr   r   r   r   r   rZ   r   r^   )rP   r/   r   r|   	new_shape	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapes               r*   r   zDPTSelfAttention.forwardC  sY    #(+
D$<d>VV	0DHH]++0)<FFq!LL	4djj//4i@JJ1aPP4djj//4i@JJ1aPP(?;+w66"9$+:Z"[)<)<nL#}CCC$2C	*
 	*
 	*
& #0"4"4"6"6ss";t?Q>S"S%--.EFFo--r)   r   )r    r!   r"   r   r;   r$   r   r   r'   r   r   r   s   @r*   r   r   .  s        ]y ] ] ] ] ] ]* PT. ."\.6>u|6L.	u|U\)	*. . . . . . . .r)   r   c                   Z     e Zd ZdZdef fdZdej        dej        dej        fdZ xZ	S )DPTViTSelfOutputz
    The residual connection is defined in ViTLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    r3   c                     t                                                       t          j        |j        |j                  | _        t          j        |j                  | _        d S r   )	r:   r;   r   r   r?   denser   r   r   r   s     r*   r;   zDPTViTSelfOutput.__init__i  sJ    Yv163EFF
z&"<==r)   r/   input_tensorrn   c                 Z    |                      |          }|                     |          }|S r   r   r   rP   r/   r   s      r*   r   zDPTViTSelfOutput.forwardn  s*    

=11]33r)   r   r   s   @r*   r   r   c  s         
>y > > > > > >
U\  RWR^        r)   r   c                   |     e Zd Zdef fdZdee         fdZd
dej	        de
ej	                 dej	        fd	Z xZS )DPTViTAttentionr3   c                     t                                                       t          |          | _        t	          |          | _        t                      | _        d S r   )r:   r;   r   	attentionr   outputsetpruned_headsr   s     r*   r;   zDPTViTAttention.__init__v  sI    )&11&v..EEr)   headsc                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r\   )rF   r   r   r   r   r   r   r   r   r   r   r   r   union)rP   r   ru   s      r*   prune_headszDPTViTAttention.prune_heads|  s   u::??F74>5t~7Y[_[l
 
u
  2$.2FNN/0BEJJ1$.2FNN.t{/@%QOOO .2^-ORUV[R\R\-\*'+~'IDNLn'n$ -33E::r)   Nr/   r   rn   c                 d    |                      ||          \  }}|                     ||          }|S r   )r   r   )rP   r/   r   self_attn_outputr   r   s         r*   r   zDPTViTAttention.forward  s4    "nn]IFF!-}==r)   r   )r    r!   r"   r   r;   r   r   r   r$   r   r   r   r   r   s   @r*   r   r   u  s        "y " " " " " ";S ; ; ; ;$ U\ hu|>T `e`l        r)   r   c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )DPTViTIntermediater3   c                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r:   r;   r   r   r?   intermediate_sizer   r@   
hidden_actstrr	   intermediate_act_fnr   s     r*   r;   zDPTViTIntermediate.__init__  sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r)   r/   rn   c                 Z    |                      |          }|                     |          }|S r   )r   r   )rP   r/   s     r*   r   zDPTViTIntermediate.forward  s,    

=1100??r)   	r    r!   r"   r   r;   r$   r   r   r   r   s   @r*   r   r     sj        9y 9 9 9 9 9 9U\ el        r)   r   c                   V     e Zd Zdef fdZdej        dej        dej        fdZ xZS )DPTViTOutputr3   c                     t                                                       t          j        |j        |j                  | _        t          j        |j                  | _	        d S r   )
r:   r;   r   r   r   r?   r   r   r   r   r   s     r*   r;   zDPTViTOutput.__init__  sJ    Yv79KLL
z&"<==r)   r/   r   rn   c                 d    |                      |          }|                     |          }||z   }|S r   r   r   s      r*   r   zDPTViTOutput.forward  s4    

=11]33%4r)   r   r   s   @r*   r   r     su        >y > > > > > >
U\  RWR^        r)   r   c                   h     e Zd ZdZdef fdZd	dej        deej                 dej        fdZ	 xZ
S )
DPTViTLayerz?This corresponds to the Block class in the timm implementation.r3   c                 z   t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        t          j        |j        |j                  | _        t          j        |j        |j                  | _        d S )Nr   eps)r:   r;   chunk_size_feed_forwardseq_len_dimr   r   r   intermediater   r   r   	LayerNormr?   layer_norm_epslayernorm_beforelayernorm_afterr   s     r*   r;   zDPTViTLayer.__init__  s    '-'E$(00.v66"6** "V-?VEZ [ [ [!|F,>FDYZZZr)   Nr/   r   rn   c                     |                      |          }|                     ||          }||z   }|                     |          }|                     |          }|                     ||          }|S r   )r   r   r   r   r   )rP   r/   r   hidden_states_normattention_outputlayer_outputs         r*   r   zDPTViTLayer.forward  sz    !22=AA>>*<iHH )=8 ++M::((66 {{<??r)   r   )r    r!   r"   r#   r   r;   r$   r   r   r   r   r   s   @r*   r   r     s        II[y [ [ [ [ [ [ U\ hu|>T `e`l        r)   r   c            	       `     e Zd Zdef fdZ	 d
dej        deej                 dede	fd	Z
 xZS )DPTViTEncoderr3   c                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S r(   )r   )rt   r   r3   s     r*   rw   z*DPTViTEncoder.__init__.<locals>.<listcomp>  s!    #a#a#aAK$7$7#a#a#ar)   F)	r:   r;   r3   r   
ModuleListrangenum_hidden_layerslayergradient_checkpointingr   s    `r*   r;   zDPTViTEncoder.__init__  s`    ]#a#a#a#avG_A`A`#a#a#abb
&+###r)   NFr/   r   r   rn   c                     |r|gnd }t          | j                  D ]4\  }}|||         nd } |||          }|r|                    |           5t          ||rt	          |          nd           S )N)r-   r/   )	enumerater  appendr   r'   )rP   r/   r   r   all_hidden_statesilayer_modulelayer_head_masks           r*   r   zDPTViTEncoder.forward  s     0DM]OO(44 	8 	8OA|.7.CillO(LHHM  8!((777+6GQ% 1222T
 
 
 	
r)   NF)r    r!   r"   r   r;   r$   r   r   r   r   r   r   r   s   @r*   r   r     s        ,y , , , , , , sx
 
"\
6>u|6L
ko
	
 
 
 
 
 
 
 
r)   r   c                   l     e Zd ZdZ fdZd Zd Zd	deej	                 deej	                 fdZ
 xZS )
DPTReassembleStagea@  
    This class reassembles the hidden states of the backbone into image-like feature representations at various
    resolutions.

    This happens in 3 stages:
    1. Map the N + 1 tokens to a set of N tokens, by taking into account the readout ([CLS]) token according to
       `config.readout_type`.
    2. Project the channel dimension of the hidden states according to `config.neck_hidden_sizes`.
    3. Resizing the spatial dimensions (height, width).

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
    c                    t                                                       || _        t          j                    | _        |j        r|                     |           n|                     |           |j	        | _	        d S r   )
r:   r;   r3   r   r  layers	is_hybrid_init_reassemble_dpt_hybrid_init_reassemble_dptneck_ignore_stagesr   s     r*   r;   zDPTReassembleStage.__init__  st    moo 	.,,V4444%%f---"(";r)   c           	      j   t          t          t          |j                            |j                  D ]r\  }}|dk    r,| j                            t          j                               7|dk    r5| j                            t          ||j        |         |                     s|j
        dk    rt          d|j
         d          t          j                    | _        t          |          }t          t          |j                            D ]}|dk    r>| j                            t          j        t          j                                         F|dk    rS| j                            t          j        t          j        d|z  |          t"          |j                                      dS )a   "
        For DPT-Hybrid the first 2 reassemble layers are set to `nn.Identity()`, please check the official
        implementation: https://github.com/isl-org/DPT/blob/f43ef9e08d70a752195028a51be5e1aff227b913/dpt/vit.py#L438
        for more details.
        r   rE   factorprojectzReadout type z! is not supported for DPT-Hybrid.rX   N)zipr  rF   neck_hidden_sizesreassemble_factorsr  r
  r   IdentityDPTReassembleLayerreadout_typerG   r  readout_projects_get_backbone_hidden_size
Sequentialr   r	   r   )rP   r3   r  r  r?   s        r*   r  z.DPTReassembleStage._init_reassemble_dpt_hybrid  s    U3v'?#@#@AA6C\]] 	t 	tIAvAvv""2;==1111Q""#5fvG_`aGbkq#r#r#rsss)++cV-@cccddd !#/77s634455 	 	AAvv%,,R]2;==-I-IJJJJQ%,,M")AO["I"I6RXRcKdee  		 	r)   c           	      :   t          t          t          |j                            |j                  D ]:\  }}| j                            t          ||j        |         |                     ;|j        dk    rt          j
                    | _        t          |          }t          t          |j                            D ]W}| j                            t          j        t          j        d|z  |          t          |j                                      Vd S d S )Nr  r  rX   )r  r  rF   r  r  r  r
  r   r!  r   r  r"  r#  r$  r   r	   r   )rP   r3   r  r  r?   r   s         r*   r  z'DPTReassembleStage._init_reassemble_dpt  s   U3v'?#@#@AA6C\]] 	p 	pIAvK1&6C[\]C^gmnnnoooo)++$&MOOD!3F;;K3v78899  %,,M")AO["I"I6RXRcKdee   	 ,+ r)   Nr/   rn   c                    g }t          |          D ]\  }}|| j        vr|dddf         |ddddf         }}|j        \  }}	}
|||                    ||||
          }n*t	          |	dz            }|                    ||||
          }|                    dddd                                          }|j        }| j        j        dk    r|	                    d                              d          }|
                    d                              |          } | j        |         t          j        ||fd	                    }|                    ddd                              |          }nP| j        j        d
k    r@|	                    d          |
                    d	          z   }|                    |          } | j        |         |          }|                    |           |S )z
        Args:
            hidden_states (`list[torch.FloatTensor]`, each of shape `(batch_size, sequence_length + 1, hidden_size)`):
                List of hidden states from the backbone.
        Nr   r   rW   r   rX   r  )r   rX   r   r6   add)r	  r  rx   r^   r   r_   r   r3   r!  ry   	unsqueeze	expand_asr"  r$   rb   r  r
  )rP   r/   patch_heightpatch_widthoutr  hidden_staterN   r|   sequence_lengthr>   rZ   feature_shapereadouts                 r*   r   zDPTReassembleStage.forward+  s    (77 	% 	%OA|///*6qqq!t*<l111abb5>Q<	<H<N9
O\+0G#/#7#7
LR]_k#l#lLL$_c%9::D#/#7#7
D$P\#]#]L+33Aq!Q??JJLL , 2;+y88#/#7#7#:#:#B#B9#M#ML'11!44>>|LLG#;4#8#;EI|U\F]_a<b<b#c#cL#/#7#71a#@#@#H#H#W#WLL[-66#/#7#7#:#:Y=P=PQS=T=T#TL#/#7#7#F#FL-t{1~l;;JJ|$$$$
r)   NN)r    r!   r"   r#   r;   r  r  listr$   r   r   r   r   s   @r*   r  r    s         
< 
< 
< 
< 
<  4
 
 
# #T%,%7 #aefkfras # # # # # # # #r)   r  c                 H    | j         | j        du r| j         j        S | j        S r  )backbone_configr  r?   )r3   s    r*   r#  r#  Q  s-    )f.>%.G.G%11!!r)   c                   2     e Zd Zdededef fdZd Z xZS )r   r3   rE   r  c           	         t                                                       t          |          }t          j        ||d          | _        |dk    r t          j        ||||d          | _        d S |dk    rt          j                    | _        d S |dk     r0t          j        ||dt          d|z            d          | _        d S d S )Nr   )in_channelsout_channelsr9   r   r9   r   paddingr   )
r:   r;   r#  r   rJ   rK   ConvTranspose2dresizer  r   )rP   r3   rE   r  r?   rT   s        r*   r;   zDPTReassembleLayer.__init__Y  s    /77)(`abbb A::,XxV\blmnnnDKKKq[[+--DKKKaZZ)HhAcRSV\R\ooghiiiDKKK Zr)   c                 Z    |                      |          }|                     |          }|S r   )rK   r<  )rP   r-  s     r*   r   zDPTReassembleLayer.forwardh  s*    |44{{<00r)   )r    r!   r"   r   r   r;   r   r   r   s   @r*   r   r   X  sj        jy jC j j j j j j j      r)   r   c                   *     e Zd Zdef fdZd Z xZS )DPTFeatureFusionStager3   c                    t                                                       t          j                    | _        t          t          |j                            D ])}| j                            t          |                     *d S r   )
r:   r;   r   r  r  r  rF   r  r
  DPTFeatureFusionLayer)rP   r3   r   rT   s      r*   r;   zDPTFeatureFusionStage.__init__o  st    moos634455 	> 	>AK4V<<====	> 	>r)   c                     |d d d         }g }d }t          || j                  D ]4\  }}| ||          }n |||          }|                    |           5|S )Nr6   )r  r  r
  )rP   r/   fused_hidden_statesfused_hidden_stater-  r  s         r*   r   zDPTFeatureFusionStage.forwardu  s    %ddd+ !#&}dk#B#B 	; 	;L%!)%*U<%8%8""%*U+=|%L%L"&&'9::::""r)   )r    r!   r"   r   r;   r   r   r   s   @r*   r?  r?  n  sS        >y > > > > > ># # # # # # #r)   r?  c                   L     e Zd ZdZdef fdZdej        dej        fdZ xZ	S )DPTPreActResidualLayerz
    ResidualConvUnit, pre-activate residual unit.

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
    r3   c                    t                                                       |j        | _        |j        |j        n| j         }t          j                    | _        t          j        |j	        |j	        ddd|          | _
        t          j                    | _        t          j        |j	        |j	        ddd|          | _        | j        r>t          j        |j	                  | _        t          j        |j	                  | _        d S d S )Nr   r   )r9   r   r:  r   )r:   r;   !use_batch_norm_in_fusion_residualuse_batch_normuse_bias_in_fusion_residualr   ReLUactivation1rJ   fusion_hidden_sizeconvolution1activation2convolution2BatchNorm2dbatch_norm1batch_norm2)rP   r3   rJ  rT   s      r*   r;   zDPTPreActResidualLayer.__init__  s   $F 1= ..(( 	$ 799I%%,
 
 
 799I%%,
 
 
  	I!~f.GHHD!~f.GHHD	I 	Ir)   r-  rn   c                 (   |}|                      |          }|                     |          }| j        r|                     |          }|                     |          }|                     |          }| j        r|                     |          }||z   S r   )rL  rN  rI  rR  rO  rP  rS  rP   r-  residuals      r*   r   zDPTPreActResidualLayer.forward  s    ''55((66 	:++L99L''55((66 	:++L99Lh&&r)   r   r   s   @r*   rF  rF    s|          Iy  I  I  I  I  I  ID'EL 'U\ ' ' ' ' ' ' ' 'r)   rF  c                   n     e Zd ZdZddedef fdZddej        de	ej                 d	ej        fd
Z
 xZS )rA  a3  Feature fusion layer, merges feature maps from different stages.

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
        align_corners (`bool`, *optional*, defaults to `True`):
            The align_corner setting for bilinear upsample.
    Tr3   align_cornersc                     t                                                       || _        t          j        |j        |j        dd          | _        t          |          | _        t          |          | _	        d S )Nr   T)r9   r   )
r:   r;   rX  r   rJ   rM  rK   rF  residual_layer1residual_layer2)rP   r3   rX  rT   s      r*   r;   zDPTFeatureFusionLayer.__init__  si    *)F$=v?Xfgnrsss5f==5f==r)   Nr-  rV  rn   c                 t   |c|j         |j         k    r;t          j                            ||j         d         |j         d         fdd          }||                     |          z   }|                     |          }t          j                            |dd| j                  }|                     |          }|S )NrX   r   rY   FrZ   r[   rX  scale_factorr[   rX  )rx   r   r`   ra   rZ  r[  rX  rK   rU  s      r*   r   zDPTFeatureFusionLayer.forward  s    !X^33=44L$6q$9<;Ma;P#QXbrw 5   ($*>*>x*H*HHL++L99}00qzI[ 1 
 
 |44r)   Tr   )r    r!   r"   r#   r   r   r;   r$   r   r   r   r   r   s   @r*   rA  rA    s         > >y > > > > > > > EL HU\<R ^c^j        r)   rA  c                   D    e Zd ZU eed<   dZdZdZdZdZ	dZ
dZdeiZd ZdS )DPTPreTrainedModelr3   dptrl   Tr0   c                    t          |t          j        t          j        t          j        f          rQ|j        j                            d| j        j	                   |j
        |j
        j                                         nct          |t          j        t          j        f          r=|j
        j                                         |j        j                            d           t          |t          t           f          r>|j        j                                         |j        j                                         dS dS )zInitialize the weightsr   )meanstdNg      ?)r@   r   r   rJ   r;  weightdatanormal_r3   initializer_ranger   zero_r   rQ  fill_r   r2   rN   rO   )rP   r   s     r*   _init_weightsz DPTPreTrainedModel._init_weights  s   fry")R5GHII 	* M&&CT[5R&SSS{& &&(((r~ >?? 	*K""$$$M$$S)))f/1GHII 	4!'')))&+1133333	4 	4r)   N)r    r!   r"   r   r&   base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   _can_record_outputsrm  r(   r)   r*   rb  rb    sg         $O&*#N"&&4 4 4 4 4r)   rb  c                        e Zd Zddedef fdZd Zd Z ed          e		 	 dd
e
j        dee
j                 dee         defd                        Z xZS )DPTModelTr3   add_pooling_layerc                    t                                          |           || _        |j        rt	          |          | _        nt          |          | _        t          |          | _        t          j
        |j        |j                  | _        |rt          |          nd| _        |                                  dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        r   N)r:   r;   r3   r  r2   r   r   r   encoderr   r   r?   r   	layernormDPTViTPoolerpooler	post_init)rP   r3   rx  rT   s      r*   r;   zDPTModel.__init__  s    
 	     	74V<<DOO.v66DO$V,,f&8f>STTT.?Il6***T 	r)   c                 @    | j         j        r| j        S | j        j        S r   )r3   r  r   r   )rP   s    r*   get_input_embeddingszDPTModel.get_input_embeddings  s"    ;  	4?"?33r)   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrz  r  r   r   )rP   heads_to_pruner  r   s       r*   _prune_headszDPTModel._prune_heads#  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr)   F)tie_last_hidden_statesNrl   r   r   rn   c                 t   || j         j        }|                     || j         j                  }|                     |          }|j        }|                     |||          }|j        }|                     |          }| j	        | 	                    |          nd }	t          ||	|j        |j                  S )Nr   r   )r-   r.   r   r/   )r3   r   get_head_maskr  r   r   rz  r-   r{  r}  r,   r   r/   )
rP   rl   r   r   r   embedding_outputembedding_last_hidden_statesencoder_outputssequence_outputpooled_outputs
             r*   r   zDPTModel.forward+  s      '#';#C  &&y$+2OPP	GKWcGdGd'7'J$+/<<(ITh ,8 ,
 ,
 *;..998<8OO444UYC-'%5%N)7	
 
 
 	
r)   r`  r1  )r    r!   r"   r   r   r;   r  r  r   r   r$   r%   r   r,   r   r   r   s   @r*   rw  rw    s         y T      *4 4 4C C C u555 26/3	!
 !
'!
 E-.!
 'tn	!
 
>!
 !
 !
 ^ 65!
 !
 !
 !
 !
r)   rw  c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )r|  r3   c                     t                                                       t          j        |j        |j                  | _        t          |j                 | _	        d S r   )
r:   r;   r   r   r?   pooler_output_sizer   r	   
pooler_act
activationr   s     r*   r;   zDPTViTPooler.__init__S  sE    Yv163LMM
 !23r)   r/   rn   c                 r    |d d df         }|                      |          }|                     |          }|S )Nr   )r   r  )rP   r/   first_token_tensorr  s       r*   r   zDPTViTPooler.forwardX  s@     +111a40

#56666r)   r   r   s   @r*   r|  r|  R  sj        4y 4 4 4 4 4 4
U\ el        r)   r|  c            
            e Zd ZdZdef fdZ	 	 d
deej                 de	e
         de	e
         deej                 fd	Z xZS )DPTNecka;  
    DPTNeck. A neck is a module that is normally used between the backbone and the head. It takes a list of tensors as
    input and produces another list of tensors as output. For DPT, it includes 2 stages:

    * DPTReassembleStage
    * DPTFeatureFusionStage.

    Args:
        config (dict): config dict.
    r3   c           
         t                                                       || _        |j        |j        j        dk    rd | _        nt          |          | _        t          j                    | _	        |j
        D ]8}| j	                            t          j        ||j        ddd                     9t          |          | _        d S )Nswinv2r   r   Fr9   r:  r   )r:   r;   r3   r4  
model_typereassemble_stager  r   r  convsr  r
  rJ   rM  r?  fusion_stage)rP   r3   channelrT   s      r*   r;   zDPTNeck.__init__m  s     !-&2H2SW_2_2_$(D!!$6v$>$>D!]__
/ 	s 	sGJbi1JXYcdkpqqqrrrr 2&99r)   Nr/   r*  r+  rn   c                 l    t          |t          t          f          st          d          t	          |          t	           j        j                  k    rt          d           j                             |||          } fdt          |          D             } 
                    |          }|S )z
        Args:
            hidden_states (`list[torch.FloatTensor]`, each of shape `(batch_size, sequence_length, hidden_size)` or `(batch_size, hidden_size, height, width)`):
                List of hidden states from the backbone.
        z2hidden_states should be a tuple or list of tensorszOThe number of hidden states should be equal to the number of neck hidden sizes.Nc                 B    g | ]\  }} j         |         |          S r(   )r  )rt   r  featurerP   s      r*   rw   z#DPTNeck.forward.<locals>.<listcomp>  s-    VVVzq'MDJqM'**VVVr)   )r@   r'   r2  	TypeErrorrF   r3   r  rG   r  r	  r  )rP   r/   r*  r+  r   r   s   `     r*   r   zDPTNeck.forward~  s     -%77 	RPQQQ}T[%B!C!CCCnooo  , 11-{[[MVVVVY}=U=UVVV ""8,,r)   r1  )r    r!   r"   r#   r   r;   r2  r$   r   r   r   r   r   r   s   @r*   r  r  a  s        	 	:y : : : : : :( '+%)	 EL) sm c]	
 
el	       r)   r  c                   X     e Zd ZdZdef fdZdeej                 dej        fdZ	 xZ
S )DPTDepthEstimationHeada	  
    Output head consisting of 3 convolutional layers. It progressively halves the feature dimension and upsamples
    the predictions to the input resolution after the first convolutional layer (details can be found in the paper's
    supplementary material).
    r3   c                    t                                                       || _        d | _        |j        rt          j        ddddd          | _        |j        }t          j        t          j        ||dz  ddd          t          j	        ddd	
          t          j        |dz  dddd          t          j
                    t          j        ddddd          t          j
                              | _        d S )N   )r   r   )r   r   r9  rX   r   r   rY   Tr^      r   )r:   r;   r3   rK   add_projectionr   rJ   rM  r$  UpsamplerK  headrP   r3   r   rT   s      r*   r;   zDPTDepthEstimationHead.__init__  s      	e iSfV]cdddDO,MIhA1QPQRRRKQZtLLLIh!mRQq!LLLGIIIb!1a@@@GII
 
			r)   r/   rn   c                     || j         j                 }| j        1|                     |          } t          j                    |          }|                     |          }|                    d          }|S )Nr   r\   )r3   head_in_indexrK   r   rK  r  squeeze)rP   r/   predicted_depths      r*   r   zDPTDepthEstimationHead.forward  sl    %dk&?@?& OOM::M%BGIIm44M))M22)11a188r)   )r    r!   r"   r#   r   r;   r2  r$   r   r   r   r   s   @r*   r  r    sy         
y 
 
 
 
 
 
&T%,%7 EL        r)   r  zu
    DPT Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
    c                        e Zd Z fdZee	 	 	 d	dej        deej                 deej	                 dee
         def
d                        Z xZS )
DPTForDepthEstimationc                 T   t                                          |           d | _        |j        du r#|j        |j        t          |          | _        nt          |d          | _        t          |          | _	        t          |          | _        |                                  d S NF)rx  )r:   r;   rD   r  r4  r   rw  rc  r  neckr  r  r~  r   s     r*   r;   zDPTForDepthEstimation.__init__  s       u$$&*@*LPVP_Pk)&11DMM%@@@DH FOO	 +622	 	r)   Nrl   r   labelsr   rn   c                     | j         j        }d}|t          d           j          j        j        |fddi|}|j        }n  j        |f|dd|}|j        } j         j        s$ fdt          |dd                   D             }n?|j
        }	|	                     fdt          |dd                   D                        |	}d	\  }
} j         j        5 j         j        d
u r'|j        \  }}}} j         j        j        }||z  }
||z  }                     ||
|          }                     |          }t#          |||r|j        nd|j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth depth estimation maps for computing the loss.

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, DPTForDepthEstimation
        >>> import torch
        >>> import numpy as np
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large")
        >>> model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

        >>> # prepare image for the model
        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**inputs)

        >>> # interpolate to original size
        >>> post_processed_output = image_processor.post_process_depth_estimation(
        ...     outputs,
        ...     target_sizes=[(image.height, image.width)],
        ... )

        >>> # visualize the prediction
        >>> predicted_depth = post_processed_output[0]["predicted_depth"]
        >>> depth = predicted_depth * 255 / predicted_depth.max()
        >>> depth = depth.detach().cpu().numpy()
        >>> depth = Image.fromarray(depth.astype("uint8"))
        ```NzTraining is not implemented yetr   Tr  c                 6    g | ]\  }}|j         j        v |S r(   r3   backbone_out_indicesrt   idxr  rP   s      r*   rw   z1DPTForDepthEstimation.forward.<locals>.<listcomp>  s6     ! ! ! ,WPSW[WbWwPwPwGPwPwPwr)   r   c              3   N   K   | ]\  }}|j         j        d d         v |V   dS rX   Nr  r  s      r*   	<genexpr>z0DPTForDepthEstimation.forward.<locals>.<genexpr>  sL       . .$Wdk>qrrBBB BBBB. .r)   r1  F)lossr  r/   r0   )r3   r   NotImplementedErrorrD   forward_with_filtered_kwargsrs   rc  r/   r  r	  r   extendr4  rx   r=   r  r  r   r0   )rP   rl   r   r  r   r   r  outputsr/   backbone_hidden_statesr*  r+  r   r}   r~   r=   r  s   `                r*   r   zDPTForDepthEstimation.forward  s   ^  '#';#C %&GHHH=$@dm@ssdhslrssG#0MMdh|fyW[ff_effG#1M ;( 7! ! ! !09-:K0L0L! ! ! *1)I&&-- . . . .(1-2C(D(D. . .   
 !7$.!k;&2t{7LPU7U7U"."4Aq&%4?J!Z/L:-K		-{KK))M22#+3GQ'//T)	
 
 
 	
r)   )NNN)r    r!   r"   r;   r   r   r$   r%   r   
LongTensorr   r   r   r   r   s   @r*   r  r    s            $  26-1/3X
 X
'X
 E-.X
 )*	X

 'tnX
 
X
 X
 X
 ^ X
 X
 X
 X
 X
r)   r  c                   T     e Zd Zdef fdZdeej                 dej        fdZ xZ	S )DPTSemanticSegmentationHeadr3   c                    t                                                       || _        |j        }t	          j        t	          j        ||ddd          t	          j        |          t	          j                    t	          j	        |j
                  t	          j        ||j        d          t	          j        ddd	                    | _        d S )
Nr   r   Fr  r8   rX   rY   Tr^  )r:   r;   r3   rM  r   r$  rJ   rQ  rK  r   semantic_classifier_dropout
num_labelsr  r  r  s      r*   r;   z$DPTSemanticSegmentationHead.__init__9  s    ,MIhaOOON8$$GIIJv9::Ih 1qAAAKQZtLLL
 
			r)   r/   rn   c                 T    || j         j                 }|                     |          }|S r   )r3   r  r  rP   r/   logitss      r*   r   z#DPTSemanticSegmentationHead.forwardG  s'    %dk&?@=))r)   )
r    r!   r"   r   r;   r2  r$   r   r   r   r   s   @r*   r  r  8  so        
y 
 
 
 
 
 
T%,%7 EL        r)   r  c                   H     e Zd Zdef fdZdej        dej        fdZ xZS )DPTAuxiliaryHeadr3   c                 ^   t                                                       |j        }t          j        t          j        ||ddd          t          j        |          t          j                    t          j        dd          t          j        ||j	        d                    | _
        d S )Nr   r   Fr  g?r8   )r:   r;   rM  r   r$  rJ   rQ  rK  r   r  r  r  s      r*   r;   zDPTAuxiliaryHead.__init__O  s    ,MIhaOOON8$$GIIJsE""Ih 1qAAA
 
			r)   r/   rn   c                 0    |                      |          }|S r   )r  r  s      r*   r   zDPTAuxiliaryHead.forward[  s    =))r)   r   r   s   @r*   r  r  N  sj        

y 

 

 

 

 

 

U\ el        r)   r  c                        e Zd Zdef fdZee	 	 	 	 d
deej	                 deej	                 deej
                 dee         def
d	                        Z xZS )DPTForSemanticSegmentationr3   c                 (   t                                          |           t          |d          | _        t	          |          | _        t          |          | _        |j        rt          |          nd | _
        |                                  d S r  )r:   r;   rw  rc  r  r  r  r  use_auxiliary_headr  auxiliary_headr~  r   s     r*   r;   z#DPTForSemanticSegmentation.__init__b  s       Fe<<< FOO	 077	:@:S].v666Y] 	r)   Nrl   r   r  r   rn   c                     | j         j        }| j         j        dk    rt          d            j        |f|dd|}|j        } j         j        s$ fdt          |dd                   D             }n?|j        }|	                     fdt          |dd                   D                        |} 
                    |          }                     |          }	d}
 j                             |d	                   }
d}|t          j                            |	|j        d
d         dd          }|
0t          j                            |
|j        d
d         dd          }t#           j         j                  } |||          } |||          }| j         j        |z  z   }t)          ||	|r|j        nd|j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, DPTForSemanticSegmentation
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large-ade")
        >>> model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> logits = outputs.logits
        ```Nr   z/The number of labels should be greater than oneTr  c                 6    g | ]\  }}|j         j        v |S r(   r  r  s      r*   rw   z6DPTForSemanticSegmentation.forward.<locals>.<listcomp>  s5       (CCSWS^SsLsLsLsLsLsr)   c              3   N   K   | ]\  }}|j         j        d d         v |V   dS r  r  r  s      r*   r  z5DPTForSemanticSegmentation.forward.<locals>.<genexpr>  sM       * *(CCSWS^SstutvtvSwLwLwLwLwLwLw* *r)   )r/   r6   r7   rY   Fr]  )ignore_index)r  r  r/   r0   )r3   r   r  rG   rc  r/   r  r	  r   r  r  r  r  r   r`   ra   rx   r   semantic_loss_ignore_indexauxiliary_loss_weightr   r0   )rP   rl   r   r  r   r   r  r/   r  r  auxiliary_logitsr  upsampled_logitsupsampled_auxiliary_logitsloss_fct	main_lossauxiliary_losss   `                r*   r   z"DPTForSemanticSegmentation.forwardq  sg   @  '#';#C $+"8A"="=NOOOHPI
$-DI
 I
LRI
 I
  - {$ 
	3   ,5mABB6G,H,H  MM &-%E"")) * * * *,5mABB6G,H,H* * *    3M			>>=))*#22=3DEE!}88V\"##.Zu  9      +-/]-F-F$6<+<:]b .G . .* (T[5[\\\H !16::I%X&@&IINt{@>QQD&3GQ'//T)	
 
 
 	
r)   )NNNN)r    r!   r"   r   r;   r   r   r   r$   r%   r  r   r   r   r   r   s   @r*   r  r  `  s        y        5915-1/3S
 S
u01S
 E-.S
 )*	S

 'tnS
 
!S
 S
 S
 ^ S
 S
 S
 S
 S
r)   r  )r  r  rw  rb  )r   )Ir#   collections.abcrA   dataclassesr   typingr   r   r$   r   torch.nnr   activationsr	   modeling_layersr
   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   utilsr   r   r   r   utils.backbone_utilsr   utils.genericr   r   configuration_dptr   
get_loggerr    loggerr   r,   Moduler2   r   r   r   floatr   r   r   r   r   r   r   r   r  r#  r   r?  rF  rA  rb  rw  r|  r  r  r  r  r  r  __all__r(   r)   r*   <module>r     sL        ! ! ! ! ! ! % % % % % % % %        % % % % % % ! ! ! ! ! ! 9 9 9 9 9 9 ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ F F F F F F F F Q Q Q Q Q Q Q Q D D D D D D D D D D D D 1 1 1 1 1 1 A A A A A A A A ( ( ( ( ( ( 
	H	%	%   	M 	M 	M 	M 	M 	M 	M  	M   M M M M M; M M  M$]
 ]
 ]
 ]
 ]
RY ]
 ]
 ]
@4Y 4Y 4Y 4Y 4Yry 4Y 4Y 4Yn    BI   N % %I%<% 
% <	%
 U\*% % % % % %>1. 1. 1. 1. 1.ry 1. 1. 1.j    ry   $    bi   @        
 
 
 
 
29 
 
 
    ,   >
 
 
 
 
BI 
 
 
.e e e e e e e eP" " "       ,# # # # #BI # # #0:' :' :' :' :'RY :' :' :'z" " " " "BI " " "J 4 4 4 4 4 4 4 4: G
 G
 G
 G
 G
! G
 G
 G
V    29   7 7 7 7 7bi 7 7 7t% % % % %RY % % %P   
m
 m
 m
 m
 m
. m
 m
 
m
`    ")   ,    ry   $ e
 e
 e
 e
 e
!3 e
 e
 e
P d
c
cr)   