
     `iq              	          d Z ddlZddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ  ej        e          Zd9dej        dededej        fdZ G d dej                  Z G d dej                  Z G d dej                  Z  G d dej                  Z! G d dej                  Z" G d dej                  Z# G d  d!ej                  Z$ G d" d#ej                  Z% G d$ d%ej                  Z&e G d& d'e                      Z'e G d( d)e'                      Z( G d* d+ej                  Z) G d, d-ej                  Z* G d. d/ej                  Z+ G d0 d1ej                  Z, G d2 d3ej                  Z- ed45           G d6 d7e'                      Z.g d8Z/dS ):zPyTorch GLPN model.    N)OptionalUnion)nn   )ACT2FN)BaseModelOutputDepthEstimatorOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )
GLPNConfig        Finput	drop_probtrainingreturnc                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r   r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/glpn/modeling_glpn.py	drop_pathr#   #   s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FM    c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
GLPNDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 V    t                                                       || _        d S N)super__init__r   )selfr   	__class__s     r"   r*   zGLPNDropPath.__init__;   s$    "r$   hidden_statesc                 8    t          || j        | j                  S r(   )r#   r   r   )r+   r-   s     r"   forwardzGLPNDropPath.forward?   s    FFFr$   c                     d| j          S )Nzp=)r   )r+   s    r"   
extra_reprzGLPNDropPath.extra_reprB   s    $DN$$$r$   r(   )__name__
__module____qualname____doc__r   floatr*   r   Tensorr/   strr1   __classcell__r,   s   @r"   r&   r&   8   s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r$   r&   c                   (     e Zd ZdZ fdZd Z xZS )GLPNOverlapPatchEmbeddingsz+Construct the overlapping patch embeddings.c                     t                                                       t          j        |||||dz            | _        t          j        |          | _        d S )N   kernel_sizestridepadding)r)   r*   r   Conv2dproj	LayerNorm
layer_norm)r+   
patch_sizerA   num_channelshidden_sizer,   s        r"   r*   z#GLPNOverlapPatchEmbeddings.__init__J   s[    I"!O
 
 
	 ,{33r$   c                     |                      |          }|j        \  }}}}|                    d                              dd          }|                     |          }|||fS )Nr>   r   )rD   r   flatten	transposerF   )r+   pixel_values
embeddings_heightwidths         r"   r/   z"GLPNOverlapPatchEmbeddings.forwardV   sg    YY|,,
(.1fe  ''**44Q::
__Z00
65((r$   r2   r3   r4   r5   r*   r/   r9   r:   s   @r"   r<   r<   G   sM        55
4 
4 
4 
4 
4) ) ) ) ) ) )r$   r<   c                   ,     e Zd ZdZ fdZ	 ddZ xZS )GLPNEfficientSelfAttentionzSegFormer's efficient self-attention mechanism. Employs the sequence reduction process introduced in the [PvT
    paper](https://huggingface.co/papers/2102.12122).c                    t                                                       || _        || _        | j        | j        z  dk    r t	          d| j         d| j         d          t          | j        | j        z            | _        | j        | j        z  | _        t          j	        | j        | j                  | _
        t          j	        | j        | j                  | _        t          j	        | j        | j                  | _        t          j        |j                  | _        || _        |dk    r8t          j        ||||          | _        t          j        |          | _        d S d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()r   )r@   rA   )r)   r*   rI   num_attention_heads
ValueErrorintattention_head_sizeall_head_sizer   LinearquerykeyvalueDropoutattention_probs_dropout_probdropoutsr_ratiorC   srrE   rF   r+   configrI   rW   sequence_reduction_ratior,   s        r"   r*   z#GLPNEfficientSelfAttention.__init__e   s]   &#6 d66!;;6D$4 6 626 6 6  
 $'t'7$:R'R#S#S !58PPYt/1CDD
9T-t/ABBYt/1CDD
z&"EFF0#a''i[6NWo  DG !l;77DOOO	 ('r$   Fc                    |j         \  }}}|                     |                              |d| j        | j                                      dd          }| j        dk    r|j         \  }}	}
|                    ddd                              ||
||          }| 	                    |          }|                    ||
d                              ddd          }| 
                    |          }|                     |                              |d| j        | j                                      dd          }|                     |                              |d| j        | j                                      dd          }t          j        ||                    dd                    }|t          j        | j                  z  }t"          j                            |d          }|                     |          }t          j        ||          }|                    dddd                                          }|                                d d         | j        fz   }|                    |          }|r||fn|f}|S )Nr   r>   r   dimr   )r   r]   viewrW   rZ   rL   rc   permutereshaperd   rF   r^   r_   r   matmulmathsqrtr   
functionalsoftmaxrb   
contiguoussizer[   )r+   r-   rP   rQ   output_attentions
batch_size
seq_lengthrO   query_layerseq_lenrH   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                     r"   r/   z"GLPNEfficientSelfAttention.forward   sV    %2$7!
JJJ}%%T*b$":D<TUUYq!__ 	 =10=0C-J)11!Q::BB:|]cejkkM GGM22M)11*lBOOWWXY[\^_``M OOM::M HH]##T*b$":D<TUUYq!__ 	 JJ}%%T*b$":D<TUUYq!__ 	 !<Y5H5HR5P5PQQ+di8P.Q.QQ -//0@b/II ,,77_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r$   FrR   r:   s   @r"   rT   rT   a   s\        9 98 8 8 8 8@  7 7 7 7 7 7 7 7r$   rT   c                   $     e Zd Z fdZd Z xZS )GLPNSelfOutputc                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S r(   )r)   r*   r   r\   denser`   hidden_dropout_probrb   )r+   rf   rI   r,   s      r"   r*   zGLPNSelfOutput.__init__   sD    Y{K88
z&"<==r$   c                 Z    |                      |          }|                     |          }|S r(   )r   rb   )r+   r-   input_tensors      r"   r/   zGLPNSelfOutput.forward   s*    

=11]33r$   r2   r3   r4   r*   r/   r9   r:   s   @r"   r   r      sG        > > > > >
      r$   r   c                   ,     e Zd Z fdZd ZddZ xZS )GLPNAttentionc                     t                                                       t          ||||          | _        t	          ||          | _        t                      | _        d S )N)rf   rI   rW   rg   )rI   )r)   r*   rT   r+   r   r!   setpruned_headsre   s        r"   r*   zGLPNAttention.__init__   sc    .# 3%=	
 
 
	 %VEEEEEr$   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rk   )lenr   r+   rW   rZ   r   r   r]   r^   r_   r!   r   r[   union)r+   headsindexs      r"   prune_headszGLPNAttention.prune_heads   s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r$   Fc                     |                      ||||          }|                     |d         |          }|f|dd          z   }|S )Nr   r   )r+   r!   )r+   r-   rP   rQ   rw   self_outputsattention_outputr   s           r"   r/   zGLPNAttention.forward   sM    yy?PQQ;;|AFF#%QRR(88r$   r   )r2   r3   r4   r*   r   r/   r9   r:   s   @r"   r   r      s[        	" 	" 	" 	" 	"; ; ;$       r$   r   c                   &     e Zd Zd fd	Zd Z xZS )
GLPNDWConv   c           	          t                                                       t          j        ||dddd|          | _        d S )Nr   r   T)biasgroups)r)   r*   r   rC   dwconv)r+   rl   r,   s     r"   r*   zGLPNDWConv.__init__   s=    iS!QSIIIr$   c                     |j         \  }}}|                    dd                              ||||          }|                     |          }|                    d                              dd          }|S )Nr   r>   )r   rL   rm   r   rK   )r+   r-   rP   rQ   rx   r{   rH   s          r"   r/   zGLPNDWConv.forward   sv    ,9,?)
G\%//155:::|U[]bccM22%--a00::1a@@r$   )r   r   r:   s   @r"   r   r      sR        J J J J J J      r$   r   c                   &     e Zd Zd fd	Zd Z xZS )
GLPNMixFFNNc                    t                                                       |p|}t          j        ||          | _        t          |          | _        t          |j        t                    rt          |j                 | _        n|j        | _        t          j        ||          | _        t          j        |j                  | _        d S r(   )r)   r*   r   r\   dense1r   r   
isinstance
hidden_actr8   r   intermediate_act_fndense2r`   r   rb   )r+   rf   in_featureshidden_featuresout_featuresr,   s        r"   r*   zGLPNMixFFN.__init__   s    #2{i_== 11f'-- 	9'-f.?'@D$$'-'8D$i>>z&"<==r$   c                    |                      |          }|                     |||          }|                     |          }|                     |          }|                     |          }|                     |          }|S r(   )r   r   r   rb   r   )r+   r-   rP   rQ   s       r"   r/   zGLPNMixFFN.forward  st    M22M65AA00??]33M22]33r$   )NNr   r:   s   @r"   r   r      sL        
> 
> 
> 
> 
> 
>      r$   r   c                   *     e Zd ZdZ fdZddZ xZS )	GLPNLayerzCThis corresponds to the Block class in the original implementation.c                    t                                                       t          j        |          | _        t          ||||          | _        |dk    rt          |          nt          j                    | _	        t          j        |          | _
        t          ||z            }t          |||          | _        d S )N)rI   rW   rg   r   )r   r   )r)   r*   r   rE   layer_norm_1r   	attentionr&   Identityr#   layer_norm_2rY   r   mlp)	r+   rf   rI   rW   r#   rg   	mlp_ratiomlp_hidden_sizer,   s	           r"   r*   zGLPNLayer.__init__  s    L55&# 3%=	
 
 
 5>OOi000L55kI566f+___r$   Fc                 J   |                      |                     |          |||          }|d         }|dd          }|                     |          }||z   }|                     |                     |          ||          }|                     |          }||z   }	|	f|z   }|S )N)rw   r   r   )r   r   r#   r   r   )
r+   r-   rP   rQ   rw   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r"   r/   zGLPNLayer.forward'  s    !%m,,/	 "0 "
 "
 2!4(,  >>*:;;(=8XXd//>>NN
 ^^J//
!M1/G+r$   r   rR   r:   s   @r"   r   r     sW        MM` ` ` ` `       r$   r   c                   ,     e Zd Z fdZ	 	 	 ddZ xZS )GLPNEncoderc                 @   t                                                       | _        d t          j        dj        t          j                  d          D             }g }t          j	                  D ]d}|
                    t          j        |         j        |         |dk    rj        nj        |dz
           j        |                              et!          j        |          | _        g }d}t          j	                  D ]}g }|dk    r|j        |dz
           z  }t          j        |                   D ]_}|
                    t'          j        |         j        |         |||z            j        |         j        |                              `|
                    t!          j        |                     t!          j        |          | _        t!          j        fdt          j	                  D                       | _        d S )	Nc                 6    g | ]}|                                 S  )item).0xs     r"   
<listcomp>z(GLPNEncoder.__init__.<locals>.<listcomp>G  s     lllAqvvxxlllr$   r   cpu)r   r   )rG   rA   rH   rI   )rI   rW   r#   rg   r   c                 N    g | ]!}t          j        j        |                   "S r   )r   rE   hidden_sizes)r   irf   s     r"   r   z(GLPNEncoder.__init__.<locals>.<listcomp>o  s+    \\\aR\&-a011\\\r$   )r)   r*   rf   r   linspacedrop_path_ratesumdepthsrangenum_encoder_blocksappendr<   patch_sizesstridesrH   r   r   
ModuleListpatch_embeddingsr   rW   	sr_ratios
mlp_ratiosblockrF   )
r+   rf   dprrN   r   blockscurlayersjr,   s
    `       r"   r*   zGLPNEncoder.__init__B  s9    ml63H#fmJ\J\ej!k!k!klll 
v011 	 	A*%1!4!>!,89Q!4!4FDWXY\]X]D^ & 3A 6	      !#j 9 9 v011 	1 	1AFAvvv}QU++6=+,, 
 
$*$7$:,2,Fq,I"%cAg,171A!1D"("3A"6  	 	 	 	 MM"-//0000]6**
 -\\\\5AZ;[;[\\\
 
r$   FTc                 @   |rdnd }|rdnd }|j         d         }|}t          t          | j        | j        | j                            D ]\  }	}
|
\  }}} ||          \  }}}t          |          D ])\  }} |||||          }|d         }|r||d         fz   }* ||          }|                    |||d                              dddd                                          }|r||fz   }|st          d |||fD                       S t          |||          S )	Nr   r   r   ri   r   r>   c              3      K   | ]}||V  	d S r(   r   )r   vs     r"   	<genexpr>z&GLPNEncoder.forward.<locals>.<genexpr>  s(      mmq_`_l_l_l_l_lmmr$   last_hidden_stater-   
attentions)r   	enumeratezipr   r   rF   ro   rn   ru   tupler   )r+   rM   rw   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsrx   r-   idxr   embedding_layerblock_layer
norm_layerrP   rQ   r   blklayer_outputss                      r"   r/   zGLPNEncoder.forwardr  s    #7@BBD$5?bb4!'*
$D$94:t W WXX 	I 	IFC784O[*+:?=+I+I(M65#K00 T T3 #M65BS T T -a 0$ T*=qAQ@S*S'&J}55M)11*feRPPXXYZ\]_`bcddooqqM# I$58H$H! 	nmm]4EGZ$[mmmmmm++*
 
 
 	
r$   )FFTr   r:   s   @r"   r   r   A  sX        .
 .
 .
 .
 .
f  "$
 $
 $
 $
 $
 $
 $
 $
r$   r   c                   ,    e Zd ZU eed<   dZdZg Zd ZdS )GLPNPreTrainedModelrf   glpnrM   c                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 dS dS t          |t          j                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 
                                 dS dS t          |t          j        t          j        f          r?|j	        j        
                                 |j        j                            d           dS dS )zInitialize the weightsr   )meanstdNg      ?)r   r   r\   rC   weightdatanormal_rf   initializer_ranger   zero_	Embeddingpadding_idxrE   BatchNorm2dfill_)r+   modules     r"   _init_weightsz!GLPNPreTrainedModel._init_weights  s7   fry")455 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .-r~ >?? 	*K""$$$M$$S)))))	* 	*r$   N)	r2   r3   r4   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   r   r$   r"   r   r     sB         $O* * * * *r$   r   c                        e Zd Z fdZd Ze	 	 	 d
dej        dee	         dee	         dee	         de
eef         f
d	            Z xZS )	GLPNModelc                     t                                          |           || _        t          |          | _        |                                  d S r(   )r)   r*   rf   r   encoder	post_initr+   rf   r,   s     r"   r*   zGLPNModel.__init__  sK        #6** 	r$   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )r+   heads_to_pruner  r   s       r"   _prune_headszGLPNModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr$   NrM   rw   r   r   r   c                     ||n| j         j        }||n| j         j        }||n| j         j        }|                     ||||          }|d         }|s|f|dd          z   S t          ||j        |j                  S )Nrw   r   r   r   r   r   )rf   rw   r   use_return_dictr  r   r-   r   )r+   rM   rw   r   r   encoder_outputssequence_outputs          r"   r/   zGLPNModel.forward  s     2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B],,/!5#	 ' 
 
 *!, 	<#%(;;;-)7&1
 
 
 	
r$   )NNN)r2   r3   r4   r*   r  r   r   FloatTensorr   boolr   r   r   r/   r9   r:   s   @r"   r  r    s            C C C 
 -1/3&*
 
'
 $D>
 'tn	

 d^
 
uo%	&
 
 
 ^
 
 
 
 
r$   r  c                   *     e Zd ZdZd fd	Zd Z xZS )GLPNSelectiveFeatureFusionz
    Selective Feature Fusion module, as explained in the [paper](https://huggingface.co/papers/2201.07436) (section 3.4). This
    module adaptively selects and integrates local and global features by attaining an attention map for each feature.
    @   c           	         t                                                       t          j        t          j        t          |dz            |ddd          t          j        |          t          j                              | _        t          j        t          j        |t          |dz            ddd          t          j        t          |dz                      t          j                              | _	        t          j        t          |dz            dddd          | _
        t          j                    | _        d S )Nr>   r   r   )in_channelsout_channelsr@   rA   rB   )r)   r*   r   
SequentialrC   rY   r   ReLUconvolutional_layer1convolutional_layer2convolutional_layer3Sigmoidsigmoid)r+   
in_channelr,   s     r"   r*   z#GLPNSelectiveFeatureFusion.__init__  s   $&MI#j1n"5"5J\]fgqrsssN:&&GII%
 %
! %'MI*3zA~;N;N\]fgqrsssN3zA~..//GII%
 %
! %'IJN++!ST^_%
 %
 %
! z||r$   c                    t          j        ||fd          }|                     |          }|                     |          }|                     |          }|                     |          }||d d dd d d d f                             d          z  ||d d dd d d d f                             d          z  z   }|S )Nr   rk   r   )r   catr  r   r!  r#  	unsqueeze)r+   local_featuresglobal_featuresfeaturesattnhybrid_featuress         r"   r/   z"GLPNSelectiveFeatureFusion.forward  s    9no>AFFF,,X66,,X66,,X66||H%%(41aaa
+;+E+Ea+H+HH?]aAAq!!!QQQJ^

)A,,L  r$   )r  rR   r:   s   @r"   r  r    sV         
$ $ $ $ $ $*      r$   r  c                   &     e Zd Z fdZddZ xZS )GLPNDecoderStagec                    t                                                       ||k    }|st          j        ||d          nt          j                    | _        t          |          | _        t          j        ddd          | _	        d S )Nr   )r@   r>   bilinearFscale_factormodealign_corners)
r)   r*   r   rC   r   convolutionr  fusionUpsampleupsample)r+   r  r  should_skipr,   s       r"   r*   zGLPNDecoderStage.__init__  s|    !\1Vat29[,ANNNNgigrgtgt0>>SXYYYr$   Nc                     |                      |          }||                     ||          }|                     |          }|S r(   )r5  r6  r8  )r+   hidden_stateresiduals      r"   r/   zGLPNDecoderStage.forward  sE    ''55;;|X>>L}}\22r$   r(   r   r:   s   @r"   r.  r.    sQ        Z Z Z Z Z	 	 	 	 	 	 	 	r$   r.  c                   Z     e Zd Z fdZdeej                 deej                 fdZ xZS )GLPNDecoderc                    t                                                       |j        d d d         }|j        t	          j        fd|D                       | _        d | j        d         _        t	          j        ddd          | _	        d S )Nri   c                 0    g | ]}t          |          S r   )r.  )r   rI   r  s     r"   r   z(GLPNDecoder.__init__.<locals>.<listcomp>0  s$    bbb[k<88bbbr$   r   r>   r0  Fr1  )
r)   r*   r   decoder_hidden_sizer   r   stagesr6  r7  final_upsample)r+   rf   reserved_hidden_sizesr  r,   s      @r"   r*   zGLPNDecoder.__init__)  s     & 3DDbD 91mbbbbLabbb
 
 !%A kqzY^___r$   r-   r   c                     g }d }t          |d d d         | j                  D ]&\  }} |||          }|                    |           '|                     |          |d<   |S )Nri   )r   rB  r   rC  )r+   r-   stage_hidden_statesstage_hidden_stater;  stages         r"   r/   zGLPNDecoder.forward7  s     !#&}TTrT':DK#H#H 	; 	;L%!&|5G!H!H&&'9::::"&"5"56H"I"IB""r$   	r2   r3   r4   r*   listr   r7   r/   r9   r:   s   @r"   r>  r>  (  sm        ` ` ` ` `	#T%,%7 	#D<N 	# 	# 	# 	# 	# 	# 	# 	#r$   r>  c                   *     e Zd ZdZd fd	Zd Z xZS )	SiLogLossz
    Implements the Scale-invariant log scale loss [Eigen et al., 2014](https://huggingface.co/papers/1406.2283).

    $$L=\frac{1}{n} \sum_{i} d_{i}^{2}-\frac{1}{2 n^{2}}\left(\sum_{i} d_{i}^{2}\right)$$ where $d_{i}=\log y_{i}-\log
    y_{i}^{*}$.

          ?c                 V    t                                                       || _        d S r(   )r)   r*   lambd)r+   rO  r,   s     r"   r*   zSiLogLoss.__init__L  s$    


r$   c                 r   |dk                                     }t          j        ||                   t          j        ||                   z
  }t          j        t          j        |d                                          | j        t          j        |                                d          z  z
            }|S )Nr   r>   )detachr   logrr   powr   rO  )r+   predtarget
valid_maskdiff_loglosss         r"   r/   zSiLogLoss.forwardP  s    qj((**
9VJ/0059T*=M3N3NNz%)Ha005577$*uyQYQ^Q^Q`Q`bcGdGd:ddeer$   )rM  rR   r:   s   @r"   rL  rL  C  sV                    r$   rL  c                   N     e Zd Z fdZdeej                 dej        fdZ xZS )GLPNDepthEstimationHeadc                    t                                                       || _        |j        }t	          j        t	          j        ||ddd          t	          j        d          t	          j        |dddd                    | _        d S )Nr   r   r?   F)inplace)	r)   r*   rf   rA  r   r  rC   r  head)r+   rf   channelsr,   s      r"   r*   z GLPNDepthEstimationHead.__init__Y  s    -MIha1MMMGE"""IhqAFFF
 
			r$   r-   r   c                     || j         j                 }|                     |          }t          j        |          | j         j        z  }|                    d          }|S )Nr   rk   )rf   head_in_indexr]  r   r#  	max_depthsqueeze)r+   r-   predicted_depths      r"   r/   zGLPNDepthEstimationHead.forwarde  sW    %dk&?@		-00-669NN)11a188r$   rI  r:   s   @r"   rZ  rZ  X  sc        

 

 

 

 

	T%,%7 	EL 	 	 	 	 	 	 	 	r$   rZ  zg
    GLPN Model transformer with a lightweight depth estimation head on top e.g. for KITTI, NYUv2.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d
dej        deej                 dee         dee         dee         de	e
ej                 ef         fd	            Z xZS )GLPNForDepthEstimationc                     t                                          |           t          |          | _        t	          |          | _        t          |          | _        |                                  d S r(   )	r)   r*   r  r   r>  decoderrZ  r]  r	  r
  s     r"   r*   zGLPNForDepthEstimation.__init__w  s`       f%%	"6**+F33	 	r$   NrM   labelsrw   r   r   r   c                    ||n| j         j        }||n| j         j        }|                     ||d|          }|r|j        n|d         }|                     |          }|                     |          }	d}
|t                      } ||	|          }
|s)|r|	f|dd         z   }n|	f|dd         z   }|
|
f|z   n|S t          |
|	|r|j        nd|j	                  S )a  
        labels (`torch.FloatTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth depth estimation maps for computing the loss.

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, GLPNForDepthEstimation
        >>> import torch
        >>> import numpy as np
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("vinvino02/glpn-kitti")
        >>> model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-kitti")

        >>> # prepare image for the model
        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**inputs)

        >>> # interpolate to original size
        >>> post_processed_output = image_processor.post_process_depth_estimation(
        ...     outputs,
        ...     target_sizes=[(image.height, image.width)],
        ... )

        >>> # visualize the prediction
        >>> predicted_depth = post_processed_output[0]["predicted_depth"]
        >>> depth = predicted_depth * 255 / predicted_depth.max()
        >>> depth = depth.detach().cpu().numpy()
        >>> depth = Image.fromarray(depth.astype("uint8"))
        ```NTr  r   r>   )rX  rc  r-   r   )
rf   r  r   r   r-   rh  r]  rL  r	   r   )r+   rM   ri  rw   r   r   r   r-   outrc  rX  loss_fctr!   s                r"   r/   zGLPNForDepthEstimation.forward  s:   \ &1%<kk$+B]$8$D  $+Jj 	 ))/!%#	  
 
 2=L--'!*ll=))))C.. {{H8OV44D 	F# :)+gabbk9)+gabbk9)-)9TGf$$vE#+3GQ'//T)	
 
 
 	
r$   )NNNN)r2   r3   r4   r*   r   r   r  r   r  r   r   r7   r	   r/   r9   r:   s   @r"   rf  rf  q  s              /3,0/3&*O
 O
'O
 *+O
 $D>	O

 'tnO
 d^O
 
uU\"$88	9O
 O
 O
 ^O
 O
 O
 O
 O
r$   rf  )rf  r   r  r   )r   F)0r5   rq   typingr   r   r   r   activationsr   modeling_outputsr   r	   modeling_utilsr
   pytorch_utilsr   r   utilsr   r   configuration_glpnr   
get_loggerr2   loggerr7   r6   r  r#   Moduler&   r<   rT   r   r   r   r   r   r   r   r  r  r.  r>  rL  rZ  rf  __all__r   r$   r"   <module>rx     sl      " " " " " " " "        ! ! ! ! ! ! E E E E E E E E - - - - - - Q Q Q Q Q Q Q Q , , , , , , , , * * * * * * 
	H	%	% U\ e T V[Vb    *% % % % %29 % % %) ) ) ) ) ) ) )4V V V V V V V Vt	 	 	 	 	RY 	 	 	# # # # #BI # # #N              0( ( ( ( (	 ( ( (VU
 U
 U
 U
 U
") U
 U
 U
p * * * * */ * * *0 2
 2
 2
 2
 2
# 2
 2
 2
j) ) ) ) ) ) ) )X    ry   (# # # # #") # # #6    	   *    bi   2   
[
 [
 [
 [
 [
0 [
 [
 
[
| V
U
Ur$   