
     `i               	          d Z ddlZddlmZ ddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ  e            r	ddlmZmZ nd Zd Z ej         e!          Z"e ed           G d de                                  Z#e ed           G d de                                  Z$e ed           G d de                                  Z% G d dej&                  Z' G d dej&                  Z( G d  d!ej&                  Z)dFd$ej*        d%e+d&e,d'ej*        fd(Z- G d) d*ej&                  Z. G d+ d,ej&                  Z/ G d- d.ej&                  Z0 G d/ d0ej&                  Z1 G d1 d2ej&                  Z2 G d3 d4ej&                  Z3 G d5 d6ej&                  Z4 G d7 d8ej&                  Z5 G d9 d:ej&                  Z6e G d; d<e                      Z7e G d= d>e7                      Z8 ed?           G d@ dAe7                      Z9 edB           G dC dDe7e                      Z:g dEZ;dS )Gz9PyTorch Dilated Neighborhood Attention Transformer model.    N)	dataclass)OptionalUnion)nn   )ACT2FN)BackboneOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputOptionalDependencyNotAvailableauto_docstringis_natten_availableloggingrequires_backends)BackboneMixin   )DinatConfig)
natten2davnatten2dqkrpbc                      t                      Nr   argskwargss     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/dinat/modeling_dinat.pyr   r   ,       ,...    c                      t                      r   r   r   s     r   r   r   /   r   r    zO
    Dinat encoder's outputs, with potential hidden states and attentions.
    )custom_introc                       e Zd ZU dZdZeej                 ed<   dZ	ee
ej        df                  ed<   dZee
ej        df                  ed<   dZee
ej        df                  ed<   dS )DinatEncoderOutputa  
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nlast_hidden_state.hidden_states
attentionsreshaped_hidden_states)__name__
__module____qualname____doc__r%   r   torchFloatTensor__annotations__r&   tupler'   r(    r    r   r$   r$   9   s           6:x 12999=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr    r$   zW
    Dinat model's outputs that also contains a pooling of the last hidden states.
    c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dZeeej        df                  ed<   dS )	DinatModelOutputa  
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
        Average pooling of the last layer hidden-state.
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nr%   pooler_output.r&   r'   r(   )r)   r*   r+   r,   r%   r   r-   r.   r/   r4   r&   r0   r'   r(   r1   r    r   r3   r3   O   s         	 	 6:x 1299915M8E-.555=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr    r3   z1
    Dinat outputs for image classification.
    c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dZeeej        df                  ed<   dS )	DinatImageClassifierOutputa7  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nlosslogits.r&   r'   r(   )r)   r*   r+   r,   r7   r   r-   r.   r/   r8   r&   r0   r'   r(   r1   r    r   r6   r6   h   s           )-D(5$
%,,,*.FHU&'...=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr    r6   c                   ^     e Zd ZdZ fdZdeej                 deej	                 fdZ
 xZS )DinatEmbeddingsz6
    Construct the patch and position embeddings.
    c                     t                                                       t          |          | _        t	          j        |j                  | _        t	          j        |j	                  | _
        d S r   )super__init__DinatPatchEmbeddingspatch_embeddingsr   	LayerNorm	embed_dimnormDropouthidden_dropout_probdropoutselfconfig	__class__s     r   r=   zDinatEmbeddings.__init__   sU     4V < <L!122	z&"<==r    pixel_valuesreturnc                     |                      |          }|                     |          }|                     |          }|S r   )r?   rB   rE   )rG   rJ   
embeddingss      r   forwardzDinatEmbeddings.forward   s=    **<88
YYz**
\\*--
r    )r)   r*   r+   r,   r=   r   r-   r.   r0   TensorrN   __classcell__rI   s   @r   r:   r:      ss         > > > > >HU->$? E%,DW        r    r:   c                   R     e Zd ZdZ fdZdeej                 dej        fdZ	 xZ
S )r>   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    c           
      R   t                                                       |j        }|j        |j        }}|| _        |dk    rnt          d          t          j        t          j        | j        |dz  ddd          t          j        |dz  |ddd                    | _	        d S )N   z2Dinat only supports patch size of 4 at the moment.   r   r   rU   rU   r   r   )kernel_sizestridepadding)
r<   r=   
patch_sizenum_channelsrA   
ValueErrorr   
SequentialConv2d
projection)rG   rH   r\   r]   hidden_sizerI   s        r   r=   zDinatPatchEmbeddings.__init__   s    &
$*$79Ik(?? QRRR-Id')9vV\flmmmIkQ&PV`fggg
 
r    rJ   rK   c                     |j         \  }}}}|| j        k    rt          d          |                     |          }|                    dddd          }|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   rU   r   r   )shaper]   r^   ra   permute)rG   rJ   _r]   heightwidthrM   s          r   rN   zDinatPatchEmbeddings.forward   sh    )5);&<4,,,w   __\22
''1a33
r    )r)   r*   r+   r,   r=   r   r-   r.   rO   rN   rP   rQ   s   @r   r>   r>      sn         
 
 
 
 
"	HU->$? 	EL 	 	 	 	 	 	 	 	r    r>   c                   l     e Zd ZdZej        fdedej        ddf fdZde	j
        de	j
        fdZ xZS )	DinatDownsamplerz
    Convolutional Downsampling Layer.

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    dim
norm_layerrK   Nc                     t                                                       || _        t          j        |d|z  dddd          | _         |d|z            | _        d S )NrU   rV   rW   rX   F)rY   rZ   r[   bias)r<   r=   rk   r   r`   	reductionrB   )rG   rk   rl   rI   s      r   r=   zDinatDownsampler.__init__   s]    3CVF\binoooJq3w''			r    input_featurec                     |                      |                    dddd                                        dddd          }|                     |          }|S )Nr   r   r   rU   )ro   re   rB   )rG   rp   s     r   rN   zDinatDownsampler.forward   sV    }'<'<Q1a'H'HIIQQRSUVXY[\]]		-00r    )r)   r*   r+   r,   r   r@   intModuler=   r-   rO   rN   rP   rQ   s   @r   rj   rj      s          :< ( (C (RY ($ ( ( ( ( ( (U\ el        r    rj           Finput	drop_probtrainingrK   c                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    rt   r   r   )r   )dtypedevice)rd   ndimr-   randry   rz   floor_div)ru   rv   rw   	keep_probrd   random_tensoroutputs          r   	drop_pathr      s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FMr    c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
DinatDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nrv   rK   c                 V    t                                                       || _        d S r   )r<   r=   rv   )rG   rv   rI   s     r   r=   zDinatDropPath.__init__   s$    "r    r&   c                 8    t          || j        | j                  S r   )r   rv   rw   rG   r&   s     r   rN   zDinatDropPath.forward   s    FFFr    c                     d| j          S )Nzp=)rv   rG   s    r   
extra_reprzDinatDropPath.extra_repr   s    $DN$$$r    r   )r)   r*   r+   r,   r   floatr=   r-   rO   rN   strr   rP   rQ   s   @r   r   r      s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r    r   c                   b     e Zd Z fdZ	 ddej        dee         deej                 fdZ	 xZ
S )NeighborhoodAttentionc                    t                                                       ||z  dk    rt          d| d| d          || _        t	          ||z            | _        | j        | j        z  | _        || _        || _        t          j
        t          j        |d| j        z  dz
  d| j        z  dz
                      | _        t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _        t          j        |j                  | _        d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()rU   r   )rn   )r<   r=   r^   num_attention_headsrr   attention_head_sizeall_head_sizerY   dilationr   	Parameterr-   zerosrpbLinearqkv_biasquerykeyvaluerC   attention_probs_dropout_probrE   rG   rH   rk   	num_headsrY   r   rI   s         r   r=   zNeighborhoodAttention.__init__   sG   ?akCkk_hkkk   $- #&sY#7#7 !58PP&  <ID<L8Lq8PTUX\XhThklTl n nooYt143EFO\\\
9T/1C&/ZZZYt143EFO\\\
z&"EFFr    Fr&   output_attentionsrK   c                    |j         \  }}}|                     |                              |d| j        | j                                      dd          }|                     |                              |d| j        | j                                      dd          }|                     |                              |d| j        | j                                      dd          }|t          j	        | j                  z  }t          ||| j        | j        | j                  }	t          j                            |	d          }
|                     |
          }
t%          |
|| j        | j                  }|                    ddddd                                          }|                                d d         | j        fz   }|                    |          }|r||
fn|f}|S )	Nr   rU   rk   r   r   rT   )rd   r   viewr   r   	transposer   r   mathsqrtr   r   rY   r   r   
functionalsoftmaxrE   r   re   
contiguoussizer   )rG   r&   r   
batch_size
seq_lengthrf   query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                 r   rN   zNeighborhoodAttention.forward  s   
 %2$7!
JJJ}%%T*b$":D<TUUYq!__ 	 HH]##T*b$":D<TUUYq!__ 	 JJ}%%T*b$":D<TUUYq!__ 	 "DId.F$G$GG )i4K[]a]jkk -//0@b/II ,,77"?KAQSWS`aa%--aAq!<<GGII"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r    Fr)   r*   r+   r=   r-   rO   r   boolr0   rN   rP   rQ   s   @r   r   r      s        G G G G G2 -2, ,|, $D>, 
u|		, , , , , , , ,r    r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )NeighborhoodAttentionOutputc                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S r   )r<   r=   r   r   denserC   r   rE   rG   rH   rk   rI   s      r   r=   z$NeighborhoodAttentionOutput.__init__?  sD    YsC((
z&"EFFr    r&   input_tensorrK   c                 Z    |                      |          }|                     |          }|S r   r   rE   )rG   r&   r   s      r   rN   z#NeighborhoodAttentionOutput.forwardD  s*    

=11]33r    r)   r*   r+   r=   r-   rO   rN   rP   rQ   s   @r   r   r   >  sn        G G G G G
U\  RWR^        r    r   c                   h     e Zd Z fdZd Z	 ddej        dee         de	ej                 fdZ
 xZS )	NeighborhoodAttentionModulec                     t                                                       t          |||||          | _        t	          ||          | _        t                      | _        d S r   )r<   r=   r   rG   r   r   setpruned_headsr   s         r   r=   z$NeighborhoodAttentionModule.__init__L  sS    )&#y+xXX	1&#>>EEr    c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r   )lenr   rG   r   r   r   r   r   r   r   r   r   r   union)rG   headsindexs      r   prune_headsz'NeighborhoodAttentionModule.prune_headsR  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r    Fr&   r   rK   c                     |                      ||          }|                     |d         |          }|f|dd          z   }|S Nr   r   )rG   r   )rG   r&   r   self_outputsattention_outputr   s         r   rN   z#NeighborhoodAttentionModule.forwardd  sK    
 yy0ABB;;|AFF#%QRR(88r    r   )r)   r*   r+   r=   r   r-   rO   r   r   r0   rN   rP   rQ   s   @r   r   r   K  s        " " " " "; ; ;* -2 | $D> 
u|		       r    r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )DinatIntermediatec                 $   t                                                       t          j        |t	          |j        |z                      | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r<   r=   r   r   rr   	mlp_ratior   
isinstance
hidden_actr   r   intermediate_act_fnr   s      r   r=   zDinatIntermediate.__init__p  sx    YsC(83(>$?$?@@
f'-- 	9'-f.?'@D$$$'-'8D$$$r    r&   rK   c                 Z    |                      |          }|                     |          }|S r   )r   r   r   s     r   rN   zDinatIntermediate.forwardx  s,    

=1100??r    r   rQ   s   @r   r   r   o  s^        9 9 9 9 9U\ el        r    r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )DinatOutputc                     t                                                       t          j        t	          |j        |z            |          | _        t          j        |j                  | _	        d S r   )
r<   r=   r   r   rr   r   r   rC   rD   rE   r   s      r   r=   zDinatOutput.__init__  sT    Ys6#3c#9::C@@
z&"<==r    r&   rK   c                 Z    |                      |          }|                     |          }|S r   r   r   s     r   rN   zDinatOutput.forward  s*    

=11]33r    r   rQ   s   @r   r   r   ~  s^        > > > > >
U\ el        r    r   c            	       x     e Zd Zd	 fd	Zd Z	 d
dej        dee         de	ej        ej        f         fdZ
 xZS )
DinatLayerrt   c                    t                                                       |j        | _        |j        | _        || _        | j        | j        z  | _        t          j        ||j                  | _	        t          |||| j        | j                  | _        |dk    rt          |          nt          j                    | _        t          j        ||j                  | _        t!          ||          | _        t%          ||          | _        |j        dk    r2t          j        |j        t-          j        d|f          z  d          nd | _        d S )Neps)rY   r   rt   r   rU   T)requires_grad)r<   r=   chunk_size_feed_forwardrY   r   window_sizer   r@   layer_norm_epslayernorm_beforer   	attentionr   Identityr   layernorm_afterr   intermediater   r   layer_scale_init_valuer   r-   oneslayer_scale_parameters)rG   rH   rk   r   r   drop_path_raterI   s         r   r=   zDinatLayer.__init__  s?   '-'E$!- +dm; "Sf6K L L L4C0@4=
 
 
 ;I3:N:N~666TVT_TaTa!|CV5JKKK-fc::!&#.. ,q00 L6QH9M9MM]abbbb 	###r    c                     | j         }d}||k     s||k     rRdx}}t          d||z
            }t          d||z
            }	dd||||	f}t          j                            ||          }||fS )N)r   r   r   r   r   r   r   )r   maxr   r   pad)
rG   r&   rg   rh   r   
pad_valuespad_lpad_tpad_rpad_bs
             r   	maybe_padzDinatLayer.maybe_pad  s    &'
K5;#6#6EE;.//E;/00EQueU;JM--mZHHMj((r    Fr&   r   rK   c                    |                                 \  }}}}|}|                     |          }|                     |||          \  }}|j        \  }	}
}}	|                     ||          }|d         }|d         dk    p|d         dk    }|r&|d d d |d |d d f                                         }| j        | j        d         |z  }||                     |          z   }|                     |          }| 	                    | 
                    |                    }| j        | j        d         |z  }||                     |          z   }|r
||d         fn|f}|S )N)r   r   r      r   )r   r   r   rd   r   r   r   r   r   r   r   )rG   r&   r   r   rg   rh   channelsshortcutr   rf   
height_pad	width_padattention_outputsr   
was_paddedlayer_outputlayer_outputss                    r   rN   zDinatLayer.forward  s   
 /<.@.@.B.B+
FE8 --m<<$(NN=&%$P$P!z&3&9#:y! NN=L]N^^,Q/]Q&;*Q-!*;
 	T/7F7FUFAAA0EFQQSS&2#:1=@PP 4>>2B#C#CC++M::{{4#4#4\#B#BCC&26q9LHL$t~~l'C'CC@Qf'8';<<XdWfr    )rt   r   )r)   r*   r+   r=   r   r-   rO   r   r   r0   rN   rP   rQ   s   @r   r   r     s        
 
 
 
 
 
(	) 	) 	) -2$ $|$ $D>$ 
u|U\)	*	$ $ $ $ $ $ $ $r    r   c                   b     e Zd Z fdZ	 ddej        dee         deej                 fdZ	 xZ
S )
DinatStagec                 4   t                                                       | _        | _        t	          j        fdt          |          D                       | _        | |t          j                  | _	        nd | _	        d| _
        d S )Nc           
      P    g | ]"}t          |         |                    #S ))rH   rk   r   r   r   )r   ).0irH   	dilationsrk   r   r   s     r   
<listcomp>z'DinatStage.__init__.<locals>.<listcomp>  sR     	 	 	  !'&q\#1!#4  	 	 	r    )rk   rl   F)r<   r=   rH   rk   r   
ModuleListrangelayersr@   
downsamplepointing)	rG   rH   rk   depthr   r	  r   r  rI   s	    `` ``` r   r=   zDinatStage.__init__  s    m	 	 	 	 	 	 	 	 u	 	 	
 
 !(jSR\JJJDOO"DOr    Fr&   r   rK   c                     |                                 \  }}}}t          | j                  D ]\  }} |||          }|d         }|}	| j        |                     |	          }||	f}
|r|
|dd          z  }
|
S r   )r   	enumerater  r  )rG   r&   r   rf   rg   rh   r  layer_moduler  !hidden_states_before_downsamplingstage_outputss              r   rN   zDinatStage.forward  s    
 ,002265!(55 	- 	-OA|(L8IJJM)!,MM,9)?& OO,MNNM&(IJ 	/]122..Mr    r   r   rQ   s   @r   r  r    s            8 -2 | $D> 
u|		       r    r  c                        e Zd Z fdZ	 	 	 	 ddej        dee         dee         dee         dee         d	ee	e
f         fd
Z xZS )DinatEncoderc                 r    t                                                       t          j                   _         _        d t          j        dj        t          j                  d          D             t          j         fdt           j                  D                        _        d S )Nc                 6    g | ]}|                                 S r1   )item)r  xs     r   r
  z)DinatEncoder.__init__.<locals>.<listcomp>  s     lllAqvvxxlllr    r   cpu)rz   c                 V   g | ]}t          t          j        d |z  z            j        |         j        |         j        |         t          j        d|                   t          j        d|dz                               |j        dz
  k     rt          nd          S )rU   Nr   )rH   rk   r  r   r	  r   r  )	r  rr   rA   depthsr   r	  sum
num_levelsrj   )r  i_layerrH   dprrG   s     r   r
  z)DinatEncoder.__init__.<locals>.<listcomp>  s         !F,q'z9:: -0$.w7$.w7#&s6='+B'C'Cc&-XeZadeZeXeJfFgFg'g#h4;doPQ>Q4Q4Q//X\    r    )r<   r=   r   r  r   rH   r-   linspacer   r  r   r  r  levels)rG   rH   r"  rI   s   ``@r   r=   zDinatEncoder.__init__  s    fm,,ll63H#fmJ\J\ej!k!k!klllm       %T_55  
 
r    FTr&   r   output_hidden_states(output_hidden_states_before_downsamplingreturn_dictrK   c                     |rdnd }|rdnd }|rdnd }|r$|                     dddd          }	||fz  }||	fz  }t          | j                  D ]\  }
} |||          }|d         }|d         }|r'|r%|                     dddd          }	||fz  }||	fz  }n(|r&|s$|                     dddd          }	||fz  }||	fz  }|r||dd          z  }|st          d |||fD                       S t	          ||||          S )Nr1   r   r   r   rU   c              3      K   | ]}||V  	d S r   r1   )r  vs     r   	<genexpr>z'DinatEncoder.forward.<locals>.<genexpr><  s(      mmq_`_l_l_l_l_lmmr    )r%   r&   r'   r(   )re   r  r$  r0   r$   )rG   r&   r   r%  r&  r'  all_hidden_statesall_reshaped_hidden_statesall_self_attentionsreshaped_hidden_stater  r  r  r  s                 r   rN   zDinatEncoder.forward  s    #7@BBD+?%IRRT"$5?bb4 	C$1$9$9!Q1$E$E!-!11&+@*BB&(55 	9 	9OA|(L8IJJM)!,M0=a0@-# 	G(P 	G(I(Q(QRSUVXY[\(](]%!&G%II!*/D.FF**% G.V G(5(=(=aAq(I(I%!m%55!*/D.FF*  9#}QRR'88# 	nmm]4EGZ$[mmmmmm!++*#=	
 
 
 	
r    )FFFT)r)   r*   r+   r=   r-   rO   r   r   r   r0   r$   rN   rP   rQ   s   @r   r  r     s        
 
 
 
 
. -2/4CH&*.
 .
|.
 $D>.
 'tn	.

 3;4..
 d^.
 
u((	).
 .
 .
 .
 .
 .
 .
 .
r    r  c                   (    e Zd ZU eed<   dZdZd ZdS )DinatPreTrainedModelrH   dinatrJ   c                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 dS dS t          |t          j                  r?|j	        j        
                                 |j        j                            d           dS dS )zInitialize the weightsrt   )meanstdNg      ?)r   r   r   r`   weightdatanormal_rH   initializer_rangern   zero_r@   fill_)rG   modules     r   _init_weightsz"DinatPreTrainedModel._init_weightsL  s    fry")455 	* M&&CT[5R&SSS{& &&((((( '&-- 	*K""$$$M$$S)))))	* 	*r    N)r)   r*   r+   r   r/   base_model_prefixmain_input_namer=  r1   r    r   r1  r1  F  s<         $O
* 
* 
* 
* 
*r    r1  c                        e Zd Zd fd	Zd Zd Ze	 	 	 	 ddeej	                 dee
         dee
         d	ee
         d
eeef         f
d            Z xZS )
DinatModelTc                    t                                          |           t          | dg           || _        t	          |j                  | _        t          |j        d| j        dz
  z  z            | _	        t          |          | _        t          |          | _        t          j        | j	        |j                  | _        |rt          j        d          nd| _        |                                  dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        nattenrU   r   r   N)r<   r=   r   rH   r   r  r   rr   rA   num_featuresr:   rM   r  encoderr   r@   r   	layernormAdaptiveAvgPool1dpooler	post_init)rG   rH   add_pooling_layerrI   s      r   r=   zDinatModel.__init__[  s    
 	   $
+++fm,, 0119L3M MNN)&11#F++d&7V=RSSS1BLb*1--- 	r    c                     | j         j        S r   rM   r?   r   s    r   get_input_embeddingszDinatModel.get_input_embeddingsq      //r    c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrE  layerr   r   )rG   heads_to_prunerQ  r   s       r   _prune_headszDinatModel._prune_headst  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr    NrJ   r   r%  r'  rK   c                 <   ||n| j         j        }||n| j         j        }||n| j         j        }|t	          d          |                     |          }|                     ||||          }|d         }|                     |          }d }| j        R|                     |	                    dd          
                    dd                    }t          j	        |d          }|s||f|dd          z   }	|	S t          |||j        |j        |j                  S )Nz You have to specify pixel_valuesr   r%  r'  r   r   rU   )r%   r4   r&   r'   r(   )rH   r   r%  use_return_dictr^   rM   rE  rF  rH  flattenr   r-   r3   r&   r'   r(   )
rG   rJ   r   r%  r'  embedding_outputencoder_outputssequence_outputpooled_outputr   s
             r   rN   zDinatModel.forward|  sW    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]?@@@??<88,,/!5#	 ' 
 
 *!,..99;" KK(?(?1(E(E(O(OPQST(U(UVVM!M-;;M 	%}58KKFM-')7&1#2#I
 
 
 	
r    )T)NNNN)r)   r*   r+   r=   rM  rS  r   r   r-   r.   r   r   r0   r3   rN   rP   rQ   s   @r   rA  rA  Y  s             ,0 0 0C C C  59,0/3&*,
 ,
u01,
 $D>,
 'tn	,

 d^,
 
u&&	',
 ,
 ,
 ^,
 ,
 ,
 ,
 ,
r    rA  z
    Dinat Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    c                        e Zd Z fdZe	 	 	 	 	 d
deej                 deej                 dee	         dee	         dee	         de
eef         fd	            Z xZS )DinatForImageClassificationc                 b   t                                          |           t          | dg           |j        | _        t	          |          | _        |j        dk    r$t          j        | j        j        |j                  nt          j	                    | _
        |                                  d S )NrC  r   )r<   r=   r   
num_labelsrA  r2  r   r   rD  r   
classifierrI  rF   s     r   r=   z$DinatForImageClassification.__init__  s       $
+++ +''
 FLEVYZEZEZBIdj-v/@AAA`b`k`m`m 	
 	r    NrJ   labelsr   r%  r'  rK   c                 H   ||n| j         j        }|                     ||||          }|d         }|                     |          }d}	||                     ||| j                   }	|s|f|dd         z   }
|	|	f|
z   n|
S t          |	||j        |j        |j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        NrU  r   rU   )r7   r8   r&   r'   r(   )	rH   rV  r2  r`  loss_functionr6   r&   r'   r(   )rG   rJ   ra  r   r%  r'  r   r[  r8   r7   r   s              r   rN   z#DinatForImageClassification.forward  s     &1%<kk$+B]**/!5#	  
 
  
//%%ffdkBBD 	FY,F)-)9TGf$$vE)!/)#*#A
 
 
 	
r    )NNNNN)r)   r*   r+   r=   r   r   r-   r.   
LongTensorr   r   r0   r6   rN   rP   rQ   s   @r   r]  r]    s               59-1,0/3&*)
 )
u01)
 )*)
 $D>	)

 'tn)
 d^)
 
u00	1)
 )
 )
 ^)
 )
 )
 )
 )
r    r]  zL
    NAT backbone, to be used with frameworks like DETR and MaskFormer.
    c                        e Zd Z fdZd Ze	 	 	 d
dej        dee	         dee	         dee	         de
f
d	            Z xZS )DinatBackbonec                 B   t                                                     t                                                     t          | dg           t	                    | _        t                    | _        j        gfdt          t          j                            D             z   | _        i }t          | j        | j                  D ]\  }}t!          j        |          ||<   t!          j        |          | _        |                                  d S )NrC  c                 D    g | ]}t          j        d |z  z            S )rU   )rr   rA   )r  r  rH   s     r   r
  z*DinatBackbone.__init__.<locals>.<listcomp>  s.    1r1r1rST#f6FA6M2N2N1r1r1rr    )r<   r=   _init_backboner   r:   rM   r  rE  rA   r  r   r  rD  zip_out_featuresr   r   r@   
ModuleDicthidden_states_normsrI  )rG   rH   rm  stager]   rI   s    `   r   r=   zDinatBackbone.__init__  s      v&&&$
+++)&11#F++#-.1r1r1r1rX]^abhbo^p^pXqXq1r1r1rr !#&t'94=#I#I 	D 	DE<)+l)C)C&&#%=1D#E#E  	r    c                     | j         j        S r   rL  r   s    r   rM  z"DinatBackbone.get_input_embeddings	  rN  r    NrJ   r%  r   r'  rK   c                    ||n| j         j        }||n| j         j        }||n| j         j        }|                     |          }|                     ||ddd          }|j        }d}t          | j        |          D ]\  }	}
|	| j	        v r|
j
        \  }}}}|
                    dddd                                          }
|
                    |||z  |          }
 | j        |	         |
          }
|
                    ||||          }
|
                    dddd                                          }
||
fz  }|s|f}|r||j        fz  }|S t!          ||r|j        nd|j        	          S )
a/  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```NT)r   r%  r&  r'  r1   r   rU   r   r   )feature_mapsr&   r'   )rH   rV  r%  r   rM   rE  r(   rj  stage_namesout_featuresrd   re   r   r   rm  r&   r	   r'   )rG   rJ   r%  r   r'  rX  r   r&   rq  rn  hidden_stater   r]   rg   rh   r   s                   r   rN   zDinatBackbone.forward  s   B &1%<kk$+B]$8$D  $+Jj 	 2C1N--TXT_Tq??<88,,/!%59  
 
  6#&t'7#G#G 	0 	0E<))):F:L7
L&%+33Aq!Q??JJLL+00Ve^\ZZ>t7>|LL+00VULYY+33Aq!Q??JJLL/ 	"_F# 37022M%3GQ'//T)
 
 
 	
r    )NNN)r)   r*   r+   r=   rM  r   r-   rO   r   r   r	   rN   rP   rQ   s   @r   rf  rf    s            &0 0 0  04,0&*G
 G
lG
 'tnG
 $D>	G

 d^G
 
G
 G
 G
 ^G
 G
 G
 G
 G
r    rf  )r]  rA  r1  rf  )rt   F)<r,   r   dataclassesr   typingr   r   r-   r   activationsr   modeling_outputsr	   modeling_utilsr
   pytorch_utilsr   r   utilsr   r   r   r   r   r   utils.backbone_utilsr   configuration_dinatr   natten.functionalr   r   
get_loggerr)   loggerr$   r3   r6   rs   r:   r>   rj   rO   r   r   r   r   r   r   r   r   r   r   r  r  r1  rA  r]  rf  __all__r1   r    r   <module>r     s@   @ ?  ! ! ! ! ! ! " " " " " " " "        ! ! ! ! ! ! . . . . . . - - - - - - Q Q Q Q Q Q Q Q                2 1 1 1 1 1 , , , , , ,  /;;;;;;;;;/ / // / / 
	H	%	%   
K K K K K K K  K    
K K K K K{ K K  K&   
K K K K K K K  K*    bi   ,! ! ! ! !29 ! ! !H    ry   0 U\ e T V[Vb    *% % % % %BI % % %C C C C CBI C C CL
 
 
 
 
") 
 
 
! ! ! ! !") ! ! !H    	   	 	 	 	 	") 	 	 	D D D D D D D DN, , , , , , , ,^C
 C
 C
 C
 C
29 C
 C
 C
L * * * * *? * * *$ O
 O
 O
 O
 O
% O
 O
 O
d   ;
 ;
 ;
 ;
 ;
"6 ;
 ;
 ;
|   
_
 _
 _
 _
 _
(- _
 _
 
_
D a
`
`r    