
     `iF              	          d Z ddlZddlmZ ddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ  e            r	ddl m!Z!m"Z" nd Z"d Z! ej#        e$          Z%dZ&dZ'g dZ(dZ)dZ*e G d de                      Z+e G d de                      Z,e G d de                      Z- G d dej.                  Z/ G d dej.                  Z0 G d  d!ej.                  Z1dId$ej2        d%e3d&e4d'ej2        fd(Z5 G d) d*ej.                  Z6 G d+ d,ej.                  Z7 G d- d.ej.                  Z8 G d/ d0ej.                  Z9 G d1 d2ej.                  Z: G d3 d4ej.                  Z; G d5 d6ej.                  Z< G d7 d8ej.                  Z= G d9 d:ej.                  Z> G d; d<e          Z?d=Z@d>ZA ed?e@           G d@ dAe?                      ZB edBe@           G dC dDe?                      ZC edEe@           G dF dGe?e                      ZDg dHZEdS )Jz1PyTorch Neighborhood Attention Transformer model.    N)	dataclass)OptionalUnion)nn   )ACT2FN)BackboneOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)	ModelOutputOptionalDependencyNotAvailableadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardis_natten_availableloggingreplace_return_docstringsrequires_backends)BackboneMixin   )	NatConfig)
natten2davnatten2dqkrpbc                      t                      Nr   argskwargss     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/deprecated/nat/modeling_nat.pyr   r   /       ,...    c                      t                      r   r   r   s     r!   r   r   2   r"   r#   r   zshi-labs/nat-mini-in1k-224)r      r%   i   z	tiger catc                       e Zd ZU dZdZeej                 ed<   dZ	ee
ej        df                  ed<   dZee
ej        df                  ed<   dZee
ej        df                  ed<   dS )NatEncoderOutputa  
    Nat encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nlast_hidden_state.hidden_states
attentionsreshaped_hidden_states)__name__
__module____qualname____doc__r(   r   torchFloatTensor__annotations__r)   tupler*   r+    r#   r!   r'   r'   G   s          2 6:x 12999=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr#   r'   c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dZeeej        df                  ed<   dS )	NatModelOutputaS  
    Nat model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
            Average pooling of the last layer hidden-state.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nr(   pooler_output.r)   r*   r+   )r,   r-   r.   r/   r(   r   r0   r1   r2   r7   r)   r3   r*   r+   r4   r#   r!   r6   r6   h   s          6 6:x 1299915M8E-.555=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr#   r6   c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dZeeej        df                  ed<   dS )	NatImageClassifierOutputa   
    Nat outputs for image classification.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nlosslogits.r)   r*   r+   )r,   r-   r.   r/   r:   r   r0   r1   r2   r;   r)   r3   r*   r+   r4   r#   r!   r9   r9      s          6 )-D(5$
%,,,*.FHU&'...=AM8E%"3S"89:AAA:>Ju0#567>>>FJHU5+<c+A%BCJJJJJr#   r9   c                   ^     e Zd ZdZ fdZdeej                 deej	                 fdZ
 xZS )NatEmbeddingsz6
    Construct the patch and position embeddings.
    c                     t                                                       t          |          | _        t	          j        |j                  | _        t	          j        |j	                  | _
        d S r   )super__init__NatPatchEmbeddingspatch_embeddingsr   	LayerNorm	embed_dimnormDropouthidden_dropout_probdropoutselfconfig	__class__s     r!   r@   zNatEmbeddings.__init__   sU     26 : :L!122	z&"<==r#   pixel_valuesreturnc                     |                      |          }|                     |          }|                     |          }|S r   )rB   rE   rH   )rJ   rM   
embeddingss      r!   forwardzNatEmbeddings.forward   s=    **<88
YYz**
\\*--
r#   )r,   r-   r.   r/   r@   r   r0   r1   r3   TensorrQ   __classcell__rL   s   @r!   r=   r=      ss         > > > > >HU->$? E%,DW        r#   r=   c                   R     e Zd ZdZ fdZdeej                 dej        fdZ	 xZ
S )rA   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    c           
      R   t                                                       |j        }|j        |j        }}|| _        |dk    rnt          d          t          j        t          j        | j        |dz  ddd          t          j        |dz  |ddd                    | _	        d S )Nr   z2Dinat only supports patch size of 4 at the moment.      rY   rW   rW   r   r   )kernel_sizestridepadding)
r?   r@   
patch_sizenum_channelsrD   
ValueErrorr   
SequentialConv2d
projection)rJ   rK   r_   r`   hidden_sizerL   s        r!   r@   zNatPatchEmbeddings.__init__   s    &
$*$79Ik(?? QRRR-Id')9vV\flmmmIkQ&PV`fggg
 
r#   rM   rN   c                     |j         \  }}}}|| j        k    rt          d          |                     |          }|                    dddd          }|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   rW   rY   r   )shaper`   ra   rd   permute)rJ   rM   _r`   heightwidthrP   s          r!   rQ   zNatPatchEmbeddings.forward   sh    )5);&<4,,,w   __\22
''1a33
r#   )r,   r-   r.   r/   r@   r   r0   r1   rR   rQ   rS   rT   s   @r!   rA   rA      sn         
 
 
 
 
"	HU->$? 	EL 	 	 	 	 	 	 	 	r#   rA   c                   l     e Zd ZdZej        fdedej        ddf fdZde	j
        de	j
        fdZ xZS )	NatDownsamplerz
    Convolutional Downsampling Layer.

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    dim
norm_layerrN   Nc                     t                                                       || _        t          j        |d|z  dddd          | _         |d|z            | _        d S )NrW   rX   rZ   r[   F)r\   r]   r^   bias)r?   r@   rn   r   rc   	reductionrE   )rJ   rn   ro   rL   s      r!   r@   zNatDownsampler.__init__   s]    3CVF\binoooJq3w''			r#   input_featurec                     |                      |                    dddd                                        dddd          }|                     |          }|S )Nr   rY   r   rW   )rr   rh   rE   )rJ   rs   s     r!   rQ   zNatDownsampler.forward   sV    }'<'<Q1a'H'HIIQQRSUVXY[\]]		-00r#   )r,   r-   r.   r/   r   rC   intModuler@   r0   rR   rQ   rS   rT   s   @r!   rm   rm      s          :< ( (C (RY ($ ( ( ( ( ( (U\ el        r#   rm           Finput	drop_probtrainingrN   c                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    rw   r   r   )r   )dtypedevice)rg   ndimr0   randr|   r}   floor_div)rx   ry   rz   	keep_probrg   random_tensoroutputs          r!   	drop_pathr     s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FMr#   c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
NatDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nry   rN   c                 V    t                                                       || _        d S r   )r?   r@   ry   )rJ   ry   rL   s     r!   r@   zNatDropPath.__init__  s$    "r#   r)   c                 8    t          || j        | j                  S r   )r   ry   rz   rJ   r)   s     r!   rQ   zNatDropPath.forward  s    FFFr#   c                     d| j          S )Nzp=)ry   rJ   s    r!   
extra_reprzNatDropPath.extra_repr  s    $DN$$$r#   r   )r,   r-   r.   r/   r   floatr@   r0   rR   rQ   strr   rS   rT   s   @r!   r   r     s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r#   r   c                   h     e Zd Z fdZd Z	 ddej        dee         de	ej                 fdZ
 xZS )	NeighborhoodAttentionc                    t                                                       ||z  dk    rt          d| d| d          || _        t	          ||z            | _        | j        | j        z  | _        || _        t          j	        t          j        |d| j        z  dz
  d| j        z  dz
                      | _        t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _        t          j        |j                  | _        d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()rW   r   )rq   )r?   r@   ra   num_attention_headsru   attention_head_sizeall_head_sizer\   r   	Parameterr0   zerosrpbLinearqkv_biasquerykeyvaluerF   attention_probs_dropout_probrH   rJ   rK   rn   	num_headsr\   rL   s        r!   r@   zNeighborhoodAttention.__init__$  s@   ?akCkk_hkkk   $- #&sY#7#7 !58PP& <ID<L8Lq8PTUX\XhThklTl n nooYt143EFO\\\
9T/1C&/ZZZYt143EFO\\\
z&"EFFr#   c                     |                                 d d         | j        | j        fz   }|                    |          }|                    ddddd          S )Nr   rY   r   rW   r   )sizer   r   viewrh   )rJ   xnew_x_shapes      r!   transpose_for_scoresz*NeighborhoodAttention.transpose_for_scores9  sR    ffhhssmt'?AY&ZZFF;yyAq!Q'''r#   Fr)   output_attentionsrN   c                    |                      |                     |                    }|                      |                     |                    }|                      |                     |                    }|t	          j        | j                  z  }t          ||| j        | j	        d          }t          j                            |d          }|                     |          }t          ||| j	        d          }|                    ddddd                                          }|                                d d         | j        fz   }	|                    |	          }|r||fn|f}
|
S )	Nr   r   rn   r   rW   rY   r   )r   r   r   r   mathsqrtr   r   r   r\   r   
functionalsoftmaxrH   r   rh   
contiguousr   r   r   )rJ   r)   r   query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss              r!   rQ   zNeighborhoodAttention.forward>  s\   
 //

=0I0IJJ--dhh}.E.EFF	//

=0I0IJJ
 "DId.F$G$GG )i4K[]^__ -//0@b/II ,,77"?KAQSTUU%--aAq!<<GGII"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r#   F)r,   r-   r.   r@   r   r0   rR   r   boolr3   rQ   rS   rT   s   @r!   r   r   #  s        G G G G G*( ( ( -2 | $D> 
u|		       r#   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )NeighborhoodAttentionOutputc                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S r   )r?   r@   r   r   denserF   r   rH   rJ   rK   rn   rL   s      r!   r@   z$NeighborhoodAttentionOutput.__init__a  sD    YsC((
z&"EFFr#   r)   input_tensorrN   c                 Z    |                      |          }|                     |          }|S r   r   rH   )rJ   r)   r   s      r!   rQ   z#NeighborhoodAttentionOutput.forwardf  s*    

=11]33r#   r,   r-   r.   r@   r0   rR   rQ   rS   rT   s   @r!   r   r   `  sn        G G G G G
U\  RWR^        r#   r   c                   h     e Zd Z fdZd Z	 ddej        dee         de	ej                 fdZ
 xZS )	NeighborhoodAttentionModulec                     t                                                       t          ||||          | _        t	          ||          | _        t                      | _        d S r   )r?   r@   r   rJ   r   r   setpruned_headsr   s        r!   r@   z$NeighborhoodAttentionModule.__init__n  sQ    )&#y+NN	1&#>>EEr#   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r   )lenr   rJ   r   r   r   r   r   r   r   r   r   r   union)rJ   headsindexs      r!   prune_headsz'NeighborhoodAttentionModule.prune_headst  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r#   Fr)   r   rN   c                     |                      ||          }|                     |d         |          }|f|dd          z   }|S Nr   r   )rJ   r   )rJ   r)   r   self_outputsattention_outputr   s         r!   rQ   z#NeighborhoodAttentionModule.forward  sK    
 yy0ABB;;|AFF#%QRR(88r#   r   )r,   r-   r.   r@   r   r0   rR   r   r   r3   rQ   rS   rT   s   @r!   r   r   m  s        " " " " "; ; ;* -2 | $D> 
u|		       r#   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )NatIntermediatec                 $   t                                                       t          j        |t	          |j        |z                      | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r?   r@   r   r   ru   	mlp_ratior   
isinstance
hidden_actr   r   intermediate_act_fnr   s      r!   r@   zNatIntermediate.__init__  sx    YsC(83(>$?$?@@
f'-- 	9'-f.?'@D$$$'-'8D$$$r#   r)   rN   c                 Z    |                      |          }|                     |          }|S r   )r   r   r   s     r!   rQ   zNatIntermediate.forward  s,    

=1100??r#   r   rT   s   @r!   r   r     s^        9 9 9 9 9U\ el        r#   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )	NatOutputc                     t                                                       t          j        t	          |j        |z            |          | _        t          j        |j                  | _	        d S r   )
r?   r@   r   r   ru   r   r   rF   rG   rH   r   s      r!   r@   zNatOutput.__init__  sT    Ys6#3c#9::C@@
z&"<==r#   r)   rN   c                 Z    |                      |          }|                     |          }|S r   r   r   s     r!   rQ   zNatOutput.forward  s*    

=11]33r#   r   rT   s   @r!   r   r     s^        > > > > >
U\ el        r#   r   c            	       x     e Zd Zd	 fd	Zd Z	 d
dej        dee         de	ej        ej        f         fdZ
 xZS )NatLayerrw   c                 h   t                                                       |j        | _        |j        | _        t	          j        ||j                  | _        t          |||| j                  | _	        |dk    rt          |          nt	          j                    | _        t	          j        ||j                  | _        t          ||          | _        t!          ||          | _        |j        dk    r2t	          j        |j        t)          j        d|f          z  d          nd | _        d S )Neps)r\   rw   r   rW   T)requires_grad)r?   r@   chunk_size_feed_forwardr\   r   rC   layer_norm_epslayernorm_beforer   	attentionr   Identityr   layernorm_afterr   intermediater   r   layer_scale_init_valuer   r0   oneslayer_scale_parameters)rJ   rK   rn   r   drop_path_raterL   s        r!   r@   zNatLayer.__init__  s   '-'E$!- "Sf6K L L L4VS)Y]Yijjj8F8L8L^444RTR]R_R_!|CV5JKKK+FC88,, ,q00 L6QH9M9MM]abbbb 	###r#   c                     | j         }d}||k     s||k     rRdx}}t          d||z
            }t          d||z
            }	dd||||	f}t          j                            ||          }||fS )N)r   r   r   r   r   r   r   )r\   maxr   r   pad)
rJ   r)   rj   rk   window_size
pad_valuespad_lpad_tpad_rpad_bs
             r!   	maybe_padzNatLayer.maybe_pad  s    &'
K5;#6#6EE;.//E;/00EQueU;JM--mZHHMj((r#   Fr)   r   rN   c                    |                                 \  }}}}|}|                     |          }|                     |||          \  }}|j        \  }	}
}}	|                     ||          }|d         }|d         dk    p|d         dk    }|r&|d d d |d |d d f                                         }| j        | j        d         |z  }||                     |          z   }|                     |          }| 	                    | 
                    |                    }| j        | j        d         |z  }||                     |          z   }|r
||d         fn|f}|S )N)r   r   rY      r   )r   r   r   rg   r   r   r   r   r   r   r   )rJ   r)   r   
batch_sizerj   rk   channelsshortcutr   ri   
height_pad	width_padattention_outputsr   
was_paddedlayer_outputlayer_outputss                    r!   rQ   zNatLayer.forward  s   
 /<.@.@.B.B+
FE8 --m<<$(NN=&%$P$P!z&3&9#:y! NN=L]N^^,Q/]Q&;*Q-!*;
 	T/7F7FUFAAA0EFQQSS&2#:1=@PP 4>>2B#C#CC++M::{{4#4#4\#B#BCC&26q9LHL$t~~l'C'CC@Qf'8';<<XdWfr#   )rw   r   )r,   r-   r.   r@   r   r0   rR   r   r   r3   rQ   rS   rT   s   @r!   r   r     s        
 
 
 
 
 
 	) 	) 	) -2$ $|$ $D>$ 
u|U\)	*	$ $ $ $ $ $ $ $r#   r   c                   b     e Zd Z fdZ	 ddej        dee         deej                 fdZ	 xZ
S )NatStagec                 0   t                                                       | _        | _        t	          j        fdt          |          D                       | _        | |t          j                  | _	        nd | _	        d| _
        d S )Nc           	      B    g | ]}t          |                    S ))rK   rn   r   r   )r   ).0irK   rn   r   r   s     r!   
<listcomp>z%NatStage.__init__.<locals>.<listcomp>  sK         !'#1!#4	    r#   )rn   ro   F)r?   r@   rK   rn   r   
ModuleListrangelayersrC   
downsamplepointing)rJ   rK   rn   depthr   r   r  rL   s    `` `` r!   r@   zNatStage.__init__  s    m       u  

 

 !(jSR\JJJDOO"DOr#   Fr)   r   rN   c                     |                                 \  }}}}t          | j                  D ]\  }} |||          }|d         }|}	| j        |                     |	          }||	f}
|r|
|dd          z  }
|
S r   )r   	enumerater  r  )rJ   r)   r   ri   rj   rk   r  layer_moduler  !hidden_states_before_downsamplingstage_outputss              r!   rQ   zNatStage.forward  s    
 ,002265!(55 	- 	-OA|(L8IJJM)!,MM,9)?& OO,MNNM&(IJ 	/]122..Mr#   r   )r,   r-   r.   r@   r0   rR   r   r   r3   rQ   rS   rT   s   @r!   r  r    s            6 -2 | $D> 
u|		       r#   r  c                        e Zd Z fdZ	 	 	 	 ddej        dee         dee         dee         dee         d	ee	e
f         fd
Z xZS )
NatEncoderc                 r    t                                                       t          j                   _         _        d t          j        dj        t          j                  d          D             t          j         fdt           j                  D                        _        d S )Nc                 6    g | ]}|                                 S r4   )item)r
  r   s     r!   r  z'NatEncoder.__init__.<locals>.<listcomp>"  s     lllAqvvxxlllr#   r   cpu)r}   c                 >   g | ]}t          t          j        d |z  z            j        |         j        |         t          j        d|                   t          j        d|dz                               |j        dz
  k     rt          nd          S )rW   Nr   )rK   rn   r  r   r   r  )r  ru   rD   depthsr   sum
num_levelsrm   )r
  i_layerrK   dprrJ   s     r!   r  z'NatEncoder.__init__.<locals>.<listcomp>$  s     
 
 
  !F,q'z9:: -0$.w7#&s6='+B'C'Cc&-XeZadeZeXeJfFgFg'g#h29DOa<O2O2O~~VZ  
 
 
r#   )r?   r@   r   r  r!  rK   r0   linspacer   r   r   r  r  levels)rJ   rK   r#  rL   s   ``@r!   r@   zNatEncoder.__init__  s    fm,,ll63H#fmJ\J\ej!k!k!klllm
 
 
 
 
 
  %T_55
 
 

 
r#   FTr)   r   output_hidden_states(output_hidden_states_before_downsamplingreturn_dictrN   c                     |rdnd }|rdnd }|rdnd }|r$|                     dddd          }	||fz  }||	fz  }t          | j                  D ]\  }
} |||          }|d         }|d         }|r'|r%|                     dddd          }	||fz  }||	fz  }n(|r&|s$|                     dddd          }	||fz  }||	fz  }|r||dd          z  }|st          d |||fD                       S t	          ||||          S )Nr4   r   rY   r   rW   c              3      K   | ]}||V  	d S r   r4   )r
  vs     r!   	<genexpr>z%NatEncoder.forward.<locals>.<genexpr>X  s(      mmq_`_l_l_l_l_lmmr#   )r(   r)   r*   r+   )rh   r  r%  r3   r'   )rJ   r)   r   r&  r'  r(  all_hidden_statesall_reshaped_hidden_statesall_self_attentionsreshaped_hidden_stater  r  r  r  s                 r!   rQ   zNatEncoder.forward1  s    #7@BBD+?%IRRT"$5?bb4 	C$1$9$9!Q1$E$E!-!11&+@*BB&(55 	9 	9OA|(L8IJJM)!,M0=a0@-# 	G(P 	G(I(Q(QRSUVXY[\(](]%!&G%II!*/D.FF**% G.V G(5(=(=aAq(I(I%!m%55!*/D.FF*  9#}QRR'88# 	nmm]4EGZ$[mmmmmm++*#=	
 
 
 	
r#   )FFFT)r,   r-   r.   r@   r0   rR   r   r   r   r3   r'   rQ   rS   rT   s   @r!   r  r    s        
 
 
 
 
, -2/4CH&*.
 .
|.
 $D>.
 'tn	.

 3;4..
 d^.
 
u&&	'.
 .
 .
 .
 .
 .
 .
 .
r#   r  c                   ,    e Zd ZU dZeed<   dZdZd ZdS )NatPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    rK   natrM   c                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 dS dS t          |t          j                  r?|j	        j        
                                 |j        j                            d           dS dS )zInitialize the weightsrw   )meanstdNg      ?)r   r   r   rc   weightdatanormal_rK   initializer_rangerq   zero_rC   fill_)rJ   modules     r!   _init_weightsz NatPreTrainedModel._init_weightsl  s    fry")455 	* M&&CT[5R&SSS{& &&((((( '&-- 	*K""$$$M$$S)))))	* 	*r#   N)	r,   r-   r.   r/   r   r2   base_model_prefixmain_input_namer>  r4   r#   r!   r2  r2  b  sH          
 $O
* 
* 
* 
* 
*r#   r2  aF  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`NatConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`ViTImageProcessor.__call__`]
            for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z]The bare Nat Model transformer outputting raw hidden-states without any specific head on top.c                        e Zd Zd fd	Zd Zd Z ee           ee	e
ede          	 	 	 	 ddeej                 d	ee         d
ee         dee         deee
f         f
d                        Z xZS )NatModelTc                    t                                          |           t          | dg           || _        t	          |j                  | _        t          |j        d| j        dz
  z  z            | _	        t          |          | _        t          |          | _        t          j        | j	        |j                  | _        |rt          j        d          nd | _        |                                  d S )NnattenrW   r   r   )r?   r@   r   rK   r   r  r!  ru   rD   num_featuresr=   rP   r  encoderr   rC   r   	layernormAdaptiveAvgPool1dpooler	post_init)rJ   rK   add_pooling_layerrL   s      r!   r@   zNatModel.__init__  s       $
+++fm,, 0119L3M MNN'//!&))d&7V=RSSS1BLb*1--- 	r#   c                     | j         j        S r   rP   rB   r   s    r!   get_input_embeddingszNatModel.get_input_embeddings      //r#   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrF  layerr   r   )rJ   heads_to_prunerR  r   s       r!   _prune_headszNatModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr#   vision)
checkpointoutput_typeconfig_classmodalityexpected_outputNrM   r   r&  r(  rN   c                 <   ||n| j         j        }||n| j         j        }||n| j         j        }|t	          d          |                     |          }|                     ||||          }|d         }|                     |          }d }| j        R|                     |	                    dd          
                    dd                    }t          j	        |d          }|s||f|dd          z   }	|	S t          |||j        |j        |j                  S )Nz You have to specify pixel_valuesr   r&  r(  r   r   rW   )r(   r7   r)   r*   r+   )rK   r   r&  use_return_dictra   rP   rF  rG  rI  flatten	transposer0   r6   r)   r*   r+   )
rJ   rM   r   r&  r(  embedding_outputencoder_outputssequence_outputpooled_outputr   s
             r!   rQ   zNatModel.forward  sW    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]?@@@??<88,,/!5#	 ' 
 
 *!,..99;" KK(?(?1(E(E(O(OPQST(U(UVVM!M-;;M 	%}58KKFM-')7&1#2#I
 
 
 	
r#   )T)NNNN)r,   r-   r.   r@   rN  rT  r   NAT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr6   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r0   r1   r   r   r3   rQ   rS   rT   s   @r!   rB  rB    s       
     $0 0 0C C C +*+?@@&"$.   59,0/3&*,
 ,
u01,
 $D>,
 'tn	,

 d^,
 
un$	%,
 ,
 ,
  A@,
 ,
 ,
 ,
 ,
r#   rB  z
    Nat Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                        e Zd Z fdZ ee           eeee	e
          	 	 	 	 	 ddeej                 deej                 dee         dee         dee         d	eeef         fd
                        Z xZS )NatForImageClassificationc                 b   t                                          |           t          | dg           |j        | _        t	          |          | _        |j        dk    r$t          j        | j        j        |j                  nt          j	                    | _
        |                                  d S )NrD  r   )r?   r@   r   
num_labelsrB  r3  r   r   rE  r   
classifierrJ  rI   s     r!   r@   z"NatForImageClassification.__init__  s       $
+++ +F## DJCTWXCXCXBIdh+V->???^`^i^k^k 	
 	r#   )rV  rW  rX  rZ  NrM   labelsr   r&  r(  rN   c                 H   ||n| j         j        }|                     ||||          }|d         }|                     |          }d}	||                     ||| j                   }	|s|f|dd         z   }
|	|	f|
z   n|
S t          |	||j        |j        |j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr\  r   rW   )r:   r;   r)   r*   r+   )	rK   r]  r3  rl  loss_functionr9   r)   r*   r+   )rJ   rM   rm  r   r&  r(  r   rc  r;   r:   r   s              r!   rQ   z!NatForImageClassification.forward  s    * &1%<kk$+B]((/!5#	  
 
  
//%%ffdkBBD 	FY,F)-)9TGf$$vE'!/)#*#A
 
 
 	
r#   )NNNNN)r,   r-   r.   r@   r   rd  r   _IMAGE_CLASS_CHECKPOINTr9   rf  _IMAGE_CLASS_EXPECTED_OUTPUTr   r0   r1   
LongTensorr   r   r3   rQ   rS   rT   s   @r!   ri  ri    s             +*+?@@*,$4	   59-1,0/3&*)
 )
u01)
 )*)
 $D>	)

 'tn)
 d^)
 
u..	/)
 )
 )
  A@)
 )
 )
 )
 )
r#   ri  zBNAT backbone, to be used with frameworks like DETR and MaskFormer.c                        e Zd Z fdZd Z ee           eee	          	 	 	 dde
j        dee         dee         dee         d	ef
d
                        Z xZS )NatBackbonec                 B   t                                                     t                                                     t          | dg           t	                    | _        t                    | _        j        gfdt          t          j                            D             z   | _        i }t          | j        | j                  D ]\  }}t!          j        |          ||<   t!          j        |          | _        |                                  d S )NrD  c                 D    g | ]}t          j        d |z  z            S )rW   )ru   rD   )r
  r  rK   s     r!   r  z(NatBackbone.__init__.<locals>.<listcomp>G  s.    1r1r1rST#f6FA6M2N2N1r1r1rr#   )r?   r@   _init_backboner   r=   rP   r  rF  rD   r  r   r  rE  zipout_featuresr   r   rC   
ModuleDicthidden_states_normsrJ  )rJ   rK   r{  stager`   rL   s    `   r!   r@   zNatBackbone.__init__?  s      v&&&$
+++'//!&))#-.1r1r1r1rX]^abhbo^p^pXqXq1r1r1rr !#&t'8$-#H#H 	D 	DE<)+l)C)C&&#%=1D#E#E  	r#   c                     | j         j        S r   rM  r   s    r!   rN  z NatBackbone.get_input_embeddingsR  rO  r#   )rW  rX  NrM   r&  r   r(  rN   c                    ||n| j         j        }||n| j         j        }||n| j         j        }|                     |          }|                     ||ddd          }|j        }d}t          | j        |          D ]\  }	}
|	| j	        v r|
j
        \  }}}}|
                    dddd                                          }
|
                    |||z  |          }
 | j        |	         |
          }
|
                    ||||          }
|
                    dddd                                          }
||
fz  }|s|f}|r||j        fz  }|S t!          ||r|j        nd|j        	          S )
aA  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```NT)r   r&  r'  r(  r4   r   rW   rY   r   )feature_mapsr)   r*   )rK   r]  r&  r   rP   rF  r+   rx  stage_namesry  rg   rh   r   r   r{  r)   r	   r*   )rJ   rM   r&  r   r(  r`  r   r)   r  r|  hidden_stater   r`   rj   rk   r   s                   r!   rQ   zNatBackbone.forwardU  s   H &1%<kk$+B]$8$D  $+Jj 	 2C1N--TXT_Tq??<88,,/!%59  
 
  6#&t'7#G#G 		0 		0E<))):F:L7
L&%+33Aq!Q??JJLL+00Ve^\ZZ>t7>|LL+00VULYY+33Aq!Q??JJLL/ 	"_F# 37022M%3GQ'//T)
 
 
 	
r#   )NNN)r,   r-   r.   r@   rN  r   rd  r   r	   rf  r0   rR   r   r   rQ   rS   rT   s   @r!   rt  rt  :  s        
    &0 0 0 +*+?@@>XXX 04,0&*J
 J
lJ
 'tnJ
 $D>	J

 d^J
 
J
 J
 J
 YX A@J
 J
 J
 J
 J
r#   rt  )ri  rB  r2  rt  )rw   F)Fr/   r   dataclassesr   typingr   r   r0   r   activationsr   modeling_outputsr	   modeling_utilsr
   pytorch_utilsr   r   utilsr   r   r   r   r   r   r   r   r   utils.backbone_utilsr   configuration_natr   natten.functionalr   r   
get_loggerr,   loggerrf  re  rg  rp  rq  r'   r6   r9   rv   r=   rA   rm   rR   r   r   r   r   r   r   r   r   r   r   r  r  r2  NAT_START_DOCSTRINGrd  rB  ri  rt  __all__r4   r#   r!   <module>r     s6   8 7  ! ! ! ! ! ! " " " " " " " "        " " " " " " / / / / / / . . . . . . R R R R R R R R
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 3 2 2 2 2 2 ( ( ( ( ( (  /;;;;;;;;;/ / // / / 
	H	%	%  3 '  7 *  K K K K K{ K K K@  K  K  K  K  K[  K  K  KF  K  K  K  K  K{  K  K  KF    BI   ,! ! ! ! ! ! ! !H    RY   . U\ e T V[Vb    (% % % % %") % % %: : : : :BI : : :z
 
 
 
 
") 
 
 
! ! ! ! !") ! ! !H    bi   	 	 	 	 		 	 	 	@ @ @ @ @ry @ @ @F+ + + + +ry + + +\B
 B
 B
 B
 B
 B
 B
 B
J* * * * * * * *.	  " c R
 R
 R
 R
 R
! R
 R
	 R
j   A
 A
 A
 A
 A
 2 A
 A
 A
H H c
 c
 c
 c
 c
$m c
 c
	 c
L Y
X
Xr#   