
     `iqo              	          d Z ddlZddlZddlmZ ddlZddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ  ej        e          Zd:deeef         fdZ G d de	j                   Z! G d de	j"                  Z# G d de	j$                  Z% G d de	j&                  Z' G d de	j$                  Z(d;dej        de)dedej        fdZ* G d  d!e	j$                  Z+d<d#Z, G d$ d%e	j$                  Z- G d& d'e	j$                  Z. G d( d)e	j$                  Z/ G d* d+e	j$                  Z0 G d, d-e	j$                  Z1e G d. d/e                      Z2e G d0 d1e2                      Z3 ed23           G d4 d5e2                      Z4 ed63           G d7 d8e2e                      Z5g d9Z6dS )=z9PyTorch BiT model. Also supports backbone for ViT hybrid.    N)Optional)Tensornn   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)BackboneMixin   )	BitConfig   returnc                 &   d}| |dz
  ||dz
  z  z   dz  } | |fS t          | t                    r`|                                 } | dk    r,|dk    r!||dz
  z  dz  dk    r|dz
  ||dz
  z  z   dz  } nd} d}n| dk    rd} n|dz
  ||dz
  z  z   dz  } | |fS )	al  
    Utility function to get the tuple padding value given the kernel_size and padding.

    Args:
        padding (Union[`str`, `int`], *optional*):
            Padding value, can be either `"same"`, `"valid"`. If a different value is provided the default padding from
            PyTorch is used.
        kernel_size (`int`, *optional*, defaults to 7):
            Kernel size of the convolution layers.
        stride (`int`, *optional*, defaults to 1):
            Stride value of the convolution layers.
        dilation (`int`, *optional*, defaults to 1):
            Dilation value of the convolution layers.
    FNr      samer   Tvalid)
isinstancestrlower)paddingkernel_sizestridedilationdynamics        x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/bit/modeling_bit.pyget_padding_valuer!   )   s     GQJ(kAo">>1D'3 I--//f{{K!O <AQFF"QJ(kAo*FF1L GG 
h+/&BBqHGG    c                   6     e Zd ZdZ	 	 	 	 	 	 d fd	Zd Z xZS )	WeightStandardizedConv2dzConv2d with Weight Standardization. Includes TensorFlow compatible SAME padding. Used for ViT Hybrid model.

    Paper: [Micro-Batch Training with Batch-Channel Normalization and Weight
    Standardization](https://huggingface.co/papers/1903.10520v2)
    r   SAMEFư>c
           
          t          ||||          \  }}
t                                          ||||||||           |
rt          |||          | _        nd | _        |	| _        d S )N)r   r   )r   r   r   groupsbias)r!   super__init__DynamicPad2dpadeps)self
in_channelout_channelsr   r   r   r   r(   r)   r.   
is_dynamic	__class__s              r    r+   z!WeightStandardizedConv2d.__init__Y   s     0V^fggg 	 		
 		
 		
  	#KBBDHHDHr"   c           	      |   | j         |                      |          }t          j                            | j                            d| j        d          d d dd| j                                      | j                  }t          j        	                    ||| j
        | j        | j        | j        | j                  }|S )Nr   T        )trainingmomentumr.   )r-   r   
functional
batch_normweightreshaper1   r.   
reshape_asconv2dr)   r   r   r   r(   )r/   hidden_stater;   s      r    forwardz WeightStandardizedConv2d.forwardv   s    888L11L))K4#4b994PT_bhlhp * 
 

*T[
!
! 	 }++&$)T[$,W[Wb
 
 r"   )r   r%   r   r   Fr&   __name__
__module____qualname____doc__r+   r@   __classcell__r3   s   @r    r$   r$   R   sj               :	 	 	 	 	 	 	r"   r$   c                   *     e Zd ZdZd fd	Zd Z xZS )BitGroupNormActivationzQ
    A module that combines group normalization with an activation function.
    h㈵>Tc                     t                                          |j        |||           |rt          |j                 | _        d S t          j                    | _        d S )N)r.   affine)r*   r+   
num_groupsr   
hidden_act
activationr   Identity)r/   confignum_channelsr.   rL   apply_activationr3   s         r    r+   zBitGroupNormActivation.__init__   sS    *Lc&QQQ 	,$V%67DOOO kmmDOOOr"   c                     t           j                            || j        | j        | j        | j                  }|                     |          }|S N)r   r9   
group_normrM   r;   r)   r.   rO   )r/   r?   s     r    r@   zBitGroupNormActivation.forward   sB    }//dot{\`\egkgopp|44r"   )rJ   TTrA   rG   s   @r    rI   rI      sV         , , , , , ,      r"   rI   c                   *     e Zd ZdZd fd	Zd Z xZS )r,   z
    A module that wraps dynamic padding of any input, given the parameters of the convolutional layer and the input
    hidden states.
    r   c                 *   t                                                       t          |t                    r||f}t          |t                    r||f}t          |t                    r||f}|| _        || _        || _        || _        d }|| _        d S )Nc                 v    t          t          j        | |z            dz
  |z  |dz
  |z  z   dz   | z
  d          S )Nr   r   )maxmathceil)xr   r   r   s       r    compute_paddingz.DynamicPad2d.__init__.<locals>.compute_padding   sF    	!f*--1V;{QRZ>ZZ]^^abbdefffr"   )	r*   r+   r   intr   r   r   valuer^   )r/   r   r   r   r`   r^   r3   s         r    r+   zDynamicPad2d.__init__   s    k3'' 	5&4Kfc"" 	&f%Fh$$ 	, (+H& 
	g 	g 	g  /r"   c           	         |                                 dd          \  }}|                     || j        d         | j        d         | j        d                   }|                     || j        d         | j        d         | j        d                   }|dk    s|dk    r=t
          j                            ||dz  ||dz  z
  |dz  ||dz  z
  g| j                  }|S )Nr   r   r   )r`   )	sizer^   r   r   r   r   r9   r-   r`   )r/   inputinput_heightinput_widthpadding_heightpadding_widths         r    r@   zDynamicPad2d.forward   s    $)JJLL$5!k --lD<LQ<OQUQ\]^Q_aeanopaqrr,,[$:J1:Mt{[\~_c_lmn_opp A!2!2M%%!Q&!MQ$66"a'"^q%88	 j & 	 	E r"   )r   rA   rG   s   @r    r,   r,      sV         
/ / / / / /,      r"   r,   c                   <     e Zd ZdZ	 	 	 	 	 	 ddef fd	Zd
 Z xZS )BitMaxPool2dz1Tensorflow like 'SAME' wrapper for 2D max poolingNr   Fr   r   r   Tr   c                    t          |t          j        j                  r|n||f}t          |t          j        j                  r|n||f}t          |t          j        j                  r|n||f}t	                                          |||||           |rt          ||||          | _        d S t          j	                    | _        d S rU   )
r   collectionsabcIterabler*   r+   r,   r-   r   rP   )	r/   r   r   r   	ceil_moder   padding_valueuse_dynamic_paddingr3   s	           r    r+   zBitMaxPool2d.__init__   s     &0[_=U%V%Vvkk]hju\v%fko.FGG]fV\M])(KO4LMMg88T\^fSgfgxKKK 	%#K=QQDHHH{}}DHHHr"   c                     |                      |          }t          j                            || j        | j        | j        | j        | j                  S rU   )	r-   r   r9   
max_pool2dr   r   r   r   rp   r/   hidden_statess     r    r@   zBitMaxPool2d.forward   sG    //}''4+T[$,W[We
 
 	
r"   )Nr   Frk   r   T)rB   rC   rD   rE   r_   r+   r@   rF   rG   s   @r    rj   rj      ss        ;;
  % %% % % % % %&
 
 
 
 
 
 
r"   rj   c                   8     e Zd ZdZdef fdZdedefdZ xZS )BitEmbeddingszL
    BiT Embeddings (stem) composed of a single aggressive convolution.
    rQ   c                    t                                                       t          |j        |j        ddd|j                  | _        t          dd|j                  | _	        |j        6|j        
                                dk    rt          j                    | _        nt          j        dd	
          | _        |j        dk    rt!          ||j                  | _        nt          j                    | _        |j        | _        d S )Nr   r   :0yE>)r   r   r.   r   r   )r   r   rr   r%   )r   r   r   r   r6   )r   r`   preactivationrR   )r*   r+   r$   rR   embedding_sizeglobal_paddingconvolutionrj   embedding_dynamic_paddingpoolerupperr   rP   r-   ConstantPad2d
layer_typerI   normr/   rQ   r3   s     r    r+   zBitEmbeddings.__init__   s    3!)
 
 
 #qPVPpqqq  ,1F1L1L1N1NRX1X1X{}}DHH'CHHHDH//.vFDYZZZDIIDI"/r"   pixel_valuesr   c                     |j         d         }|| j        k    rt          d          |                     |          }|                     |          }|                     |          }|                     |          }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaperR   
ValueErrorr   r-   r   r   )r/   r   rR   	embeddings       r    r@   zBitEmbeddings.forward  s    #)!,4,,,w   $$\22	HHY''	IIi((	KK	**	r"   )	rB   rC   rD   rE   r   r+   r   r@   rF   rG   s   @r    rx   rx      sp         0y 0 0 0 0 0 06F v        r"   rx   r6   Frd   	drop_probr7   c                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r6   r   r   )r   )dtypedevice)r   ndimtorchrandr   r   floor_div)rd   r   r7   	keep_probr   random_tensoroutputs          r    	drop_pathr     s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FMr"   c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
BitDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 V    t                                                       || _        d S rU   )r*   r+   r   )r/   r   r3   s     r    r+   zBitDropPath.__init__/  s$    "r"   rv   c                 8    t          || j        | j                  S rU   )r   r   r7   ru   s     r    r@   zBitDropPath.forward3  s    FFFr"   c                     d| j          S )Nzp=)r   )r/   s    r    
extra_reprzBitDropPath.extra_repr6  s    $DN$$$r"   rU   )rB   rC   rD   rE   r   floatr+   r   r   r@   r   r   rF   rG   s   @r    r   r   ,  s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r"   r      c                 x    |}t          |t          | |dz  z             |z  |z            }|d| z  k     r||z  }|S )Nr   g?)rZ   r_   )r`   divisor	min_value	new_values       r    make_divr   :  sP    IIs57Q;#6777BWLMMI3;W	r"   c                   :     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fd	Zd Z xZS )
BitPreActivationBottleneckLayera  Pre-activation (v2) bottleneck block.
    Follows the implementation of "Identity Mappings in Deep Residual Networks":
    https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua

    Except it puts the stride on 3x3 conv when available.
    N      ?r   r6   Fc           	      T   t                                                       |p|}|p|}t          ||z            }|
rt          ||||d          | _        nd | _        t          ||          | _        t          ||dd|j                  | _	        t          ||          | _
        t          ||d||d|j                  | _        t          ||          | _        t          ||dd|j                  | _        |	d	k    rt          |	          nt          j                    | _        d S )
NTr   preactr   rz   r.   r   r|   r   )r   r(   r.   r   r   )r*   r+   r   BitDownsampleConv
downsamplerI   norm1r$   r~   conv1norm2conv2norm3conv3r   r   rP   r   )r/   rQ   in_channelsr1   bottle_ratior   r   first_dilationr(   drop_path_rateis_first_layermid_channelsr3   s               r    r+   z(BitPreActivationBottleneckLayer.__init__J  sD    	'38#2{| ;<< 		#/  DOO #DO+FK@@
-k<PT^d^sttt
+FNNN
-,&T[a[p
 
 

 ,FLAA
-lL!QU_e_tuuu
8F8J8J^444PRP[P]P]r"   c                 f   |                      |          }|}| j        |                     |          }|                     |          }|                     |                     |                    }|                     |                     |                    }|                     |          }||z   S rU   )r   r   r   r   r   r   r   r   )r/   rv   hidden_states_preactshortcuts       r    r@   z'BitPreActivationBottleneckLayer.forwardv  s    #zz-88 !?&';<<H 

#788

4::m#<#<==

4::m#<#<==}55x''r"   Nr   r   r   Nr   r6   FrA   rG   s   @r    r   r   B  sw          *^ *^ *^ *^ *^ *^X( ( ( ( ( ( (r"   r   c                   :     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fd	Zd Z xZS )
BitBottleneckLayerz\Non Pre-activation bottleneck block, equivalent to V1.5/V1b bottleneck. Used for ViT Hybrid.Nr   r   r6   Fc           
         t                                                       |p|}|p|}t          ||z            }|
rt          ||||d          | _        nd | _        t          ||dd|j                  | _        t          ||          | _	        t          ||d|||d|j                  | _
        t          ||          | _        t          ||dd|j                  | _        t          ||d	          | _        |	d
k    rt          |	          nt          j                    | _        t$          |j                 | _        d S )NFr   r   rz   r   r|   r   )r   r   r(   r.   r   rR   rS   r   )r*   r+   r   r   r   r$   r~   r   rI   r   r   r   r   r   r   r   rP   r   r   rN   rO   )r/   rQ   r   r1   r   r   r   r   r(   r   r   mid_chsr3   s               r    r+   zBitBottleneckLayer.__init__  sb    	'38#2{<,677 		#/  DOO #DO-k7A4Y_Ynooo
+FIII
-#)	
 	
 	

 ,FIII
-g|QDZ`Zoppp
+F`efff
8F8J8J^444PRP[P]P] !23r"   c                    |}| j         |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     ||z             }|S rU   )	r   r   r   r   r   r   r   r   rO   )r/   rv   r   s      r    r@   zBitBottleneckLayer.forward  s     ?&}55H 

=11

=11

=11

=11

=11

=11}55(@AAr"   r   rA   rG   s   @r    r   r     sm        ff /4 /4 /4 /4 /4 /4b      r"   r   c                   *     e Zd Z	 	 d fd	Zd Z xZS )r   r   Tc                     t                                                       t          ||d|d|j                  | _        |rt          j                    nt          ||d          | _        d S )Nr   rz   )r   r.   r   Fr   )	r*   r+   r$   r~   convr   rP   rI   r   )r/   rQ   r   r1   r   r   r3   s         r    r+   zBitDownsampleConv.__init__  st     	,qT6K`
 
 
	
 cBKMMM'\\abbb 				r"   c                 R    |                      |                     |                    S rU   )r   r   )r/   r]   s     r    r@   zBitDownsampleConv.forward  s    yy1&&&r"   )r   T)rB   rC   rD   r+   r@   rF   rG   s   @r    r   r     sT         
 
 
 
 
 
$' ' ' ' ' ' 'r"   r   c                   >     e Zd ZdZ	 	 d	 fd	Zd ZdedefdZ xZS )
BitStagez7
    A ResNet v2 stage composed by stacked layers.
    r   Nc	                    t                                                       |dv rdnd}	|j        dk    rt          }
nt          }
|}t          j                    | _        t          |          D ][}| 	                    |||          \  }}}| j        
                    t          |           |
|||||||	||	  	                   |}|}	\d S )N)r   r   r   r   
bottleneck)r   r   r   r   r   r   )r*   r+   r   r   r   r   
Sequentiallayersrange_get_updated_hyperparameters
add_moduler   )r/   rQ   r   r1   r   r   depthr   layer_dropoutr   	layer_clsprev_chs	layer_idxr   r   r3   s                  r    r+   zBitStage.__init__  s     	&&00a ,,*II7Imoou 	& 	&I595V5V6=6 62FNN K""I	 !%!-#1#1#1
 
 
   $H%NN+	& 	&r"   c                 B    |r	||         }nd}|dk    rd}|dk    }|||fS )zt
        Get the new hyper-parameters with respect to the previous ones and the index of the current layer.
        r6   r   r    )r/   r   r   r   r   r   s         r    r   z%BitStage._get_updated_hyperparameters  sA      	!*95NN N>>F"a~~55r"   rd   r   c                 T    |}t          | j                  D ]\  }} ||          }|S rU   )	enumerater   )r/   rd   r?   _layers        r    r@   zBitStage.forward)  s;    !$+.. 	/ 	/HAu 5..LLr"   )r   N)	rB   rC   rD   rE   r+   r   r   r@   rF   rG   s   @r    r   r     s          ,& ,& ,& ,& ,& ,&\6 6 6 V         r"   r   c            	       F     e Zd Zdef fdZd Z	 ddededed	efd
Z	 xZ
S )
BitEncoderrQ   c           
      z   t                                                       t          j        g           | _        |j        }d}d}d t          j        t          j	        d|j
        t          |j                                                          |j                  D             }t          t          |j        |j        |                    D ]k\  }\  }}}	|                     |||||          \  }
}}t%          |||
||||	          }|
}||z  }| j                            t)          |          |           ld S )N   r   c                 6    g | ]}|                                 S r   )tolist).0r]   s     r    
<listcomp>z'BitEncoder.__init__.<locals>.<listcomp>;  s0     
 
 
 HHJJ
 
 
r"   r   )r   r   r   r   )r*   r+   r   
ModuleListstagesr}   r   r   nplinspacer   sumdepthssplitr   ziphidden_sizesr   r   r   r   )r/   rQ   r   current_strider   layer_dropouts	stage_idxcurrent_depthcurrent_hidden_sizer   r1   r   stager3   s                r    r+   zBitEncoder.__init__1  sd   mB''( 
 
\"+a1FFMHZHZ"["[\\bbcicpqq
 
 

 OXv2NCCO
 O
 	: 	:JIJ':M .2-N-N>+>&. .*L&( !#+  E $Hf$NK""3y>>59999+	: 	:r"   c                 r    t          ||j        z            }|dk    rdnd}||j        k    r||z  }d}|||fS )Nr   r   r   )r   width_factoroutput_stride)r/   r   r   r   r   rQ   r1   r   s           r    r   z'BitEncoder._get_updated_hyperparametersW  sS     3f6I IJJ1nn!V111HFVX--r"   FTr?   output_hidden_statesreturn_dictr   c                     |rdnd }| j         D ]}|r||fz   } ||          }|r||fz   }|st          d ||fD                       S t          ||          S )Nr   c              3      K   | ]}||V  	d S rU   r   )r   vs     r    	<genexpr>z%BitEncoder.forward.<locals>.<genexpr>n  s"      SSqQ]]]]]SSr"   )last_hidden_staterv   )r   tupler	   )r/   r?   r   r   rv   stage_modules         r    r@   zBitEncoder.forward_  s     3< K 	6 	6L# @ - ?'<55LL 	<)\O;M 	TSS\=$ASSSSSS-*'
 
 
 	
r"   )FT)rB   rC   rD   r   r+   r   r   boolr	   r@   rF   rG   s   @r    r   r   0  s        $:y $: $: $: $: $: $:L. . . ]a
 
"
:>
UY
	'
 
 
 
 
 
 
 
r"   r   c                   .    e Zd ZU eed<   dZdZdgZd ZdS )BitPreTrainedModelrQ   bitr   rx   c                    t          |t          j                  r)t          j                            |j        dd           d S t          |t          j                  rt          j                            |j        t          j	        d                     |j
        ot          j                            |j                  \  }}|dk    rdt          j	        |          z  nd}t          j                            |j
        | |           d S d S t          |t          j        t          j        f          rLt          j                            |j        d           t          j                            |j
        d           d S d S )Nfan_outrelu)modenonlinearity   )ar   r   )r   r   Conv2dinitkaiming_normal_r;   Linearkaiming_uniform_r[   sqrtr)   _calculate_fan_in_and_fan_outuniform_BatchNorm2d	GroupNorm	constant_)r/   modulefan_inr   bounds        r    _init_weightsz BitPreTrainedModel._init_weights}  sF   fbi(( 	.G##FM	PV#WWWWW	** 	.G$$V]dill$CCC{&GAA&-PP	17!DIf----  ufe<<<<< '&  >?? 	.GfmQ///Gfk1-----	. 	.r"   N)	rB   rC   rD   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr  r   r"   r    r  r  v  sE         $O(). . . . .r"   r  c            
       b     e Zd Z fdZe	 ddedee         dee         defd            Z	 xZ
S )	BitModelc                    t                                          |           || _        t          |          | _        t          |          | _        |j        dk    rt          ||j	        d                   nt          j                    | _        t          j        d          | _        |                                  d S )Nr{   r5   r|   )r   r   )r*   r+   rQ   rx   embedderr   encoderr   rI   r   r   rP   r   AdaptiveAvgPool2dr   	post_initr   s     r    r+   zBitModel.__init__  s       %f--!&))  O33 #68KB8OPPPP 		 *622r"   Nr   r   r   r   c                 P   ||n| j         j        }||n| j         j        }|                     |          }|                     |||          }|d         }|                     |          }|                     |          }|s||f|dd          z   S t          |||j                  S )Nr   r   r   r   )r   pooler_outputrv   )	rQ   r   use_return_dictr!  r"  r   r   r
   rv   )r/   r   r   r   embedding_outputencoder_outputsr   pooled_outputs           r    r@   zBitModel.forward  s    
 %9$D  $+Jj 	 &1%<kk$+B]==66,,3GU` ' 
 
 ,A. II&788$566 	L%}58KKK7/')7
 
 
 	
r"   NN)rB   rC   rD   r+   r   r   r   r   r
   r@   rF   rG   s   @r    r  r    s            " os
 
"
:B4.
^fgk^l
	1
 
 
 ^
 
 
 
 
r"   r  z
    BiT Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	deej                 deej                 dee	         dee	         de
f
d            Z xZS )
BitForImageClassificationc                    t                                          |           |j        | _        t          |          | _        t          j        t          j                    |j        dk    r%t          j        |j	        d         |j                  nt          j
                              | _        |                                  d S )Nr   r5   )r*   r+   
num_labelsr  r  r   r   Flattenr  r   rP   
classifierr$  r   s     r    r+   z"BitForImageClassification.__init__  s        +F##-JLLEKEVYZEZEZBIf)"-v/@AAA`b`k`m`m
 

 	r"   Nr   labelsr   r   r   c                 @   ||n| j         j        }|                     |||          }|r|j        n|d         }|                     |          }d}||                     ||| j                   }|s|f|dd         z   }	||f|	z   n|	S t          |||j                  S )a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr&  r   r   )losslogitsrv   )rQ   r(  r  r'  r3  loss_functionr   rv   )
r/   r   r4  r   r   outputsr+  r7  r6  r   s
             r    r@   z!BitForImageClassification.forward  s     &1%<kk$+B]((<>R`k(ll1<L--'!*//%%ffdkBBD 	DY,F'+'7D7V##VC3f\c\qrrrrr"   )NNNN)rB   rC   rD   r+   r   r   r   FloatTensor
LongTensorr   r   r@   rF   rG   s   @r    r/  r/    s        
 
 
 
 
  59-1/3&*s su01s )*s 'tn	s
 d^s 
.s s s ^s s s s sr"   r/  zL
    BiT backbone, to be used with frameworks like DETR and MaskFormer.
    c            
       f     e Zd ZdZ fdZe	 d	dedee         dee         de	fd            Z
 xZS )
BitBackboneFc                    t                                          |           t                                          |           t          |          | _        |j        g|j        z   | _        |                                  d S rU   )	r*   r+   _init_backboner  r  r}   r   num_featuresr$  r   s     r    r+   zBitBackbone.__init__  sp       v&&&F###23f6II 	r"   Nr   r   r   r   c                 @   ||n| j         j        }||n| j         j        }|                     |dd          }|j        }d}t          | j                  D ]\  }}|| j        v r|||         fz  }|s|f}	|r|	|j        fz  }	|	S t          ||r|j        ndd          S )aN  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("google/bit-50")
        >>> model = AutoBackbone.from_pretrained("google/bit-50")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```NTr&  r   )feature_mapsrv   
attentions)	rQ   r(  r   r  rv   r   stage_namesout_featuresr   )
r/   r   r   r   r9  rv   rB  idxr   r   s
             r    r@   zBitBackbone.forward  s    , &1%<kk$+B]$8$D  $+Jj 	 ((<dPT(UU-#D$455 	6 	6JC)))s!3 55 	"_F# 37022M%3GQ'//T
 
 
 	
r"   r,  )rB   rC   rD   has_attentionsr+   r   r   r   r   r   r@   rF   rG   s   @r    r=  r=    s         N     os-
 -
"-
:B4.-
^fgk^l-
	-
 -
 -
 ^-
 -
 -
 -
 -
r"   r=  )r/  r  r  r=  )Nr   r   r   )r6   F)r   )7rE   rm   r[   typingr   numpyr   r   r   r   activationsr   modeling_outputsr   r	   r
   r   modeling_utilsr   utilsr   r   utils.backbone_utilsr   configuration_bitr   
get_loggerrB   loggerr   r   r!   r  r$   r  rI   Moduler,   	MaxPool2drj   rx   r   r   r   r   r   r   r   r   r   r  r  r/  r=  __all__r   r"   r    <module>rU     s   @ ?                         ! ! ! ! ! !            . - - - - - , , , , , , , , 1 1 1 1 1 1 ( ( ( ( ( ( 
	H	%	%& &ERWY]R]L^ & & & &R- - - - -ry - - -`    R\   $0 0 0 0 029 0 0 0f
 
 
 
 
2< 
 
 
:/ / / / /BI / / /f U\ e T V[Vb    *% % % % %") % % %   A( A( A( A( A(bi A( A( A(HF F F F F F F FR' ' ' ' '	 ' ' '.G G G G Gry G G GTC
 C
 C
 C
 C
 C
 C
 C
L . . . . . . . .* .
 .
 .
 .
 .
! .
 .
 .
b   +s +s +s +s +s 2 +s +s +s\   
;
 ;
 ;
 ;
 ;
$m ;
 ;
 
;
| Y
X
Xr"   