
     `i>                        d Z ddlZddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ  ej        e          Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z  G d dej                  Z!e G d de                      Z"e G d de"                      Z# ed            G d! d"e"                      Z$ ed#            G d$ d%e"e                      Z%g d&Z&dS )'zPyTorch ResNet model.    N)Optional)Tensornn   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)BackboneMixin   )ResNetConfigc                   H     e Zd Z	 ddededededef
 fd	Zd
edefdZ xZS )ResNetConvLayerr   r   reluin_channelsout_channelskernel_sizestride
activationc                    t                                                       t          j        |||||dz  d          | _        t          j        |          | _        |t          |         nt          j                    | _	        d S )N   F)r   r   paddingbias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr   Identityr   )selfr   r   r   r   r   	__class__s         ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/resnet/modeling_resnet.pyr   zResNetConvLayer.__init__(   s}     	9;vWbfgWgns
 
 
  ^L990:0F&,,BKMM    inputreturnc                     |                      |          }|                     |          }|                     |          }|S N)r!   r#   r   r%   r)   hidden_states      r'   forwardzResNetConvLayer.forward2   s?    ''..)),77|44r(   )r   r   r   )	__name__
__module____qualname__intstrr   r   r/   __classcell__r&   s   @r'   r   r   '   s        lrZ ZZ.1Z@CZQTZfiZ Z Z Z Z ZV         r(   r   c                   8     e Zd ZdZdef fdZdedefdZ xZS )ResNetEmbeddingszO
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    configc                     t                                                       t          |j        |j        dd|j                  | _        t          j        ddd          | _	        |j        | _        d S )N   r   )r   r   r   r   r   )r   r   r   )
r   r   r   num_channelsembedding_size
hidden_actembedderr   	MaxPool2dpoolerr%   r9   r&   s     r'   r   zResNetEmbeddings.__init__>   so    '!6Aa\b\m
 
 
 lqAFFF"/r(   pixel_valuesr*   c                     |j         d         }|| j        k    rt          d          |                     |          }|                     |          }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaper<   
ValueErrorr?   rA   )r%   rC   r<   	embeddings       r'   r/   zResNetEmbeddings.forwardF   s\    #)!,4,,,w   MM,//	KK	**	r(   )	r0   r1   r2   __doc__r   r   r   r/   r5   r6   s   @r'   r8   r8   9   sp         0| 0 0 0 0 0 0F v        r(   r8   c                   B     e Zd ZdZd
dededef fdZdedefd	Z xZS )ResNetShortCutz
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                     t                                                       t          j        ||d|d          | _        t          j        |          | _        d S )Nr   F)r   r   r   )r   r   r   r    r!   r"   r#   )r%   r   r   r   r&   s       r'   r   zResNetShortCut.__init__W   sP    9[,AV\chiii^L99r(   r)   r*   c                 Z    |                      |          }|                     |          }|S r,   )r!   r#   r-   s      r'   r/   zResNetShortCut.forward\   s.    ''..)),77r(   )r   )	r0   r1   r2   rH   r3   r   r   r/   r5   r6   s   @r'   rJ   rJ   Q   s         
: :C :s :C : : : : : :
V         r(   rJ   c            	       <     e Zd ZdZd
dedededef fdZd	 Z xZS )ResNetBasicLayerzO
    A classic ResNet's residual layer composed by two `3x3` convolutions.
    r   r   r   r   r   r   c                 P   t                                                       ||k    p|dk    }|rt          |||          nt          j                    | _        t          j        t          |||          t          ||d                     | _        t          |         | _
        d S )Nr   r   r   r   r   rJ   r   r$   shortcut
Sequentialr   layerr   r   )r%   r   r   r   r   should_apply_shortcutr&   s         r'   r   zResNetBasicLayer.__init__g   s     +| ; Jv{H]pN;VDDDDcecncpcp 	 ]KfEEEL,4HHH
 

 !,r(   c                     |}|                      |          }|                     |          }||z  }|                     |          }|S r,   rU   rS   r   r%   r.   residuals      r'   r/   zResNetBasicLayer.forwards   J    zz,//==** |44r(   )r   r   )	r0   r1   r2   rH   r3   r4   r   r/   r5   r6   s   @r'   rN   rN   b   sx         
- 
-C 
-s 
-C 
-Y\ 
- 
- 
- 
- 
- 
-      r(   rN   c                   L     e Zd ZdZ	 	 	 	 ddededed	ed
edef fdZd Z xZ	S )ResNetBottleNeckLayera  
    A classic ResNet's bottleneck layer composed by three `3x3` convolutions.

    The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`. If
    `downsample_in_bottleneck` is true, downsample will be in the first layer instead of the second layer.
    r   r      Fr   r   r   r   	reductiondownsample_in_bottleneckc           
         t                                                       ||k    p|dk    }||z  }|rt          |||          nt          j                    | _        t          j        t          ||d|r|nd          t          |||s|nd          t          ||dd                     | _        t          |         | _
        d S )Nr   rP   )r   r   )r   r   rR   )
r%   r   r   r   r   r_   r`   rV   reduces_channelsr&   s
            r'   r   zResNetBottleNeckLayer.__init__   s     	 +| ; Jv{'94H]pN;VDDDDcecncpcp 	 ]-1OgEnVVmn   ,.>UmGtvvstuuu,lVZ[[[
 

 !,r(   c                     |}|                      |          }|                     |          }||z  }|                     |          }|S r,   rX   rY   s      r'   r/   zResNetBottleNeckLayer.forward   r[   r(   )r   r   r^   F)
r0   r1   r2   rH   r3   r4   boolr   r/   r5   r6   s   @r'   r]   r]   |   s           ).- -- - 	-
 - - #'- - - - - -0      r(   r]   c                   N     e Zd ZdZ	 	 ddededededef
 fdZd	ed
efdZ xZ	S )ResNetStagez4
    A ResNet stage composed by stacked layers.
    r   r9   r   r   r   depthc                 ^   t                                                       j        dk    rt          nt          j        dk    r ||j        j                  }n ||j                  }t          j        |gfdt          |dz
            D             R  | _
        d S )N
bottleneck)r   r   r`   )r   r   c                 6    g | ]} j                    S )rQ   )r>   ).0_r9   rU   r   s     r'   
<listcomp>z(ResNetStage.__init__.<locals>.<listcomp>   s.    uuu_`55|HYZZZuuur(   r   )r   r   
layer_typer]   rN   r>   r`   r   rT   rangelayers)	r%   r9   r   r   r   rg   first_layerrU   r&   s	    ` `   @r'   r   zResNetStage.__init__   s     	)/):l)J)J%%P`,,%!,)/)H  KK  %\&U[UfgggKm
uuuuuudijorsjsdtdtuuu
 
 
r(   r)   r*   c                 4    |}| j         D ]} ||          }|S r,   )rp   )r%   r)   r.   rU   s       r'   r/   zResNetStage.forward   s/    [ 	/ 	/E 5..LLr(   )r   r   )
r0   r1   r2   rH   r   r3   r   r   r/   r5   r6   s   @r'   rf   rf      s          
 

 
 	

 
 
 
 
 
 
 
4V         r(   rf   c            	       @     e Zd Zdef fdZ	 d
dedededefd	Z xZ	S )ResNetEncoderr9   c           
          t                                                       t          j        g           | _        | j                            t          ||j        |j        d         |j	        rdnd|j
        d                              t          |j        |j        dd                    }t          ||j
        dd                    D ]3\  \  }}}| j                            t          ||||                     4d S )Nr   r   r   )r   rg   )rg   )r   r   r   
ModuleListstagesappendrf   r=   hidden_sizesdownsample_in_first_stagedepthszip)r%   r9   in_out_channelsr   r   rg   r&   s         r'   r   zResNetEncoder.__init__   s   mB''%#A&"<Cqq!mA&  	
 	
 	
 f163Fqrr3JKK25ov}UVUWUWGX2Y2Y 	\ 	\.'[,K{6;TYZZZ[[[[	\ 	\r(   FTr.   output_hidden_statesreturn_dictr*   c                     |rdnd }| j         D ]}|r||fz   } ||          }|r||fz   }|st          d ||fD                       S t          ||          S )N c              3      K   | ]}||V  	d S r,   r   )rk   vs     r'   	<genexpr>z(ResNetEncoder.forward.<locals>.<genexpr>   s"      SSqQ]]]]]SSr(   )last_hidden_statehidden_states)rw   tupler	   )r%   r.   r~   r   r   stage_modules         r'   r/   zResNetEncoder.forward   s     3< K 	6 	6L# @ - ?'<55LL 	<)\O;M 	TSS\=$ASSSSSS-*'
 
 
 	
r(   )FT)
r0   r1   r2   r   r   r   rd   r	   r/   r5   r6   s   @r'   rt   rt      s        \| \ \ \ \ \ \$ ]a
 
"
:>
UY
	'
 
 
 
 
 
 
 
r(   rt   c                   0    e Zd ZU eed<   dZdZddgZd ZdS )ResNetPreTrainedModelr9   resnetrC   r   rJ   c                    t          |t          j                  r)t          j                            |j        dd           d S t          |t          j                  rt          j                            |j        t          j	        d                     |j
        ot          j                            |j                  \  }}|dk    rdt          j	        |          z  nd}t          j                            |j
        | |           d S d S t          |t          j        t          j        f          rLt          j                            |j        d           t          j                            |j
        d           d S d S )Nfan_outr   )modenonlinearity   )ar   r   )
isinstancer   r    initkaiming_normal_weightLinearkaiming_uniform_mathsqrtr   _calculate_fan_in_and_fan_outuniform_r"   	GroupNorm	constant_)r%   modulefan_inrl   bounds        r'   _init_weightsz#ResNetPreTrainedModel._init_weights   sF   fbi(( 	.G##FM	PV#WWWWW	** 	.G$$V]dill$CCC{&GAA&-PP	17!DIf----  ufe<<<<< '&  >?? 	.GfmQ///Gfk1-----	. 	.r(   N)	r0   r1   r2   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   r   r(   r'   r   r      sH          $O*,<=. . . . .r(   r   c            
       b     e Zd Z fdZe	 ddedee         dee         defd            Z	 xZ
S )	ResNetModelc                    t                                          |           || _        t          |          | _        t          |          | _        t          j        d          | _	        | 
                                 d S )N)r   r   )r   r   r9   r8   r?   rt   encoderr   AdaptiveAvgPool2drA   	post_initrB   s     r'   r   zResNetModel.__init__  sh       (00$V,,*622r(   NrC   r~   r   r*   c                 &   ||n| j         j        }||n| j         j        }|                     |          }|                     |||          }|d         }|                     |          }|s||f|dd          z   S t          |||j                  S )Nr~   r   r   r   )r   pooler_outputr   )r9   r~   use_return_dictr?   r   rA   r
   r   )r%   rC   r~   r   embedding_outputencoder_outputsr   pooled_outputs           r'   r/   zResNetModel.forward  s    
 %9$D  $+Jj 	 &1%<kk$+B]==66,,3GU` ' 
 
 ,A.$566 	L%}58KKK7/')7
 
 
 	
r(   NN)r0   r1   r2   r   r   r   r   rd   r
   r/   r5   r6   s   @r'   r   r     s             os
 
"
:B4.
^fgk^l
	1
 
 
 ^
 
 
 
 
r(   r   z
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	deej                 deej                 dee	         dee	         de
f
d            Z xZS )
ResNetForImageClassificationc                    t                                          |           |j        | _        t          |          | _        t          j        t          j                    |j        dk    r%t          j        |j	        d         |j                  nt          j
                              | _        |                                  d S )Nr   )r   r   
num_labelsr   r   r   rT   Flattenr   ry   r$   
classifierr   rB   s     r'   r   z%ResNetForImageClassification.__init__:  s        +!&))-JLLEKEVYZEZEZBIf)"-v/@AAA`b`k`m`m
 

 	r(   NrC   labelsr~   r   r*   c                 @   ||n| j         j        }|                     |||          }|r|j        n|d         }|                     |          }d}||                     ||| j                   }|s|f|dd         z   }	||f|	z   n|	S t          |||j                  S )a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )losslogitsr   )r9   r   r   r   r   loss_functionr   r   )
r%   rC   r   r~   r   outputsr   r   r   outputs
             r'   r/   z$ResNetForImageClassification.forwardF  s     &1%<kk$+B]++lAUcn+oo1<L--'!*//%%ffdkBBD 	DY,F'+'7D7V##VC3f\c\qrrrrr(   )NNNN)r0   r1   r2   r   r   r   torchFloatTensor
LongTensorrd   r   r/   r5   r6   s   @r'   r   r   3  s        
 
 
 
 
  59-1/3&*s su01s )*s 'tn	s
 d^s 
.s s s ^s s s s sr(   r   zO
    ResNet backbone, to be used with frameworks like DETR and MaskFormer.
    c            
       f     e Zd ZdZ fdZe	 d	dedee         dee         de	fd            Z
 xZS )
ResNetBackboneFc                 .   t                                          |           t                                          |           |j        g|j        z   | _        t          |          | _        t          |          | _	        | 
                                 d S r,   )r   r   _init_backboner=   ry   num_featuresr8   r?   rt   r   r   rB   s     r'   r   zResNetBackbone.__init__o  s       v&&&#23f6II(00$V,, 	r(   NrC   r~   r   r*   c                 j   ||n| j         j        }||n| j         j        }|                     |          }|                     |dd          }|j        }d}t          | j                  D ]\  }}	|	| j        v r|||         fz  }|s|f}
|r|
|j        fz  }
|
S t          ||r|j        ndd          S )a!  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        >>> model = AutoBackbone.from_pretrained(
        ...     "microsoft/resnet-50", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 2048, 7, 7]
        ```NTr   r   )feature_mapsr   
attentions)
r9   r   r~   r?   r   r   	enumeratestage_namesout_featuresr   )r%   rC   r~   r   r   r   r   r   idxstager   s              r'   r/   zResNetBackbone.forwardz  s   8 &1%<kk$+B]$8$D  $+Jj 	  ==66,,/dX\,]]-#D$455 	6 	6JC)))s!3 55 	"_F# 37022M%3GQ'//T
 
 
 	
r(   r   )r0   r1   r2   has_attentionsr   r   r   r   rd   r   r/   r5   r6   s   @r'   r   r   g  s         N	 	 	 	 	 os5
 5
"5
:B4.5
^fgk^l5
	5
 5
 5
 ^5
 5
 5
 5
 5
r(   r   )r   r   r   r   )'rH   r   typingr   r   r   r   activationsr   modeling_outputsr   r	   r
   r   modeling_utilsr   utilsr   r   utils.backbone_utilsr   configuration_resnetr   
get_loggerr0   loggerModuler   r8   rJ   rN   r]   rf   rt   r   r   r   r   __all__r   r(   r'   <module>r      sf                     ! ! ! ! ! !            . - - - - - , , , , , , , , 1 1 1 1 1 1 . . . . . . 
	H	%	%    bi   $    ry   0    RY   "    ry   4' ' ' ' 'BI ' ' 'T# # # # #") # # #L&
 &
 &
 &
 &
BI &
 &
 &
R . . . . .O . . .* $
 $
 $
 $
 $
' $
 $
 $
N   +s +s +s +s +s#8 +s +s +s\   
D
 D
 D
 D
 D
*M D
 D
 
D
N e
d
dr(   