
    %`i                     d   d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	c m
Z d dlm	Z	mZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ d
dlmZmZmZ d
dlmZ d
dlmZmZ g dZ dej        dej        fdZ!ej"        #                    d           dej        dej        de$e%         dej        fdZ&ej"        #                    d            G d de	j'                  Z( G d de	j'                  Z)	 	 	 	 	 	 dTdeded ed!ede$e%         d"e%d#e$e%         d$e*d%e*d&ee         d'ee         d(eej                 d)e+defd*Z,ej"        #                    d+            G d, d-e	j'                  Z- G d. d/e-          Z. G d0 d1e	j'                  Z/ G d2 d3e/          Z0 G d4 d5e	j'                  Z1d6e$e%         d7e%d8e$e%         d"e$e%         de$e%         d9e*d:ee         d;e+d<ede1fd=Z2d>eiZ3 G d? d@e          Z4 G dA dBe          Z5 G dC dDe          Z6 G dE dFe          Z7 G dG dHe          Z8 G dI dJe          Z9 e             edKe4j:        fL          dddMd:ee4         d;e+d<ede1fdN                        Z; e             edKe5j:        fL          dddMd:ee5         d;e+d<ede1fdO                        Z< e             edKe6j:        fL          dddMd:ee6         d;e+d<ede1fdP                        Z= e             edKe7j:        fL          dddMd:ee7         d;e+d<ede1fdQ                        Z> e             edKe8j:        fL          dddMd:ee8         d;e+d<ede1fdR                        Z? e             edKe9j:        fL          dddMd:ee9         d;e+d<ede1fdS                        Z@dS )U    N)partial)AnyCallableOptional)nnTensor   )MLPPermute)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once   )register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)SwinTransformerSwin_T_WeightsSwin_S_WeightsSwin_B_WeightsSwin_V2_T_WeightsSwin_V2_S_WeightsSwin_V2_B_Weightsswin_tswin_sswin_b	swin_v2_t	swin_v2_s	swin_v2_bxreturnc           
      @   | j         dd          \  }}}t          j        | ddd|dz  d|dz  f          } | ddd ddd dd d f         }| ddd ddd dd d f         }| ddd ddd dd d f         }| ddd ddd dd d f         }t          j        ||||gd          } | S )Nr   r	   .r   )shapeFpadtorchcat)r$   HW_x0x1x2x3s           w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchvision/models/swin_transformer.py_patch_merging_padr6   #   s    gbcclGAq!	a!Q1q5!QU+,,A	
31addAAA	B	
31addAAA	B	
31addAAA	B	
31addAAA	B	2r2r"B''AH    r6   relative_position_bias_tablerelative_position_indexwindow_sizec                     |d         |d         z  }| |         }|                     ||d          }|                    ddd                                                              d          }|S )Nr   r   r(   r	   )viewpermute
contiguous	unsqueeze)r8   r9   r:   Nrelative_position_biass        r5   _get_relative_position_biasrB   1   ss     	AQ'A9:QR388ArBB3;;Aq!DDOOQQ[[\]^^!!r7   rB   c                   `     e Zd ZdZej        fdededej        f         f fdZ	de
fdZ xZS )PatchMergingzPatch Merging Layer.
    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
    dim
norm_layer.c                     t                                                       t          |            || _        t	          j        d|z  d|z  d          | _         |d|z            | _        d S N   r	   Fbiassuper__init__r   rE   r   Linear	reductionnormselfrE   rF   	__class__s      r5   rN   zPatchMerging.__init__E   g    D!!!1s7AG%@@@Jq3w''			r7   r$   c                 x    t          |          }|                     |          }|                     |          }|S z
        Args:
            x (Tensor): input tensor with expected layout of [..., H, W, C]
        Returns:
            Tensor with layout of [..., H/2, W/2, 2*C]
        )r6   rQ   rP   rS   r$   s     r5   forwardzPatchMerging.forwardL   s6     q!!IIaLLNN1r7   __name__
__module____qualname____doc__r   	LayerNormintr   ModulerN   r   rY   __classcell__rT   s   @r5   rD   rD   >             IK ( (C (Xc29n-E ( ( ( ( ( (
 
 
 
 
 
 
 
 
r7   rD   c                   `     e Zd ZdZej        fdededej        f         f fdZ	de
fdZ xZS )PatchMergingV2zPatch Merging Layer for Swin Transformer V2.
    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
    rE   rF   .c                     t                                                       t          |            || _        t	          j        d|z  d|z  d          | _         |d|z            | _        d S rH   rL   rR   s      r5   rN   zPatchMergingV2.__init__`   rU   r7   r$   c                 x    t          |          }|                     |          }|                     |          }|S rW   )r6   rP   rQ   rX   s     r5   rY   zPatchMergingV2.forwardg   s6     q!!NN1IIaLLr7   rZ   rc   s   @r5   rf   rf   Y   rd   r7   rf           Tinput
qkv_weightproj_weightrA   	num_heads
shift_sizeattention_dropoutdropoutqkv_bias	proj_biaslogit_scaletrainingc           	         | j         \  }}}}|d         ||d         z  z
  |d         z  }|d         ||d         z  z
  |d         z  }t          j        | ddd|d|f          }|j         \  }}}}|                                }|d         |k    rd|d<   |d         |k    rd|d<   t	          |          dk    r't          j        ||d          |d          fd          }||d         z  ||d         z  z  }|                    |||d         z  |d         ||d         z  |d         |          }|                    dddddd          	                    ||z  |d         |d         z  |          }|L|	J|	
                                }	|	                                dz  }|	|d|z                                            t          j        |||	          }|	                    |                    d          |                    d          d|||z                                ddddd          }|d         |d         |d         }}}|t          j        |d
          t          j        |d
                              dd
          z  }t          j        |t%          j        d                                                    }||z  }n4|||z  dz  z  }|                    |                    dd
                    }||z   }t	          |          dk    r?|                    ||f          }d|d          f|d          |d          f|d          d	ff}d|d          f|d          |d          f|d          d	ff} d}!|D ]/}"| D ]*}#|!||"d         |"d         |#d         |#d         f<   |!dz  }!+0|                    ||d         z  |d         ||d         z  |d                   }|                    dddd          	                    ||d         |d         z            }|                    d          |                    d          z
  }|                    |dk    t3          d                                        |dk    t3          d                    }|                    |                    d          |z  |||                    d          |                    d                    }||                    d                              d          z   }|                    d
||                    d          |                    d                    }t          j        |d
          }t          j        |||          }|                    |                              dd          	                    |                    d          |                    d          |          }t          j        |||
          }t          j        |||          }|                    |||d         z  ||d         z  |d         |d         |          }|                    dddddd          	                    ||||          }t	          |          dk    r%t          j        ||d         |d         fd          }|d	d	d	|d	|d	d	f                                         }|S )a  
    Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.
    Args:
        input (Tensor[N, H, W, C]): The input tensor or 4-dimensions.
        qkv_weight (Tensor[in_dim, out_dim]): The weight tensor of query, key, value.
        proj_weight (Tensor[out_dim, out_dim]): The weight tensor of projection.
        relative_position_bias (Tensor): The learned relative position bias added to attention.
        window_size (List[int]): Window size.
        num_heads (int): Number of attention heads.
        shift_size (List[int]): Shift size for shifted window attention.
        attention_dropout (float): Dropout ratio of attention weight. Default: 0.0.
        dropout (float): Dropout ratio of output. Default: 0.0.
        qkv_bias (Tensor[out_dim], optional): The bias tensor of query, key, value. Default: None.
        proj_bias (Tensor[out_dim], optional): The bias tensor of projection. Default: None.
        logit_scale (Tensor[out_dim], optional): Logit scale of cosine attention for Swin Transformer V2. Default: None.
        training (bool, optional): Training flag used by the dropout parameters. Default: True.
    Returns:
        Tensor[N, H, W, C]: The output tensor after shifted window attention.
    r   r   )r   r	   )shiftsdims   r	   rI      Nr(   )rE   g      Y@)maxg      g      Yri   )prt   )r)   r*   r+   copysumr,   rollr<   r=   reshapeclonenumelzero_linearsize	normalize	transposeclampmathlogexpmatmul	new_zerosr?   masked_fillfloatsoftmaxrp   r>   )$rj   rk   rl   rA   r:   rm   rn   ro   rp   rq   rr   rs   rt   Br.   r/   Cpad_rpad_br$   r0   pad_Hpad_Wnum_windowslengthqkvqkvattn	attn_maskh_slicesw_slicescounthws$                                       r5   shifted_window_attentionr   t   s   F JAq!Q^a+a.00KNBE^a+a.00KNBE	eaAua/00AAueQ""J1~
11~
1 :Jq:a=.:a=.!AOOO KN*uA/FGK	q%;q>);q>5KPQN;RT_`aTbdeffA			!Q1a##++AO[^kZ[n=\^_``A 8#7>>##!!Q&!f*$%++---
(1j(
+
+C
++affQiiAy!y.
I
I
Q
QRSUVXY[\^_
`
`C!fc!fc!f!qA{1"%%%A2(>(>(>(H(HR(P(PPk+48E??CCCGGIIk!iD((xxB++,,((D
:KK//	Q(KN?Z]N*KzZ[}n^bMcdQ(KN?Z]N*KzZ[}n^bMcd 	 	A  6;	!A$1+qtad{23
 NN5KN#:KNEU`abUcLcepqrestt	%%aAq1199+{ST~XcdeXfGfgg	''**Y-@-@-C-CC	)))q.%--HHTTU^bcUcejkneoeopp	yyk1;	166RS99VWV\V\]^V_V_``i))!,,66q999yyYq		166!99==9Tr"""D9T.BBBDA  A&&..qvvayy!&&))QGGA	K++A		!w222A 	
q%;q>)5KN+BKPQNT_`aTbdeffA			!Q1a##++AueQ??A :Jq*Q-A!?fMMM 	
!!!RaR!QQQ,""$$AHr7   r   c                        e Zd ZdZ	 	 	 	 ddedee         dee         deded	ed
edef fdZd Z	d Z
dej        fdZdedefdZ xZS )ShiftedWindowAttentionz/
    See :func:`shifted_window_attention`.
    Tri   rE   r:   rn   rm   rq   rr   ro   rp   c	                    t                                                       t          |          dk    st          |          dk    rt          d          || _        || _        || _        || _        || _        t          j
        ||dz  |          | _        t          j
        |||          | _        |                                  |                                  d S )Nr	   z.window_size and shift_size must be of length 2rx   rJ   )rM   rN   len
ValueErrorr:   rn   rm   ro   rp   r   rO   r   proj#define_relative_position_bias_tabledefine_relative_position_index)
rS   rE   r:   rn   rm   rq   rr   ro   rp   rT   s
            r5   rN   zShiftedWindowAttention.__init__   s     	{q  C
OOq$8$8MNNN&$"!29S#'999Ic3Y777	00222++-----r7   c                     t          j        t          j        d| j        d         z  dz
  d| j        d         z  dz
  z  | j                            | _        t           j                            | j        d           d S )Nr	   r   r   {Gz?std)	r   	Parameterr,   zerosr:   rm   r8   inittrunc_normal_rS   s    r5   r   z:ShiftedWindowAttention.define_relative_position_bias_table	  sy    ,.LKT-a0014T=Ma=P9PST9TUW[Weff-
 -
) 	d?TJJJJJr7   c                    t          j        | j        d                   }t          j        | j        d                   }t          j        t          j        ||d                    }t          j        |d          }|d d d d d f         |d d d d d f         z
  }|                    ddd                                          }|d d d d dfxx         | j        d         dz
  z  cc<   |d d d d dfxx         | j        d         dz
  z  cc<   |d d d d dfxx         d| j        d         z  dz
  z  cc<   |                    d                                          }| 	                    d|           d S )Nr   r   ijindexingr	   r(   r9   )
r,   aranger:   stackmeshgridflattenr=   r>   r~   register_buffer)rS   coords_hcoords_wcoordscoords_flattenrelative_coordsr9   s          r5   r   z5ShiftedWindowAttention.define_relative_position_index  s   < 0 344< 0 344U^HhNNNOOvq11(AAAt4~aaaqqqj7QQ)11!Q::EEGG111a   D$4Q$7!$;;   111a   D$4Q$7!$;;   111a   A(8(;$;a$??   "1"5"5b"9"9"A"A"C"C68OPPPPPr7   r%   c                 B    t          | j        | j        | j                  S N)rB   r8   r9   r:   r   s    r5   get_relative_position_biasz1ShiftedWindowAttention.get_relative_position_bias  s$    *-t/KTM]
 
 	
r7   r$   c                     |                                  }t          || j        j        | j        j        || j        | j        | j        | j        | j	        | j        j
        | j        j
        | j                  S )
        Args:
            x (Tensor): Tensor with layout of [B, H, W, C]
        Returns:
            Tensor with same layout as input, i.e. [B, H, W, C]
        )rn   ro   rp   rq   rr   rt   )r   r   r   weightr   r:   rm   rn   ro   rp   rK   rt   rS   r$   rA   s      r5   rY   zShiftedWindowAttention.forward#  sp     "&!@!@!B!B'HOI"N"4LX]in]
 
 
 	
r7   TTri   ri   )r[   r\   r]   r^   r`   listboolr   rN   r   r   r,   r   r   rY   rb   rc   s   @r5   r   r      s         #&. .. #Y. I	.
 . . . !. . . . . . .4K K KQ Q Q
EL 
 
 
 


 
F 
 
 
 
 
 
 
 
r7   r   c                        e Zd ZdZ	 	 	 	 ddedee         dee         deded	ed
edef fdZd Z	de
j        fdZdefdZ xZS )ShiftedWindowAttentionV2z2
    See :func:`shifted_window_attention_v2`.
    Tri   rE   r:   rn   rm   rq   rr   ro   rp   c	           
      6   t                                          ||||||||           t          j        t	          j        dt	          j        |ddf          z                      | _        t          j        t          j	        ddd          t          j
        d          t          j	        d|d	                    | _        |rQ| j        j                                        d
z  }	| j        j        |	d|	z           j                                         d S d S )N)rq   rr   ro   rp   
   r   r	   i   TrJ   )inplaceFrx   )rM   rN   r   r   r,   r   onesrs   
SequentialrO   ReLUcpb_mlpr   rK   r   datar   )rS   rE   r:   rn   rm   rq   rr   ro   rp   r   rT   s             r5   rN   z!ShiftedWindowAttentionV2.__init__@  s    	/ 	 		
 		
 		
 <	"uz9aQRBS7T7T2T(U(UVV}Ia4((("'$*?*?*?3PY`eAfAfAf
 
  	<X]((**a/FHM&1v:-.399;;;;;	< 	<r7   c                    t          j        | j        d         dz
   | j        d         t           j                  }t          j        | j        d         dz
   | j        d         t           j                  }t          j        t          j        ||gd                    }|                    ddd                                                              d          }|d d d d d d dfxx         | j        d         dz
  z  cc<   |d d d d d d dfxx         | j        d         dz
  z  cc<   |dz  }t          j	        |          t          j
        t          j        |          dz             z  d	z  }|                     d
|           d S )Nr   r   )dtyper   r   r	      g      ?g      @relative_coords_table)r,   r   r:   float32r   r   r=   r>   r?   signlog2absr   )rS   relative_coords_hrelative_coords_wr   s       r5   r   z<ShiftedWindowAttentionV2.define_relative_position_bias_table_  s   !L4+;A+>+B)CTEUVWEX`e`mnnn!L4+;A+>+B)CTEUVWEX`e`mnnn %EN<MO`;alp,q,q,q r r 5 = =aA F F Q Q S S ] ]^_ ` `aaaAAAqj)))T-=a-@1-DD)))aaaAAAqj)))T-=a-@1-DD)))"J,--
59EZ;[;[^a;a0b0bbehh 	 	46KLLLLLr7   r%   c                     t          |                     | j                                      d| j                  | j        | j                  }dt          j        |          z  }|S )Nr(      )	rB   r   r   r<   rm   r9   r:   r,   sigmoid)rS   rA   s     r5   r   z3ShiftedWindowAttentionV2.get_relative_position_biaso  s`    !<LL34499"dnMM("
 "

 "$em4J&K&K!K%%r7   r$   c                     |                                  }t          || j        j        | j        j        || j        | j        | j        | j        | j	        | j        j
        | j        j
        | j        | j                  S )r   )rn   ro   rp   rq   rr   rs   rt   )r   r   r   r   r   r:   rm   rn   ro   rp   rK   rs   rt   r   s      r5   rY   z ShiftedWindowAttentionV2.forwardx  sv     "&!@!@!B!B'HOI"N"4LX]in(]
 
 
 	
r7   r   )r[   r\   r]   r^   r`   r   r   r   rN   r   r,   r   r   rY   rb   rc   s   @r5   r   r   ;  s          #&< << #Y< I	<
 < < < !< < < < < < <>M M M &EL & & & &
 
 
 
 
 
 
 
 
r7   r   c                        e Zd ZdZddddej        efdededee         dee         de	d	e	d
e	de	de
dej        f         de
dej        f         f fdZdefdZ xZS )SwinTransformerBlocka  
    Swin Transformer Block.
    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (List[int]): Window size.
        shift_size (List[int]): Shift size for shifted window attention.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
        norm_layer (nn.Module): Normalization layer.  Default: nn.LayerNorm.
        attn_layer (nn.Module): Attention layer. Default: ShiftedWindowAttention
          @ri   rE   rm   r:   rn   	mlp_ratiorp   ro   stochastic_depth_probrF   .
attn_layerc                 t   t                                                       t          |             |	|          | _         |
||||||          | _        t          |d          | _         |	|          | _        t          |t          ||z            |gt          j        d |          | _        | j                                        D ]m}t          |t          j                  rQt          j                            |j                   |j        &t          j                            |j        d           nd S )N)ro   rp   row)activation_layerr   rp   gư>r   )rM   rN   r   norm1r   r   stochastic_depthnorm2r
   r`   r   GELUmlpmodules
isinstancerO   r   xavier_uniform_r   rK   normal_)rS   rE   rm   r:   rn   r   rp   ro   r   rF   r   mrT   s               r5   rN   zSwinTransformerBlock.__init__  s+    	D!!!Z__
J/
 
 
	 !00Eu M MZ__
sSy1137"'[_ipqqq!!## 	6 	6A!RY'' 6''1116%GOOAFO555		6 	6r7   r$   c                     ||                      |                     |                     |                              z   }||                      |                     |                     |                              z   }|S r   )r   r   r   r   r   rX   s     r5   rY   zSwinTransformerBlock.forward  sc    %%dii

1&>&>???%%dhhtzz!}}&=&=>>>r7   )r[   r\   r]   r^   r   r_   r   r`   r   r   r   ra   rN   r   rY   rb   rc   s   @r5   r   r     s         * #&'*/1|/E!6 !6!6 !6 #Y	!6
 I!6 !6 !6 !!6  %!6 S")^,!6 S")^,!6 !6 !6 !6 !6 !6F        r7   r   c                        e Zd ZdZddddej        efdededee         dee         de	d	e	d
e	de	de
dej        f         de
dej        f         f fdZdefdZ xZS )SwinTransformerBlockV2a  
    Swin Transformer V2 Block.
    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (List[int]): Window size.
        shift_size (List[int]): Shift size for shifted window attention.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
        norm_layer (nn.Module): Normalization layer.  Default: nn.LayerNorm.
        attn_layer (nn.Module): Attention layer. Default: ShiftedWindowAttentionV2.
    r   ri   rE   rm   r:   rn   r   rp   ro   r   rF   .r   c                 ^    t                                          |||||||||	|

  
         d S )N)r   rp   ro   r   rF   r   )rM   rN   )rS   rE   rm   r:   rn   r   rp   ro   r   rF   r   rT   s              r5   rN   zSwinTransformerBlockV2.__init__  sN     	/"7!! 	 	
 	
 	
 	
 	
r7   r$   c                     ||                      |                     |                     |                              z   }||                      |                     |                     |                              z   }|S r   )r   r   r   r   r   rX   s     r5   rY   zSwinTransformerBlockV2.forward  se     %%djj1&>&>???%%djj!&=&=>>>r7   )r[   r\   r]   r^   r   r_   r   r`   r   r   r   ra   rN   r   rY   rb   rc   s   @r5   r   r     s         * #&'*/1|/G
 

 
 #Y	

 I
 
 
 !
  %
 S")^,
 S")^,
 
 
 
 
 
4        r7   r   c                       e Zd ZdZdddddddefdee         ded	ee         d
ee         dee         dedededededee	de
j        f                  dee	de
j        f                  de	de
j        f         f fdZd Z xZS )r   a;  
    Implements Swin Transformer from the `"Swin Transformer: Hierarchical Vision Transformer using
    Shifted Windows" <https://arxiv.org/abs/2103.14030>`_ paper.
    Args:
        patch_size (List[int]): Patch size.
        embed_dim (int): Patch embedding dimension.
        depths (List(int)): Depth of each Swin Transformer layer.
        num_heads (List(int)): Number of attention heads in different layers.
        window_size (List[int]): Window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob (float): Stochastic depth rate. Default: 0.1.
        num_classes (int): Number of classes for classification head. Default: 1000.
        block (nn.Module, optional): SwinTransformer Block. Default: None.
        norm_layer (nn.Module, optional): Normalization layer. Default: None.
        downsample_layer (nn.Module): Downsample layer (patch merging). Default: PatchMerging.
    r   ri   g?i  N
patch_size	embed_dimdepthsrm   r:   r   rp   ro   r   num_classesrF   .blockdownsample_layerc                    t                                                       t          |            |
| _        |t          }|t          t          j        d          }g }|                    t          j	        t          j
        d||d         |d         f|d         |d         f          t          g d           ||                               t          |          }d}t          t          |                    D ]}g }|d|z  z  }t          ||                   D ]X|	t          |          z  |dz
  z  }|                     ||||         |fd	|D             |||||
	  	                   |dz  }Y|                    t          j	        |            |t          |          dz
  k     r|                     |||                     t          j	        | | _        |dt          |          dz
  z  z  } ||          | _        t          g d          | _        t          j        d          | _        t          j        d          | _        t          j        ||
          | _        |                                 D ]m}t5          |t          j                  rQt          j                            |j        d           |j        $t          j                            |j                   nd S )Ngh㈵>)epsrx   r   r   )kernel_sizestride)r   r	   rx   r   r	   c                 0    g | ]}d z  dk    rdn|d z  S )r	   r    ).0r   i_layers     r5   
<listcomp>z,SwinTransformer.__init__.<locals>.<listcomp>C  s/    #[#[#[!1)9)9AAqAv#[#[#[r7   )r:   rn   r   rp   ro   r   rF   )r   rx   r   r	   r   r   ) rM   rN   r   r  r   r   r   r_   appendr   Conv2dr   r~   ranger   r   featuresrQ   r=   AdaptiveAvgPool2davgpoolFlattenr   rO   headr   r   r   r   r   rK   zeros_)rS   r   r   r  rm   r:   r   rp   ro   r   r  rF   r  r  layerstotal_stage_blocksstage_block_idi_stagestagerE   sd_probnum_featuresr   r  rT   s                          @r5   rN   zSwinTransformer.__init__  s     	D!!!&=(E 4888J"$M	yz!}jm.LV`abVceopqerUs   %%
9%% 	
 	
 	
 ![[S[[)) 	A 	AG%'Eaj(C 11 $ $/%2G2GGK]`aKabE!'*$/#[#[#[#[{#[#[#["+ '*;.5#-
 
 
   !#MM"-/000#f++/**..sJ??@@@v. 1Vq#99J|,,	|||,,+A..z!}}IlK88	 	+ 	+A!RY'' +%%ahD%9996%GNN16***		+ 	+r7   c                    |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|S r   )r  rQ   r=   r  r   r  rX   s     r5   rY   zSwinTransformer.forward_  sc    MM!IIaLLLLOOLLOOLLOOIIaLLr7   )r[   r\   r]   r^   rD   r   r`   r   r   r   r   ra   rN   rY   rb   rc   s   @r5   r   r     sE        4 #&'*9=485AM+ M+IM+ M+ S		M+
 9M+ #YM+ M+ M+ !M+  %M+ M+ Xc29n56M+ bi01M+ #3	>2M+ M+ M+ M+ M+ M+^      r7   r   r   r   r  r   weightsprogresskwargsc           
          |)t          |dt          |j        d                              t          d| |||||d|}	|*|	                    |                    |d                     |	S )Nr  
categories)r   r   r  rm   r:   r   T)r   
check_hashr
  )r   r   metar   load_state_dictget_state_dict)
r   r   r  rm   r:   r   r  r   r!  models
             r5   _swin_transformerr)  i  s     fmSl9S5T5TUUU 3   E g44hSW4XXYYYLr7   r#  c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z7https://download.pytorch.org/models/swin_t-704ceda3.pth      	crop_sizeresize_sizeinterpolationibr+  r+  Uhttps://github.com/pytorch/vision/tree/main/references/classification#swintransformerImageNet-1KguV^T@glW@zacc@1zacc@5gX9@g\([@YThese weights reproduce closely the results of the paper using a similar training recipe.
num_paramsmin_sizerecipe_metrics_ops
_file_size_docsurl
transformsr%  Nr[   r\   r]   r   r   r   r   BICUBIC_COMMON_METAIMAGENET1K_V1DEFAULTr
  r7   r5   r   r     s        GE73CO`Oh
 
 


""m##     t
 
 
  M* GGGr7   r   c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z7https://download.pytorch.org/models/swin_s-5e29d889.pthr+     r-  irr1  r2  r3  gCT@gףp=
X@r4  gZd{!@gx&g@r5  r6  r>  NrA  r
  r7   r5   r   r     s        GE73CO`Oh
 
 


""m##    !t
 
 
  M* GGGr7   r   c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z7https://download.pytorch.org/models/swin_b-68c6b09e.pthr+     r-  i<;r1  r2  r3  gh|?T@g)\(X@r4  g&1.@gt@r5  r6  r>  NrA  r
  r7   r5   r   r     s        GE73CO`Oh
 
 


""m##    !t
 
 
  M* GGGr7   r   c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z:https://download.pytorch.org/models/swin_v2_t-b137f0e2.pth     r-  iRrK  rK  Xhttps://github.com/pytorch/vision/tree/main/references/classification#swintransformer-v2r3  gS㥛T@g rX@r4  g(\@gMb([@r5  r6  r>  NrA  r
  r7   r5   r   r     s        GH73CO`Oh
 
 


""p##    !t
 
 
  M* GGGr7   r   c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z:https://download.pytorch.org/models/swin_v2_s-637d8ceb.pthrK  rL  r-  irM  rN  r3  g!rhT@gNbX94X@r4  gd;O'@gg@r5  r6  r>  NrA  r
  r7   r5   r   r             GH73CO`Oh
 
 


""p##    !t
 
 
  M* GGGr7   r   c                   t    e Zd Z ed eeddej                  i eddddd	d
didddd          Z	e	Z
dS )r   z:https://download.pytorch.org/models/swin_v2_b-781e5279.pthrK  i  r-  i=rM  rN  r3  gI+U@gK7X@r4  g33333S4@gˡEu@r5  r6  r>  NrA  r
  r7   r5   r   r   	  rP  r7   r   
pretrained)r  )r  r   c                 p    t                               |           } t          dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_tiny architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_T_Weights
        :members:
    rI   `   r	   r	      r	   rx   rV           皙?r   r   r  rm   r:   r   r  r   r
  )r   verifyr)  r  r   r!  s      r5   r   r   "  se    . ##G,,G 
q6|| ..F!
 
 
 
 
r7   c                 p    t                               |           } t          dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_small architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_S_Weights
        :members:
    rI   rT  r	   r	      r	   rW  rZ  333333?r\  r
  )r   r]  r)  r^  s      r5   r   r   H  se    . ##G,,G 
q6}} ..F!
 
 
 
 
r7   c                 p    t                               |           } t          dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_base architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_B_Weights
        :members:
    rI      r`  rI   r   r       rZ        ?r\  r
  )r   r]  r)  r^  s      r5   r    r    n  se    . ##G,,G 
q6}} ..F!
 
 
 
 
r7   c                     t                               |           } t          dddgdg dg dddgd| |t          t          d
|S )	a  
    Constructs a swin_v2_tiny architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_T_Weights
        :members:
    rI   rT  rU  rW  r   r[  
r   r   r  rm   r:   r   r  r   r  r  r
  )r   r]  r)  r   rf   r^  s      r5   r!   r!     sk    .  &&w//G q6|| ..F!$'    r7   c                     t                               |           } t          dddgdg dg dddgd| |t          t          d
|S )	a  
    Constructs a swin_v2_small architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_S_Weights
        :members:
    rI   rT  r`  rW  r   rb  ri  r
  )r   r]  r)  r   rf   r^  s      r5   r"   r"     sk    .  &&w//G q6}} ..F!$'    r7   c                     t                               |           } t          dddgdg dg dddgd| |t          t          d
|S )	a  
    Constructs a swin_v2_base architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_B_Weights
        :members:
    rI   rd  r`  re  r   rg  ri  r
  )r   r]  r)  r   rf   r^  s      r5   r#   r#     sk    .  &&w//G q6}} ..F!$'    r7   )ri   ri   NNNT)Ar   	functoolsr   typingr   r   r   r,   torch.nn.functionalr   
functionalr*   r   ops.miscr
   r   ops.stochastic_depthr   transforms._presetsr   r   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r6   fxwrapr   r`   rB   ra   rD   rf   r   r   r   r   r   r   r   r   r)  rC  r   r   r   r   r   r   rD  r   r   r    r!   r"   r#   r
  r7   r5   <module>rz     s          * * * * * * * * * *                   # # # # # # # # 2 2 2 2 2 2 H H H H H H H H ' ' ' ' ' ' 6 6 6 6 6 6 6 6 6 6 ' ' ' ' ' ' B B B B B B B B  "%, 5<     " # # #""',"IN"dhildm"
\" " " " + , , ,    29   6    RY   F  #!%"&*.p ppp p #	p
 cp p S	p p p vp p %,'p p p p p pf ( ) ) )N
 N
 N
 N
 N
RY N
 N
 N
bS
 S
 S
 S
 S
5 S
 S
 S
l6 6 6 6 629 6 6 6r/ / / / /1 / / /dj j j j jbi j j jZS	 I Cy	
 c ! k"      > &
    [   2    [   2    [   2       2       2       2 ,0L!MNNN26 ! ! !x/ !$ !Y\ !ap ! ! ! ON !H ,0L!MNNN26 ! ! !x/ !$ !Y\ !ap ! ! ! ON !H ,0L!MNNN26 ! ! !x/ !$ !Y\ !ap ! ! ! ON !H ,0A0O!PQQQ8<t # # #(#45 # #_b #gv # # # RQ #L ,0A0O!PQQQ8<t # # #(#45 # #_b #gv # # # RQ #L ,0A0O!PQQQ8<t # # #(#45 # #_b #gv # # # RQ # # #r7   