
     `i$              	          d Z ddlZddlZddlmZ ddlmZmZ ddl	Z	ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZ ddlmZ  ej        e          Z e ed           G d de                                  Z!d Z"d Z#dHde	j$        de%de&de	j$        fdZ' G d de
j(                  Z) G d de
j(                  Z* G d  d!e
j(                  Z+ G d" d#e
j(                  Z, G d$ d%e
j(                  Z- G d& d'e
j(                  Z. G d( d)e
j(                  Z/ G d* d+e
j(                  Z0 G d, d-e
j(                  Z1 G d. d/e
j(                  Z2 G d0 d1e
j(                  Z3 G d2 d3e          Z4 G d4 d5e
j(                  Z5e G d6 d7e                      Z6e G d8 d9e6                      Z7 G d: d;e
j(                  Z8 G d< d=e
j(                  Z9 G d> d?e
j(                  Z: G d@ dAe
j(                  Z; G dB dCe
j(                  Z< edD           G dE dFe6                      Z=g dGZ>dS )Iz"PyTorch Swin2SR Transformer model.    N)	dataclass)OptionalUnion)nn   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputImageSuperResolutionOutput)PreTrainedModel) find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputauto_docstringlogging   )Swin2SRConfigzQ
    Swin2SR encoder's outputs, with potential hidden states and attentions.
    )custom_introc                       e Zd ZU dZeej                 ed<   dZee	ej                          ed<   dZ
ee	ej                          ed<   dS )Swin2SREncoderOutputNlast_hidden_statehidden_states
attentions)__name__
__module____qualname__r   r   torchFloatTensor__annotations__r   tupler        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/swin2sr/modeling_swin2sr.pyr   r   %   sg          6:x 129998<M8E%"345<<<59Ju01299999r#   r   c                     | j         \  }}}}|                     |||z  |||z  ||          } |                     dddddd                                                              d|||          }|S )z2
    Partitions the given input into windows.
    r   r   r            shapeviewpermute
contiguous)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowss          r$   window_partitionr6   2   s     /<.A+J|!&&Fk);8Lk[g M ##Aq!Q155@@BBGGKYdfrssGNr#   c                     | j         d         }|                     d||z  ||z  |||          } |                     dddddd                                                              d|||          } | S )z?
    Merges windows to produce higher resolution features.
    r)   r   r   r   r&   r'   r(   r*   )r5   r0   r2   r3   r4   s        r$   window_reverser8   ?   sx     =$Lll2v4e{6JKYdfrssGooaAq!Q//::<<AA"feUabbGNr#           Finput	drop_probtrainingreturnc                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r9   r   r   )r   )dtypedevice)r+   ndimr   randr?   r@   floor_div)r:   r;   r<   	keep_probr+   random_tensoroutputs          r$   	drop_pathrH   J   s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FMr#   c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
Swin2SRDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr;   r=   c                 V    t                                                       || _        d S N)super__init__r;   )selfr;   	__class__s     r$   rN   zSwin2SRDropPath.__init__b   s$    "r#   r   c                 8    t          || j        | j                  S rL   )rH   r;   r<   rO   r   s     r$   forwardzSwin2SRDropPath.forwardf   s    FFFr#   c                     d| j          S )Nzp=)r;   rO   s    r$   
extra_reprzSwin2SRDropPath.extra_repri   s    $DN$$$r#   rL   )r   r   r   __doc__r   floatrN   r   TensorrS   strrV   __classcell__rP   s   @r$   rJ   rJ   _   s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r#   rJ   c                   ^     e Zd ZdZ fdZdeej                 deej	                 fdZ
 xZS )Swin2SREmbeddingsz?
    Construct the patch and optional position embeddings.
    c                 d   t                                                       t          |          | _        | j        j        }|j        r6t          j        t          j	        d|dz   |j
                            | _        nd | _        t          j        |j                  | _        |j        | _        d S )Nr   )rM   rN   Swin2SRPatchEmbeddingspatch_embeddingsnum_patchesuse_absolute_embeddingsr   	Parameterr   zeros	embed_dimposition_embeddingsDropouthidden_dropout_probdropoutr0   )rO   configrb   rP   s      r$   rN   zSwin2SREmbeddings.__init__r   s     6v > >+7) 	,')|EK;QR?TZTd4e4e'f'fD$$'+D$z&"<==!-r#   pixel_valuesr=   c                     |                      |          \  }}| j        
|| j        z   }|                     |          }||fS rL   )ra   rg   rj   )rO   rl   
embeddingsoutput_dimensionss       r$   rS   zSwin2SREmbeddings.forward   sN    (,(=(=l(K(K%
%#/#d&>>J\\*--
,,,r#   )r   r   r   rW   rN   r   r   r   r!   rY   rS   r[   r\   s   @r$   r^   r^   m   ss         . . . . .-HU->$? -E%,DW - - - - - - - -r#   r^   c                   l     e Zd Zd fd	Zdeej                 deej        ee	         f         fdZ
 xZS )r`   Tc                    t                                                       |j        }|j        |j        }}t          |t          j        j                  r|n||f}t          |t          j        j                  r|n||f}|d         |d         z  |d         |d         z  g}|| _	        |d         |d         z  | _
        t          j        ||j        ||          | _        |rt          j        |j                  nd | _        d S )Nr   r   )kernel_sizestride)rM   rN   rf   
image_size
patch_size
isinstancecollectionsabcIterablepatches_resolutionrb   r   Conv2d
projection	LayerNorm	layernorm)rO   rk   normalize_patchesr4   rt   ru   rz   rP   s          r$   rN   zSwin2SRPatchEmbeddings.__init__   s    '!'!2F4EJ
#-j+/:R#S#SqZZZdfpYq
#-j+/:R#S#SqZZZdfpYq
(mz!}<jmzZ[}>\]"4-a03Ea3HH)L&2BPZcmnnn;LVf&6777RVr#   rn   r=   c                     |                      |          }|j        \  }}}}||f}|                    d                              dd          }| j        |                     |          }||fS )Nr&   r   )r|   r+   flatten	transposer~   )rO   rn   _r2   r3   ro   s         r$   rS   zSwin2SRPatchEmbeddings.forward   sv    __Z00
(.1fe#UO''**44Q::
>%
33J,,,r#   )T)r   r   r   rN   r   r   r   r!   rY   intrS   r[   r\   s   @r$   r`   r`      s        W W W W W W	-(5+<"= 	-%V[\_V`H`Ba 	- 	- 	- 	- 	- 	- 	- 	-r#   r`   c                   (     e Zd ZdZ fdZd Z xZS )Swin2SRPatchUnEmbeddingszImage to Patch Unembeddingc                 `    t                                                       |j        | _        d S rL   )rM   rN   rf   )rO   rk   rP   s     r$   rN   z!Swin2SRPatchUnEmbeddings.__init__   s'    )r#   c                     |j         \  }}}|                    dd                              || j        |d         |d                   }|S )Nr   r&   r   )r+   r   r,   rf   )rO   rn   x_sizer1   height_widthr4   s         r$   rS   z Swin2SRPatchUnEmbeddings.forward   sP    1;1A.
L,))!Q//44ZQWXYQZ\bcd\eff
r#   r   r   r   rW   rN   rS   r[   r\   s   @r$   r   r      sM        %%* * * * *
      r#   r   c            	            e Zd ZdZej        fdee         dedej        ddf fdZ	d Z
d	ej        d
eeef         dej        fdZ xZS )Swin2SRPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    input_resolutiondim
norm_layerr=   Nc                     t                                                       || _        || _        t	          j        d|z  d|z  d          | _         |d|z            | _        d S )Nr'   r&   Fbias)rM   rN   r   r   r   Linear	reductionnorm)rO   r   r   r   rP   s       r$   rN   zSwin2SRPatchMerging.__init__   sa     01s7AG%@@@Jq3w''			r#   c                     |dz  dk    p|dz  dk    }|r.ddd|dz  d|dz  f}t           j                            ||          }|S )Nr&   r   r   )r   
functionalpad)rO   r/   r2   r3   
should_pad
pad_valuess         r$   	maybe_padzSwin2SRPatchMerging.maybe_pad   s\    qjAo:519>
 	IQ519a!<JM--mZHHMr#   r/   input_dimensionsc                    |\  }}|j         \  }}}|                    ||||          }|                     |||          }|d d dd ddd dd d f         }|d d dd ddd dd d f         }	|d d dd ddd dd d f         }
|d d dd ddd dd d f         }t          j        ||	|
|gd          }|                    |dd|z            }|                     |          }|                     |          }|S )Nr   r&   r   r)   r'   )r+   r,   r   r   catr   r   )rO   r/   r   r2   r3   r1   r   r4   input_feature_0input_feature_1input_feature_2input_feature_3s               r$   rS   zSwin2SRPatchMerging.forward   sD   ((5(;%
C%**:vulSS}feDD'14a4Aqqq(89'14a4Aqqq(89'14a4Aqqq(89'14a4Aqqq(89	?O_Ve"fhjkk%**:r1|;KLL}55		-00r#   )r   r   r   rW   r   r}   r!   r   ModulerN   r   r   rY   rS   r[   r\   s   @r$   r   r      s        
 
 XZWc ( (s (# (29 (hl ( ( ( ( ( (  U\ U3PS8_ Y^Ye        r#   r   c                        e Zd Zddgf fd	Z	 	 	 ddej        deej                 deej                 dee         d	e	ej                 f
d
Z
 xZS )Swin2SRSelfAttentionr   c           
         t                                                       ||z  dk    rt          d| d| d          || _        t	          ||z            | _        | j        | j        z  | _        t          |t          j	        j
                  r|n||f| _        || _        t          j        t          j        dt          j        |ddf          z                      | _        t          j        t          j        ddd	
          t          j        d	          t          j        d|d
                    | _        t          j        | j        d         dz
   | j        d         t          j                                                  }t          j        | j        d         dz
   | j        d         t          j                                                  }t          j        t7          ||gd                                        ddd                                                              d          }|d         dk    rG|d d d d d d dfxx         |d         dz
  z  cc<   |d d d d d d dfxx         |d         dz
  z  cc<   nV|dk    rP|d d d d d d dfxx         | j        d         dz
  z  cc<   |d d d d d d dfxx         | j        d         dz
  z  cc<   |dz  }t          j        |          t          j         t          j!        |          dz             z  tE          j         d          z  }|#                    tI          | j        %                                          j&                  }| '                    d|d           t          j        | j        d                   }	t          j        | j        d                   }
t          j        t7          |	|
gd                    }t          j(        |d          }|d d d d d f         |d d d d d f         z
  }|                    ddd                                          }|d d d d dfxx         | j        d         dz
  z  cc<   |d d d d dfxx         | j        d         dz
  z  cc<   |d d d d dfxx         d| j        d         z  dz
  z  cc<   |)                    d          }| '                    d|d           t          j        | j        | j        |j*        
          | _+        t          j        | j        | j        d
          | _,        t          j        | j        | j        |j*        
          | _-        t          j.        |j/                  | _0        d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()
   r   r&   i   Tr   inplaceFr?   ij)indexing         ?relative_coords_table
persistentr)   relative_position_index)1rM   rN   
ValueErrornum_attention_headsr   attention_head_sizeall_head_sizerv   rw   rx   ry   r0   pretrained_window_sizer   rd   r   logoneslogit_scale
Sequentialr   ReLUcontinuous_position_bias_mlparangeint64rX   stackr   r-   r.   	unsqueezesignlog2absmathtonext
parametersr?   register_bufferr   sumqkv_biasquerykeyvaluerh   attention_probs_dropout_probrj   )rO   rk   r   	num_headsr0   r   relative_coords_hrelative_coords_wr   coords_hcoords_wcoordscoords_flattenrelative_coordsr   rP   s                  r$   rN   zSwin2SRSelfAttention.__init__   s   ?akCkk_hkkk   $- #&sY#7#7 !58PP%k;?3KLLlKKS^`kRl 	 '=#<	"uz9aQRBS7T7T2T(U(UVV,.MIa4((("'$*?*?*?3PY`eAfAfAf-
 -
)
 "L4+;A+>+B)CTEUVWEX`e`klllrrtt!L4+;A+>+B)CTEUVWEX`e`klllrrttK"35F!GRVWWWXXWQ1Z\\Yq\\	 	 "!$q((!!!!QQQ1*---1G1JQ1NN---!!!!QQQ1*---1G1JQ1NN----1__!!!!QQQ1*---1A!1Dq1HH---!!!!QQQ1*---1A!1Dq1HH---"J,--
59EZ;[;[^a;a0b0bbeienopeqeqq 	 !6 8 8d>_>j>j>l>l9m9m9s t t46KX]^^^ < 0 344< 0 344Xx&:TJJJKKvq11(AAAt4~aaaqqqj7QQ)11!Q::EEGG111a   D$4Q$7!$;;   111a   D$4Q$7!$;;   111a   A(8(;$;a$??   "1"5"5b"9"968O\abbbYt143EFO\\\
9T/1C%PPPYt143EFO\\\
z&"EFFr#   NFr   attention_mask	head_maskoutput_attentionsr=   c                    |j         \  }}}|                     |                              |d| j        | j                                      dd          }|                     |                              |d| j        | j                                      dd          }	|                     |                              |d| j        | j                                      dd          }
t          j	        
                    |d          t          j	        
                    |	d                              dd          z  }t          j        | j        t          j        d                                                    }||z  }|                     | j                                      d| j                  }|| j                            d                                       | j        d         | j        d         z  | j        d         | j        d         z  d          }|                    ddd                                          }d	t          j        |          z  }||                    d          z   }||j         d         }|                    ||z  || j        ||          |                    d                              d          z   }||                    d                              d          z   }|                    d| j        ||          }t          j	                            |d          }|                     |          }|||z  }t          j        ||
          }|                    dddd
                                          }|                                d d         | j        fz   }|                    |          }|r||fn|f}|S )Nr)   r   r&   r   g      Y@)maxr      r   )r+   r   r,   r   r   r   r   r   r   r   	normalizer   clampr   r   r   expr   r   r   r0   r-   r.   sigmoidr   softmaxrj   matmulsizer   )rO   r   r   r   r   r1   r   r4   query_layer	key_layervalue_layerattention_scoresr   relative_position_bias_tablerelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputss                       r$   rS   zSwin2SRSelfAttention.forward*  s    )6(;%
CJJ}%%T*b$":D<TUUYq!__ 	 HH]##T*b$":D<TUUYq!__ 	 JJ}%%T*b$":D<TUUYq!__ 	 =22;B2GG"-JaJa2 Kb K
 K

)B

 k$"28L8LMMMQQSS+k9'+'H'HIc'd'd'i'i((
 (
$ ">d>Z>_>_`b>c>c!d!i!iQ$"21"55t7G7JTM]^_M`7`bd"
 "
 "8!?!?1a!H!H!S!S!U!U!#em4J&K&K!K+.D.N.Nq.Q.QQ%'-a0J/44j(*d6NPSUX   ((++55a88 9  0.2J2J12M2M2W2WXY2Z2ZZ/44R9QSVX[\\ -//0@b/II ,,77  -	9O_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r#   NNF)r   r   r   rN   r   rY   r   r   boolr!   rS   r[   r\   s   @r$   r   r      s        TUWXSY ;G ;G ;G ;G ;G ;G@ 7;15,1E E|E !!23E E-.	E
 $D>E 
u|	E E E E E E E Er#   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )Swin2SRSelfOutputc                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S rL   )rM   rN   r   r   denserh   r   rj   rO   rk   r   rP   s      r$   rN   zSwin2SRSelfOutput.__init__t  sD    YsC((
z&"EFFr#   r   input_tensorr=   c                 Z    |                      |          }|                     |          }|S rL   r   rj   )rO   r   r   s      r$   rS   zSwin2SRSelfOutput.forwardy  s*    

=11]33r#   r   r   r   rN   r   rY   rS   r[   r\   s   @r$   r   r   s  sn        G G G G G
U\  RWR^        r#   r   c                        e Zd Zd fd	Zd Z	 	 	 ddej        deej                 deej                 d	ee	         d
e
ej                 f
dZ xZS )Swin2SRAttentionr   c           
         t                                                       t          ||||t          |t          j        j                  r|n||f          | _        t          ||          | _	        t                      | _        d S )Nrk   r   r   r0   r   )rM   rN   r   rv   rw   rx   ry   rO   r   rG   setpruned_heads)rO   rk   r   r   r0   r   rP   s         r$   rN   zSwin2SRAttention.__init__  s    (#0+/2JKK$B#9#9(*@A
 
 
	 (44EEr#   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r   )lenr   rO   r   r   r  r   r   r   r   rG   r   r   union)rO   headsindexs      r$   prune_headszSwin2SRAttention.prune_heads  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r#   NFr   r   r   r   r=   c                     |                      ||||          }|                     |d         |          }|f|dd          z   }|S Nr   r   )rO   rG   )rO   r   r   r   r   self_outputsattention_outputr   s           r$   rS   zSwin2SRAttention.forward  sO     yy	K\]];;|AFF#%QRR(88r#   r   r   )r   r   r   rN   r
  r   rY   r   r   r   r!   rS   r[   r\   s   @r$   r   r     s        " " " " " "; ; ;* 7;15,1
 
|
 !!23
 E-.	

 $D>
 
u|	
 
 
 
 
 
 
 
r#   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )Swin2SRIntermediatec                 $   t                                                       t          j        |t	          |j        |z                      | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S rL   )rM   rN   r   r   r   	mlp_ratior   rv   
hidden_actrZ   r   intermediate_act_fnr   s      r$   rN   zSwin2SRIntermediate.__init__  sx    YsC(83(>$?$?@@
f'-- 	9'-f.?'@D$$$'-'8D$$$r#   r   r=   c                 Z    |                      |          }|                     |          }|S rL   )r   r  rR   s     r$   rS   zSwin2SRIntermediate.forward  s,    

=1100??r#   r   r\   s   @r$   r  r    s^        9 9 9 9 9U\ el        r#   r  c                   B     e Zd Z fdZdej        dej        fdZ xZS )Swin2SROutputc                     t                                                       t          j        t	          |j        |z            |          | _        t          j        |j                  | _	        d S rL   )
rM   rN   r   r   r   r  r   rh   ri   rj   r   s      r$   rN   zSwin2SROutput.__init__  sT    Ys6#3c#9::C@@
z&"<==r#   r   r=   c                 Z    |                      |          }|                     |          }|S rL   r   rR   s     r$   rS   zSwin2SROutput.forward  s*    

=11]33r#   r   r\   s   @r$   r  r    s^        > > > > >
U\ el        r#   r  c                        e Zd Z	 d fd	Zdeeeef         eeef         f         fdZd Zd Z	 	 dd
e	j
        deeef         dee	j                 dee         dee	j
        e	j
        f         f
dZ xZS )Swin2SRLayerr9   r   c           
         t                                                       || _        |                     |j        |j        f||f          \  }}|d         | _        |d         | _        t          |||| j        t          |t          j	        j
                  r|n||f          | _        t          j        ||j                  | _        |dk    rt!          |          nt          j                    | _        t'          ||          | _        t+          ||          | _        t          j        ||j                  | _        d S )Nr   r  epsr9   )rM   rN   r   _compute_window_shiftr0   
shift_sizer   rv   rw   rx   ry   	attentionr   r}   layer_norm_epslayernorm_beforerJ   IdentityrH   r  intermediater  rG   layernorm_after)
rO   rk   r   r   r   drop_path_rater!  r   r0   rP   s
            r$   rN   zSwin2SRLayer.__init__  s?    	 0"&"<"<!34z:6N#
 #
Z 'q>$Q-)(0+/2JKK$B#9#9(*@A
 
 
 !#Sf6K L L L<JS<P<P888VXVaVcVc/<<#FC00!|CV5JKKKr#   r=   c                     d t          | j        |          D             }d t          | j        ||          D             }||fS )Nc                 (    g | ]\  }}||k    r|n|S r"   r"   ).0rws      r$   
<listcomp>z6Swin2SRLayer._compute_window_shift.<locals>.<listcomp>  s(    eeedaAFFqqeeer#   c                 *    g | ]\  }}}||k    rd n|S r  r"   )r+  r,  r-  ss       r$   r.  z6Swin2SRLayer._compute_window_shift.<locals>.<listcomp>  s*    sssWQ1166aaqsssr#   )zipr   )rO   target_window_sizetarget_shift_sizer0   r!  s        r$   r   z"Swin2SRLayer._compute_window_shift  sR    eec$:OQc6d6deeessD<QS^`q8r8rsss
J&&r#   c           	         | j         dk    r\t          j        d||df|          }t          d| j                   t          | j         | j                    t          | j          d           f}t          d| j                   t          | j         | j                    t          | j          d           f}d}|D ]}|D ]}	||d d ||	d d f<   |dz  }t          || j                  }
|
                    d| j        | j        z            }
|
                    d          |
                    d          z
  }|                    |dk    d                              |dk    d          }nd }|S )Nr   r   r   r)   r&   g      Yr9   )	r!  r   re   slicer0   r6   r,   r   masked_fill)rO   r2   r3   r?   img_maskheight_sliceswidth_slicescountheight_slicewidth_slicemask_windows	attn_masks               r$   get_attn_maskzSwin2SRLayer.get_attn_mask  s   ?Q{Avua#8FFFHa$**++t''$/)9::t&--M a$**++t''$/)9::t&--L
 E -  #/  K@EHQQQk111<=QJEE ,Hd6FGGL',,R1ADDT1TUUL$..q11L4J4J14M4MMI!--i1nfEEQQR[_`R`beffIIIr#   c                     | j         || j         z  z
  | j         z  }| j         || j         z  z
  | j         z  }ddd|d|f}t          j                            ||          }||fS )Nr   )r0   r   r   r   )rO   r   r2   r3   	pad_right
pad_bottomr   s          r$   r   zSwin2SRLayer.maybe_pad  sp    %0@(@@DDTT	&$2B)BBdFVV
Ay!Z8
))-DDj((r#   NFr   r   r   r   c                    |\  }}|                                 \  }}}	|}
|                    ||||	          }|                     |||          \  }}|j        \  }}}}| j        dk    r&t          j        || j         | j         fd          }n|}t          || j                  }|                    d| j        | j        z  |	          }| 	                    |||j
                  }||                    |j                  }|                     ||||          }|d         }|                    d| j        | j        |	          }t          || j        ||          }| j        dk    r$t          j        || j        | j        fd          }n|}|d         dk    p|d         dk    }|r&|d d d |d |d d f                                         }|                    |||z  |	          }|                     |          }|
|                     |          z   }|                     |          }|                     |          }||                     |                     |                    z   }|r
||d	         fn|f}|S )
Nr   )r   r&   )shiftsdimsr)   r   )r   r   r(   r   )r   r,   r   r+   r!  r   rollr6   r0   r?  r?   r   r@   r"  r8   r.   r$  rH   r&  rG   r'  )rO   r   r   r   r   r2   r3   r1   r   channelsshortcutr   
height_pad	width_padshifted_hidden_stateshidden_states_windowsr>  attention_outputsr  attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputss                           r$   rS   zSwin2SRLayer.forward  s    )"/"4"4"6"6
Ax  &**:vuhOO$(NN=&%$P$P!z&3&9#:y!?Q$)J}tFVY]YhXhEipv$w$w$w!!$1! !11FHX Y Y 5 : :2t?ORVRb?bdl m m&&z9MDW&XX	 !%:%ABBI NN!9iK\ + 
 
 -Q/,11"d6FHXZbcc():D<LjZcdd ?Q %
?DOUYUdCelr s s s /]Q&;*Q-!*;
 	V 1!!!WfWfufaaa2G H S S U U-22:v~xXX--.?@@ 4>>-#@#@@((77{{<00$t~~d6J6J<6X6X'Y'YY@Qf'8';<<XdWfr#   )r9   r   r   NF)r   r   r   rN   r!   r   r   r?  r   r   rY   r   r   r   rS   r[   r\   s   @r$   r  r    s       qrL L L L L L2'eTYZ]_bZbTcejknpsksetTtNu ' ' ' '
  8) ) ) 26,18 8|8  S/8 E-.	8
 $D>8 
u|U\)	*8 8 8 8 8 8 8 8r#   r  c                        e Zd ZdZd fd	Z	 	 ddej        deeef         de	ej
                 d	e	e         d
eej                 f
dZ xZS )Swin2SRStagezh
    This corresponds to the Residual Swin Transformer Block (RSTB) in the original implementation.
    r   c                    t                                                       | _        | _        t	          j        fdt          |          D                       | _        j        dk    rt	          j	        ddd          | _
        nj        dk    rt	          j        t	          j	        dz  ddd          t	          j        dd	          t	          j	        dz  dz  ddd
          t	          j        dd	          t	          j	        dz  ddd                    | _
        t          d          | _        t                    | _        d S )Nc           
      `    g | ]*}t          |d z  dk    rdn	j        d z            +S )r&   r   )rk   r   r   r   r!  r   )r  r0   )r+  irk   r   r   r   r   s     r$   r.  z)Swin2SRStage.__init__.<locals>.<listcomp>T  sd     
 
 
  !%5'%&UaZZqqf6HA6M+A  
 
 
r#   1convr   r   3convr'   皙?Tnegative_sloper   r   F)r   )rM   rN   rk   r   r   
ModuleListrangelayersresi_connectionr{   convr   	LeakyReLUr`   patch_embedr   patch_unembed)	rO   rk   r   r   depthr   rH   r   rP   s	    ``` ` `r$   rN   zSwin2SRStage.__init__O  sm   m
 
 
 
 
 
 
 
 u
 
 

 
 !W,,	#sAq!44DII#w..	#saxAq11C>>>	#(C1HaA66C>>>	#(CAq11 DI 2&ERRR5f==r#   NFr   r   r   r   r=   c                 N   |}|\  }}t          | j                  D ]'\  }}	|||         nd }
 |	|||
|          }|d         }(||||f}|                     ||          }|                     |          }|                     |          \  }}||z   }||f}|r||dd          z  }|S r  )	enumerater`  re  rb  rd  )rO   r   r   r   r   residualr2   r3   rX  layer_modulelayer_head_maskrR  ro   r   stage_outputss                  r$   rS   zSwin2SRStage.forwardq  s     !((55 	- 	-OA|.7.CillO(L8H/[lmmM)!,MM#UFE:**=:JKK		-00++M::q%0&(9: 	/]122..Mr#   r  rS  )r   r   r   rW   rN   r   rY   r!   r   r   r   r   rS   r[   r\   s   @r$   rU  rU  J  s          >  >  >  >  >  >L 26,1 |  S/ E-.	
 $D> 
u|	       r#   rU  c                        e Zd Z fdZ	 	 	 	 ddej        deeef         deej	                 dee
         d	ee
         d
ee
         deeef         fdZ xZS )Swin2SREncoderc                    t                                                       t          j                  | _        | _        d t          j        dj        t          j                  d          D             t          j        fdt          | j                  D                       | _        d| _        d S )Nc                 6    g | ]}|                                 S r"   )item)r+  xs     r$   r.  z+Swin2SREncoder.__init__.<locals>.<listcomp>  s     lllAqvvxxlllr#   r   cpu)r@   c                    g | ]}t          j        d          d         fj        |         j        |         t	          j        d|                   t	          j        d|dz                               d           S )r   r   N)rk   r   r   rf  r   rH   r   )rU  rf   depthsr   r   )r+  	stage_idxrk   dpr	grid_sizes     r$   r.  z+Swin2SREncoder.__init__.<locals>.<listcomp>  s         !(&/lIaL%A -	2$.y9!#fmJYJ&?"@"@3v}UdW`cdWdUdGeCfCf"fg+,    r#   F)rM   rN   r  ru  
num_stagesrk   r   linspacer(  r   r   r^  r_  stagesgradient_checkpointing)rO   rk   rx  rw  rP   s    ``@r$   rN   zSwin2SREncoder.__init__  s    fm,,ll63H#fmJ\J\ej!k!k!klllm      "'t!7!7  
 
 ',###r#   NFTr   r   r   r   output_hidden_statesreturn_dictr=   c                 p   d}|rdnd }|rdnd }	|r||fz  }t          | j                  D ]\\  }
}|||
         nd } |||||          }|d         }|d         }|d         |d         f}||fz  }|r||fz  }|r|	|dd          z  }	]|st          d |||	fD                       S t          |||	          S )	Nr"   r   r   r   r)   r&   c              3      K   | ]}||V  	d S rL   r"   )r+  vs     r$   	<genexpr>z)Swin2SREncoder.forward.<locals>.<genexpr>  s(      mmq_`_l_l_l_l_lmmr#   r   r   r   )rh  r{  r!   r   )rO   r   r   r   r   r}  r~  all_input_dimensionsall_hidden_statesall_self_attentionsrX  stage_modulerk  rR  ro   s                  r$   rS   zSwin2SREncoder.forward  s@     ""6@BBD$5?bb4 	2-!11(55 	9 	9OA|.7.CillO(L8H/[lmmM)!,M -a 0 1" 57H7LM %5$77 # 6!m%55!  9#}QRR'88# 	nmm]4EGZ$[mmmmmm#++*
 
 
 	
r#   )NFFT)r   r   r   rN   r   rY   r!   r   r   r   r   r   r   rS   r[   r\   s   @r$   rn  rn    s        , , , , ,4 26,1/4&*(
 (
|(
  S/(
 E-.	(

 $D>(
 'tn(
 d^(
 
u**	+(
 (
 (
 (
 (
 (
 (
 (
r#   rn  c                   ,    e Zd ZU eed<   dZdZdZd ZdS )Swin2SRPreTrainedModelrk   swin2srrl   Tc                    t          |t          j        t          j        f          rct          j        j                            |j        j        | j	        j
                   |j         |j        j                                         dS dS t          |t          j                  r?|j        j                                         |j        j                            d           dS dS )zInitialize the weights)stdNr   )rv   r   r   r{   r   inittrunc_normal_weightdatark   initializer_ranger   zero_r}   fill_)rO   modules     r$   _init_weightsz$Swin2SRPreTrainedModel._init_weights  s    fry")455 	*HM''(:@]'^^^{& &&((((( '&-- 	*K""$$$M$$S)))))	* 	*r#   N)	r   r   r   r   r    base_model_prefixmain_input_namesupports_gradient_checkpointingr  r"   r#   r$   r  r    sB         !$O&*#* * * * *r#   r  c                        e Zd Z fdZd Zd Zd Ze	 	 	 	 ddej	        de
ej	                 de
e         d	e
e         d
e
e         deeef         fd            Z xZS )Swin2SRModelc                    t                                          |           || _        |j        dk    r8|j        dk    r-t          j        g d                              dddd          }nt          j        dddd          }| 	                    d|d           |j
        | _
        t          j        |j        |j        ddd          | _        t          |          | _        t#          || j        j        j                  | _        t          j        |j        |j                  | _        t1          |          | _        t          j        |j        |j        ddd          | _        |                                  d S )	Nr   )gw#?g8EGr?gB`"?r   meanFr   )rx  r  )rM   rN   rk   r4   num_channels_outr   tensorr,   re   r   	img_ranger   r{   rf   first_convolutionr^   rn   rn  ra   rz   encoderr}   r#  r~   r   re  conv_after_body	post_init)rO   rk   r  rP   s      r$   rN   zSwin2SRModel.__init__  sW      !##(?1(D(D< 8 8 899>>q!QJJDD;q!Q**DVTe<<<)!#6+>@PRSUVXY!Z!Z+F33%f8X8klllf&6F<QRRR5f==!y)96;KQPQSTUU 	r#   c                     | j         j        S rL   )rn   ra   rU   s    r$   get_input_embeddingsz!Swin2SRModel.get_input_embeddings  s    //r#   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr"  r
  )rO   heads_to_pruner  r  s       r$   _prune_headszSwin2SRModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr#   c                    |                                 \  }}}}| j        j        }|||z  z
  |z  }|||z  z
  |z  }t          j                            |d|d|fd          }| j                            |          }||z
  | j        z  }|S )Nr   reflect)	r   rk   r0   r   r   r   r  type_asr  )	rO   rl   r   r2   r3   r0   modulo_pad_heightmodulo_pad_widthr  s	            r$   pad_and_normalizezSwin2SRModel.pad_and_normalize	  s    *//111fe k-(6K+??;N'%+*==L}((;KQPa7bdmnn y  ..$t+t~=r#   Nrl   r   r   r}  r~  r=   c                    ||n| j         j        }||n| j         j        }||n| j         j        }|                     |t          | j         j                            }|j        \  }}}}|                     |          }| 	                    |          }	| 
                    |	          \  }
}|                     |
|||||          }|d         }|                     |          }|                     |||f          }|                     |          |	z   }|s|f|dd          z   }|S t          ||j        |j                  S )Nr   r   r}  r~  r   r   r  )rk   r   r}  use_return_dictget_head_maskr  ru  r+   r  r  rn   r  r~   re  r  r
   r   r   )rO   rl   r   r   r}  r~  r   r2   r3   rn   embedding_outputr   encoder_outputssequence_outputrG   s                  r$   rS   zSwin2SRModel.forward  s    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] &&y#dk6H2I2IJJ	*01fe --l;;++L99
-1__Z-H-H**,,/!5# ' 
 
 *!,..99,,_vuoNN..??*L 	%'/!""*==FM-)7&1
 
 
 	
r#   )NNNN)r   r   r   rN   r  r  r  r   r   r   r   r   r   r!   r
   rS   r[   r\   s   @r$   r  r    s            .0 0 0C C C    26,0/3&*5
 5
'5
 E-.5
 $D>	5

 'tn5
 d^5
 
uo%	&5
 5
 5
 ^5
 5
 5
 5
 5
r#   r  c                   (     e Zd ZdZ fdZd Z xZS )UpsamplezUpsample module.

    Args:
        scale (`int`):
            Scale factor. Supported scales: 2^n and 3.
        num_features (`int`):
            Channel number of intermediate features.
    c                 2   t                                                       || _        ||dz
  z  dk    rt          t	          t          j        |                              D ]_}|                     d| t          j	        |d|z  ddd                     |                     d| t          j
        d                     `d S |dk    r;t          j	        |d|z  ddd          | _        t          j
        d          | _        d S t          d	| d
          )Nr   r   convolution_r'   r   pixelshuffle_r&   	   zScale z/ is not supported. Supported scales: 2^n and 3.)rM   rN   scaler_  r   r   r   
add_moduler   r{   PixelShuffleconvolutionpixelshuffler   )rO   r  num_featuresrX  rP   s       r$   rN   zUpsample.__init__[  s)   
UQYA%%3ty//0011 I I 2q 2 2BIlAP\L\^_abde4f4fggg 3 3 3R_Q5G5GHHHHI I aZZ!yq<7GAqQQD " 2 2D\e\\\]]]r#   c                    | j         | j         dz
  z  dk    ryt          t          t          j        | j                                       D ]D} |                     d|           |          } |                     d|           |          }En5| j         dk    r*|                     |          }|                     |          }|S )Nr   r   r  r  r   )r  r_  r   r   r   __getattr__r  r  )rO   hidden_staterX  s      r$   rS   zUpsample.forwardj  s    J$*q.)a//3ty445566 S SCt//0Bq0B0BCCLQQDt//0C0C0CDD\RRS Z1__++L99L,,\::Lr#   r   r\   s   @r$   r  r  Q  sV         ^ ^ ^ ^ ^
 
 
 
 
 
 
r#   r  c                   (     e Zd ZdZ fdZd Z xZS )UpsampleOneStepa  UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)

    Used in lightweight SR to save parameters.

    Args:
        scale (int):
            Scale factor. Supported scales: 2^n and 3.
        in_channels (int):
            Channel number of intermediate features.
        out_channels (int):
            Channel number of output features.
    c                     t                                                       t          j        ||dz  |z  ddd          | _        t          j        |          | _        d S )Nr&   r   r   )rM   rN   r   r{   rb  r  pixel_shuffle)rO   r  in_channelsout_channelsrP   s       r$   rN   zUpsampleOneStep.__init__  sS    IkE1H+DaANN	_U33r#   c                 Z    |                      |          }|                     |          }|S rL   )rb  r  )rO   rr  s     r$   rS   zUpsampleOneStep.forward  s)    IIaLLq!!r#   r   r\   s   @r$   r  r  w  sQ         4 4 4 4 4      r#   r  c                   $     e Zd Z fdZd Z xZS )PixelShuffleUpsamplerc                 8   t                                                       t          j        |j        |ddd          | _        t          j        d          | _        t          |j	        |          | _
        t          j        ||j        ddd          | _        d S Nr   r   Tr   )rM   rN   r   r{   rf   conv_before_upsamplerc  
activationr  upscaleupsampler  final_convolutionrO   rk   r  rP   s      r$   rN   zPixelShuffleUpsampler.__init__  s    $&If.>aQRTU$V$V!,t444 >>!#<9PRSUVXY!Z!Zr#   c                     |                      |          }|                     |          }|                     |          }|                     |          }|S rL   )r  r  r  r  )rO   r  rr  s      r$   rS   zPixelShuffleUpsampler.forward  sO    %%o66OOAMM!""1%%r#   r   r   r   rN   rS   r[   r\   s   @r$   r  r    sL        [ [ [ [ [      r#   r  c                   $     e Zd Z fdZd Z xZS )NearestConvUpsamplerc                    t                                                       |j        dk    rt          d          t	          j        |j        |ddd          | _        t	          j        d          | _	        t	          j        ||ddd          | _
        t	          j        ||ddd          | _        t	          j        ||ddd          | _        t	          j        ||j        ddd          | _        t	          j        dd          | _        d S )	Nr'   zNThe nearest+conv upsampler only supports an upscale factor of 4 at the moment.r   r   Tr   r[  r\  )rM   rN   r  r   r   r{   rf   r  rc  r  conv_up1conv_up2conv_hrr  r  lrelur  s      r$   rN   zNearestConvUpsampler.__init__  s    >Qmnnn$&If.>aQRTU$V$V!,t444	,aAFF	,aAFFy|Q1EE!#<9PRSUVXY!Z!Z\dCCC


r#   c           	         |                      |          }|                     |          }|                     |                     t          j        j                            |dd                              }|                     |                     t          j        j                            |dd                              }| 	                    |                     | 
                    |                              }|S )Nr&   nearest)scale_factormode)r  r  r  r  r   r   r   interpolater  r  r  )rO   r  reconstructions      r$   rS   zNearestConvUpsampler.forward  s    33ODD///::**MM%(-99/XY`i9jjkk
 
 **MM%(-99/XY`i9jjkk
 
 //

4<<;X;X0Y0YZZr#   r  r\   s   @r$   r  r    sL        D D D D D
 
 
 
 
 
 
r#   r  c                   $     e Zd Z fdZd Z xZS )PixelShuffleAuxUpsamplerc           	      ^   t                                                       |j        | _        t          j        |j        |ddd          | _        t          j        |j        |ddd          | _        t          j	        d          | _
        t          j        ||j        ddd          | _        t          j        t          j        d|ddd          t          j	        d                    | _        t          |j        |          | _        t          j        ||j        ddd          | _        d S r  )rM   rN   r  r   r{   r4   conv_bicubicrf   r  rc  r  conv_auxr   conv_after_auxr  r  r  r  r  s      r$   rN   z!PixelShuffleAuxUpsampler.__init__  s    ~If&9<AqQQ$&If.>aQRTU$V$V!,t444	,0CQ1MM mBIaq!Q,O,OQSQ]fjQkQkQkll >>!#<9PRSUVXY!Z!Zr#   c                    |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          d d d d d || j        z  d || j        z  f         |d d d d d || j        z  d || j        z  f         z   }|                     |          }||fS rL   )r  r  r  r  r  r  r  r  )rO   r  bicubicr2   r3   auxr  s          r$   rS   z PixelShuffleAuxUpsampler.forward  s    ##G,,33ODD///::mmO,,--c22MM/**111aaa1H6DL3H1HJ`ETXT`L`J`+`aaaa3ft|335Kut|7K5KKLM 	 //@@s""r#   r  r\   s   @r$   r  r    sL        
[ 
[ 
[ 
[ 
[# # # # # # #r#   r  zm
    Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
    c                        e Zd Z fdZe	 	 	 	 	 	 ddeej                 deej                 deej                 dee	         dee	         dee	         d	e
eef         fd
            Z xZS )Swin2SRForImageSuperResolutionc                 D   t                                          |           t          |          | _        |j        | _        |j        | _        d}| j        dk    rt          ||          | _        n| j        dk    rt          ||          | _        ny| j        dk    r&t          |j        |j
        |j                  | _        nH| j        dk    rt          ||          | _        n't          j        |j
        |j        ddd          | _        |                                  d S )N@   r  pixelshuffle_auxpixelshuffledirectnearest+convr   r   )rM   rN   r  r  	upsamplerr  r  r  r  r  rf   r  r  r   r{   r  r  r  s      r$   rN   z'Swin2SRForImageSuperResolution.__init__  s      #F++)~ >^++1&,GGDMM^1114V\JJDMM^333+FNF<LfNeffDMM^~--0FFDMM &(Yv/?AXZ[]^`a%b%bD" 	r#   Nrl   r   labelsr   r}  r~  r=   c                    ||n| j         j        }d}|t          d          |j        dd         \  }}	| j         j        dk    r5t
          j                            ||| j        z  |	| j        z  fdd          }
| 	                    |||||          }|d	         }| j        d
v r| 
                    |          }n[| j        dk    r8| 
                    ||
||	          \  }}|| j	        j        z  | j	        j        z   }n||                     |          z   }|| j	        j        z  | j	        j        z   }|ddddd|| j        z  d|	| j        z  f         }|s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a  
        Example:
         ```python
         >>> import torch
         >>> import numpy as np
         >>> from PIL import Image
         >>> import requests

         >>> from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution

         >>> processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
         >>> model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

         >>> url = "https://huggingface.co/spaces/jjourney1125/swin2sr/resolve/main/samples/butterfly.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> # prepare image for the model
         >>> inputs = processor(image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)

         >>> output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
         >>> output = np.moveaxis(output, source=0, destination=-1)
         >>> output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
         >>> # you can visualize `output` with `Image.fromarray`
         ```Nz'Training is not supported at the momentr&   r  r  F)r   r  align_cornersr  r   )r  r  r  r   )lossr  r   r   )rk   r  NotImplementedErrorr+   r  r   r   r  r  r  r  r  r  r  r   r   r   )rO   rl   r   r  r   r}  r~  r  r2   r3   r  r   r  r  r  rG   s                   r$   rS   z&Swin2SRForImageSuperResolution.forward  s   J &1%<kk$+B]%&OPPP$*122.; $666m//t|+UT\-AB#	 0  G ,,/!5#  
 
 "!*>SSS!]]?;;NN^111"&--&RW"X"XNC..1BBCC)D,B,B?,S,SSN'$,*@@4<CTT'111.E0E.EG]QUQ]I]G](]^ 	F$&4F)-)9TGf$$vE))!/)	
 
 
 	
r#   )NNNNNN)r   r   r   rN   r   r   r   r   
LongTensorr   r   r!   r   rS   r[   r\   s   @r$   r  r    s            4  5915-1,0/3&*R
 R
u01R
 E-.R
 )*	R

 $D>R
 'tnR
 d^R
 
u00	1R
 R
 R
 ^R
 R
 R
 R
 R
r#   r  )r  r  r  )r9   F)?rW   collections.abcrw   r   dataclassesr   typingr   r   r   r   activationsr   modeling_layersr	   modeling_outputsr
   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   configuration_swin2srr   
get_loggerr   loggerr   r6   r8   rY   rX   r   rH   r   rJ   r^   r`   r   r   r   r   r   r  r  r  rU  rn  r  r  r  r  r  r  r  r  __all__r"   r#   r$   <module>r     s   ) (      ! ! ! ! ! ! " " " " " " " "        ! ! ! ! ! ! 9 9 9 9 9 9 K K K K K K K K - - - - - - [ [ [ [ [ [ [ [ [ [ 9 9 9 9 9 9 9 9 9 9 0 0 0 0 0 0 
	H	%	%   
: : : : :; : :  :	 	 	   U\ e T V[Vb    *% % % % %bi % % %- - - - -	 - - -<- - - - -RY - - -6    ry   3 3 3 3 3") 3 3 3nC C C C C29 C C CN
 
 
 
 
	 
 
 
+ + + + +ry + + +^    ")    	 	 	 	 	BI 	 	 	z z z z z29 z z zzD D D D D- D D DN?
 ?
 ?
 ?
 ?
RY ?
 ?
 ?
D * * * * *_ * * *" h
 h
 h
 h
 h
) h
 h
 h
V# # # # #ry # # #L    bi   6    BI   "    29   6# # # # #ry # # #8   
n
 n
 n
 n
 n
%; n
 n
 
n
b W
V
Vr#   