
     `ie              	          d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZmZmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ  ej        e          Ze ed           G d de                                  Zd=dej        de de!dej        fdZ" G d de	j#                  Z$ G d de	j#                  Z% G d de	j#                  Z& G d d e	j#                  Z' G d! d"e	j#                  Z( G d# d$e	j#                  Z) G d% d&e	j#                  Z* G d' d(e	j#                  Z+ G d) d*e	j#                  Z, G d+ d,e	j#                  Z- G d- d.e	j#                  Z. G d/ d0e	j#                  Z/ G d1 d2e	j#                  Z0 G d3 d4e	j#                  Z1e G d5 d6e                      Z2e G d7 d8e2                      Z3 ed9           G d: d;e2                      Z4g d<Z5dS )>zPyTorch CvT model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	CvtConfigzV
    Base class for model's outputs, with potential hidden states and attentions.
    )custom_introc                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dS )BaseModelOutputWithCLSTokenz
    cls_token_value (`torch.FloatTensor` of shape `(batch_size, 1, hidden_size)`):
        Classification token at the output of the last layer of the model.
    Nlast_hidden_statecls_token_value.hidden_states)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   tuple     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/cvt/modeling_cvt.pyr   r   #   sq          
 6:x 1299937OXe/0777=AM8E%"3S"89:AAAAAr#   r           Finput	drop_probtrainingreturnc                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r%   r   r   )r   )dtypedevice)shapendimr   randr+   r,   floor_div)r&   r'   r(   	keep_probr-   random_tensoroutputs          r$   	drop_pathr5   5   s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FMr#   c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
CvtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr'   r)   c                 V    t                                                       || _        d S N)super__init__r'   )selfr'   	__class__s     r$   r;   zCvtDropPath.__init__M   s$    "r#   r   c                 8    t          || j        | j                  S r9   )r5   r'   r(   )r<   r   s     r$   forwardzCvtDropPath.forwardQ   s    FFFr#   c                     d| j          S )Nzp=r'   )r<   s    r$   
extra_reprzCvtDropPath.extra_reprT   s    $DN$$$r#   r9   )r   r   r   r   r   floatr;   r   Tensorr?   strrB   __classcell__r=   s   @r$   r7   r7   J   s        bb# #(5/ #T # # # # # #GU\ Gel G G G G%C % % % % % % % %r#   r7   c                   (     e Zd ZdZ fdZd Z xZS )CvtEmbeddingsz'
    Construct the CvT embeddings.
    c                     t                                                       t          |||||          | _        t	          j        |          | _        d S )N)
patch_sizenum_channels	embed_dimstridepadding)r:   r;   CvtConvEmbeddingsconvolution_embeddingsr   Dropoutdropout)r<   rK   rL   rM   rN   rO   dropout_rater=   s          r$   r;   zCvtEmbeddings.__init__]   sT    &7!	Z`jq'
 '
 '
# z,//r#   c                 Z    |                      |          }|                     |          }|S r9   )rQ   rS   )r<   pixel_valueshidden_states      r$   r?   zCvtEmbeddings.forwardd   s,    22<@@||L11r#   r   r   r   r   r;   r?   rF   rG   s   @r$   rI   rI   X   sQ         0 0 0 0 0      r#   rI   c                   (     e Zd ZdZ fdZd Z xZS )rP   z"
    Image to Conv Embedding.
    c                    t                                                       t          |t          j        j                  r|n||f}|| _        t          j        |||||          | _	        t          j
        |          | _        d S )N)kernel_sizerN   rO   )r:   r;   
isinstancecollectionsabcIterablerK   r   Conv2d
projection	LayerNormnormalization)r<   rK   rL   rM   rN   rO   r=   s         r$   r;   zCvtConvEmbeddings.__init__o   sz    #-j+/:R#S#SqZZZdfpYq
$)L)\blsttt\)44r#   c                 <   |                      |          }|j        \  }}}}||z  }|                    |||                              ddd          }| j        r|                     |          }|                    ddd                              ||||          }|S Nr      r   )ra   r-   viewpermuterc   )r<   rV   
batch_sizerL   heightwidthhidden_sizes          r$   r?   zCvtConvEmbeddings.forwardv   s    |442>2D/
L&%un#((\;OOWWXY[\^_`` 	<--l;;L#++Aq!4499*lTZ\abbr#   rX   rG   s   @r$   rP   rP   j   sQ         5 5 5 5 5
 
 
 
 
 
 
r#   rP   c                   $     e Zd Z fdZd Z xZS )CvtSelfAttentionConvProjectionc           	          t                                                       t          j        |||||d|          | _        t          j        |          | _        d S )NF)r[   rO   rN   biasgroups)r:   r;   r   r`   convolutionBatchNorm2drc   )r<   rM   r[   rO   rN   r=   s        r$   r;   z'CvtSelfAttentionConvProjection.__init__   sa    9#
 
 
  ^I66r#   c                 Z    |                      |          }|                     |          }|S r9   )rr   rc   r<   rW   s     r$   r?   z&CvtSelfAttentionConvProjection.forward   s.    ''55)),77r#   r   r   r   r;   r?   rF   rG   s   @r$   rn   rn      sG        7 7 7 7 7      r#   rn   c                       e Zd Zd ZdS ) CvtSelfAttentionLinearProjectionc                     |j         \  }}}}||z  }|                    |||                              ddd          }|S re   )r-   rg   rh   )r<   rW   ri   rL   rj   rk   rl   s          r$   r?   z(CvtSelfAttentionLinearProjection.forward   sN    2>2D/
L&%un#((\;OOWWXY[\^_``r#   N)r   r   r   r?   r"   r#   r$   rx   rx      s#            r#   rx   c                   &     e Zd Zd fd	Zd Z xZS )CvtSelfAttentionProjectiondw_bnc                     t                                                       |dk    rt          ||||          | _        t	                      | _        d S )Nr|   )r:   r;   rn   convolution_projectionrx   linear_projection)r<   rM   r[   rO   rN   projection_methodr=   s         r$   r;   z#CvtSelfAttentionProjection.__init__   sQ    ''*HT_ahjp*q*qD'!A!C!Cr#   c                 Z    |                      |          }|                     |          }|S r9   )r~   r   ru   s     r$   r?   z"CvtSelfAttentionProjection.forward   s.    22<@@--l;;r#   )r|   rv   rG   s   @r$   r{   r{      sR        D D D D D D      r#   r{   c                   .     e Zd Z	 d fd	Zd Zd Z xZS )CvtSelfAttentionTc                    t                                                       |dz  | _        || _        || _        || _        t          |||||dk    rdn|          | _        t          |||||          | _        t          |||||          | _	        t          j        |||	          | _        t          j        |||	          | _        t          j        |||	          | _        t          j        |
          | _        d S )Ng      avglinear)r   )rp   )r:   r;   scalewith_cls_tokenrM   	num_headsr{   convolution_projection_queryconvolution_projection_keyconvolution_projection_valuer   Linearprojection_queryprojection_keyprojection_valuerR   rS   )r<   r   rM   r[   	padding_q
padding_kvstride_q	stride_kvqkv_projection_methodqkv_biasattention_drop_rater   kwargsr=   s                r$   r;   zCvtSelfAttention.__init__   s     	_
,"",F*?5*H*HhhNc-
 -
 -
) +E{J	Mb+
 +
 +
' -G{J	Mb-
 -
 -
) !#	)YX N N N i	98LLL "	)YX N N Nz"566r#   c                     |j         \  }}}| j        | j        z  }|                    ||| j        |                              dddd          S )Nr   rf   r   r
   )r-   rM   r   rg   rh   )r<   rW   ri   rl   _head_dims         r$   "rearrange_for_multi_head_attentionz3CvtSelfAttention.rearrange_for_multi_head_attention   sS    %1%7"
K>T^3  [$.(SS[[\]_`bcefgggr#   c                 r   | j         rt          j        |d||z  gd          \  }}|j        \  }}}|                    ddd                              ||||          }|                     |          }|                     |          }	|                     |          }
| j         rHt          j	        ||	fd          }	t          j	        ||fd          }t          j	        ||
fd          }
| j
        | j        z  }|                     |                     |	                    }	|                     |                     |                    }|                     |                     |
                    }
t          j        d|	|g          | j        z  }t          j        j                            |d          }|                     |          }t          j        d||
g          }|j        \  }}}}|                    dddd                                                              ||| j        |z            }|S )	Nr   r   rf   dimzbhlk,bhtk->bhltzbhlt,bhtv->bhlvr
   )r   r   splitr-   rh   rg   r   r   r   catrM   r   r   r   r   r   einsumr   r   
functionalsoftmaxrS   
contiguous)r<   rW   rj   rk   	cls_tokenri   rl   rL   keyqueryvaluer   attention_scoreattention_probscontextr   s                   r$   r?   zCvtSelfAttention.forward   s"    	X&+k,FUN@SUV&W&W#I|0<0B-
K#++Aq!4499*lTZ\abb--l;;11,??11,?? 	9Iy%0a888E)Y,!444CIy%0a888E>T^3778M8Me8T8TUU55d6I6I#6N6NOO778M8Me8T8TUU,'85#,GG$*T(-55o25NN,,77,0?E2JKK&}1k1//!Q1--88::??
KY]YgjrYrssr#   T)r   r   r   r;   r   r?   rF   rG   s   @r$   r   r      sd         '7 '7 '7 '7 '7 '7Rh h h      r#   r   c                   (     e Zd ZdZ fdZd Z xZS )CvtSelfOutputz
    The residual connection is defined in CvtLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    c                     t                                                       t          j        ||          | _        t          j        |          | _        d S r9   )r:   r;   r   r   denserR   rS   )r<   rM   	drop_rater=   s      r$   r;   zCvtSelfOutput.__init__  sA    Yy)44
z),,r#   c                 Z    |                      |          }|                     |          }|S r9   r   rS   r<   rW   input_tensors      r$   r?   zCvtSelfOutput.forward	  s*    zz,//||L11r#   rX   rG   s   @r$   r   r      sQ         
- - - - -
      r#   r   c                   .     e Zd Z	 d fd	Zd Zd Z xZS )CvtAttentionTc                     t                                                       t          |||||||||	|
|          | _        t	          ||          | _        t                      | _        d S r9   )r:   r;   r   	attentionr   r4   setpruned_heads)r<   r   rM   r[   r   r   r   r   r   r   r   r   r   r=   s                r$   r;   zCvtAttention.__init__  sr     	)!
 
 $Iy99EEr#   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r   )lenr   r   num_attention_headsattention_head_sizer   r   r   r   r   r4   r   all_head_sizeunion)r<   headsindexs      r$   prune_headszCvtAttention.prune_heads0  s   u::??F74>5t~7Y[_[l
 
u
  2$.2FNN/0BEJJ1$.2FNN.t{/@%QOOO .2^-ORUV[R\R\-\*'+~'IDNLn'n$ -33E::r#   c                 `    |                      |||          }|                     ||          }|S r9   )r   r4   )r<   rW   rj   rk   self_outputattention_outputs         r$   r?   zCvtAttention.forwardB  s1    nn\65AA;;{LAAr#   r   )r   r   r   r;   r   r?   rF   rG   s   @r$   r   r     sa         " " " " " "@; ; ;$             r#   r   c                   $     e Zd Z fdZd Z xZS )CvtIntermediatec                     t                                                       t          j        |t	          ||z                      | _        t          j                    | _        d S r9   )r:   r;   r   r   intr   GELU
activation)r<   rM   	mlp_ratior=   s      r$   r;   zCvtIntermediate.__init__I  sJ    Yy#i).C*D*DEE
'))r#   c                 Z    |                      |          }|                     |          }|S r9   )r   r   ru   s     r$   r?   zCvtIntermediate.forwardN  s*    zz,//|44r#   rv   rG   s   @r$   r   r   H  sG        $ $ $ $ $
      r#   r   c                   $     e Zd Z fdZd Z xZS )	CvtOutputc                     t                                                       t          j        t	          ||z            |          | _        t          j        |          | _        d S r9   )r:   r;   r   r   r   r   rR   rS   )r<   rM   r   r   r=   s       r$   r;   zCvtOutput.__init__U  sN    Ys9y#8999EE
z),,r#   c                 d    |                      |          }|                     |          }||z   }|S r9   r   r   s      r$   r?   zCvtOutput.forwardZ  s4    zz,//||L11#l2r#   rv   rG   s   @r$   r   r   T  sG        - - - - -
      r#   r   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )CvtLayerzb
    CvtLayer composed by attention layers, normalization and multi-layer perceptrons (mlps).
    Tc                    t                                                       t          |||||||||	|
||          | _        t	          ||          | _        t          |||          | _        |dk    rt          |          nt          j
                    | _        t          j        |          | _        t          j        |          | _        d S )Nr%   rA   )r:   r;   r   r   r   intermediater   r4   r7   r   Identityr5   rb   layernorm_beforelayernorm_after)r<   r   rM   r[   r   r   r   r   r   r   r   r   r   drop_path_rater   r=   s                  r$   r;   zCvtLayer.__init__f  s    " 	%!
 
 ,IyAA	9i@@BPSVBVBV~>>>>\^\g\i\i "Y 7 7!|I66r#   c                 <   |                      |                     |          ||          }|}|                     |          }||z   }|                     |          }|                     |          }|                     ||          }|                     |          }|S r9   )r   r   r5   r   r   r4   )r<   rW   rj   rk   self_attention_outputr   layer_outputs          r$   r?   zCvtLayer.forward  s     $!!,//!
 !

 1>>*:;; (,6 ++L99((66 {{<>>~~l33r#   r   rX   rG   s   @r$   r   r   a  s\         & %7 %7 %7 %7 %7 %7N      r#   r   c                   $     e Zd Z fdZd Z xZS )CvtStagec           	      &    t                                                        _        | _         j        j         j                 r=t          j        t          j        dd j        j	        d                              _        t          j         j                 j         j                  j        dk    rj        nj	         j        dz
           j	         j                 j         j                 j         j                            _        d t          j        dj         j                 j        |         d          D             t          j         fdt+          j         j                           D               _        d S )	Nr   r   r   )rK   rN   rL   rM   rO   rT   c                 6    g | ]}|                                 S r"   )item).0xs     r$   
<listcomp>z%CvtStage.__init__.<locals>.<listcomp>  s-     
 
 
AFFHH
 
 
r#   cpu)r,   c                     g | ]}t          j        j                 j        j                 j        j                 j        j                 j        j                 j        j                 j        j                 j	        j                 j
        j                 j        j                 j        j                 j                 j        j                 j        j                            S ))r   rM   r[   r   r   r   r   r   r   r   r   r   r   r   )r   r   stagerM   
kernel_qkvr   r   r   r   r   r   r   r   r   r   )r   r   configdrop_path_ratesr<   s     r$   r   z%CvtStage.__init__.<locals>.<listcomp>  s       " ! $.tz:$.tz: & 1$* =$.tz:%0<$.tz:#_TZ8*0*Ftz*R#_TZ8(.(B4:(N$.tz:#24:#>$.tz:#)#3DJ#?    r#   )r:   r;   r   r   r   r   	Parameterr   randnrM   rI   patch_sizespatch_striderL   patch_paddingr   	embeddinglinspacer   depth
Sequentialrangelayers)r<   r   r   r   r=   s   `` @r$   r;   zCvtStage.__init__  s   
; , 	X\%+aDK<QRT<U*V*VWWDN&)$*5&tz204
a,,VEUVZV`cdVdEe&tz2(4)$*5
 
 

 
#nQ0Edj0QSYS_`eSfotuuu
 
 
 m     " v|DJ788#  
r#   c                 :   d }|                      |          }|j        \  }}}}|                    ||||z                                ddd          }| j        j        | j                 r4| j                            |dd          }t          j	        ||fd          }| j
        D ]} ||||          }|}| j        j        | j                 rt          j        |d||z  gd          \  }}|                    ddd                              ||||          }||fS )Nr   rf   r   r   r   )r   r-   rg   rh   r   r   r   expandr   r   r   r   )	r<   rW   r   ri   rL   rj   rk   layerlayer_outputss	            r$   r?   zCvtStage.forward  s:   	~~l332>2D/
L&%#((\6E>RRZZ[\^_abcc; , 	G--j"bAAI 9i%>AFFFL[ 	) 	)E!E,>>M(LL; , 	X&+k,FUN@SUV&W&W#I|#++Aq!4499*lTZ\abbY&&r#   rv   rG   s   @r$   r   r     sH        (
 (
 (
 (
 (
T' ' ' ' ' ' 'r#   r   c                   &     e Zd Z fdZddZ xZS )
CvtEncoderc                     t                                                       || _        t          j        g           | _        t          t          |j                            D ]*}| j        	                    t          ||                     +d S r9   )r:   r;   r   r   
ModuleListstagesr   r   r   appendr   )r<   r   	stage_idxr=   s      r$   r;   zCvtEncoder.__init__  s    mB''s6<0011 	< 	<IKx	::;;;;	< 	<r#   FTc                     |rdnd }|}d }t          | j                  D ]\  }} ||          \  }}|r||fz   }|st          d |||fD                       S t          |||          S )Nr"   c              3      K   | ]}||V  	d S r9   r"   )r   vs     r$   	<genexpr>z%CvtEncoder.forward.<locals>.<genexpr>  s(      bbqTUTaTaTaTaTabbr#   r   r   r   )	enumerater  r!   r   )	r<   rV   output_hidden_statesreturn_dictall_hidden_statesrW   r   r   stage_modules	            r$   r?   zCvtEncoder.forward  s    "6@BBD#	!*4;!7!7 	H 	HA&2l<&@&@#L)# H$5$G! 	cbb\9>O$Pbbbbbb**%+
 
 
 	
r#   )FTrv   rG   s   @r$   r   r     sL        < < < < <
 
 
 
 
 
 
 
r#   r   c                   .    e Zd ZU eed<   dZdZdgZd ZdS )CvtPreTrainedModelr   cvtrV   r   c                    t          |t          j        t          j        f          rit          j                            |j        j        d| j        j	                  |j        _        |j
         |j
        j                                         dS dS t          |t          j                  r?|j
        j                                         |j        j                            d           dS t          |t                    rY| j        j        |j                 rDt          j                            |j        j        d| j        j	                  |j        _        dS dS dS )zInitialize the weightsr%   )meanstdNg      ?)r\   r   r   r`   inittrunc_normal_weightdatar   initializer_rangerp   zero_rb   fill_r   r   r   )r<   modules     r$   _init_weightsz CvtPreTrainedModel._init_weights  s6   fry")455 	!#!6!6v}7IPSY]YdYv!6!w!wFM{& &&((((( '&-- 	K""$$$M$$S))))))) 	{$V\2 (*(=(=$)9V )> ) ) %%%	 	 r#   N)	r   r   r   r   r    base_model_prefixmain_input_name_no_split_modulesr  r"   r#   r$   r  r    sD         $O#    r#   r  c                        e Zd Zd
 fd	Zd Ze	 	 	 ddeej                 dee	         dee	         de
eef         fd	            Z xZS )CvtModelTc                     t                                          |           || _        t          |          | _        |                                  dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r:   r;   r   r   encoder	post_init)r<   r   add_pooling_layerr=   s      r$   r;   zCvtModel.__init__  sI    
 	   !&))r#   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr$  r   r   r   )r<   heads_to_pruner   r   s       r$   _prune_headszCvtModel._prune_heads!  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr#   NrV   r  r  r)   c                     ||n| j         j        }||n| j         j        }|t          d          |                     |||          }|d         }|s|f|dd          z   S t          ||j        |j                  S )Nz You have to specify pixel_valuesr  r  r   r   r	  )r   r  use_return_dict
ValueErrorr$  r   r   r   )r<   rV   r  r  encoder_outputssequence_outputs         r$   r?   zCvtModel.forward)  s     %9$D  $+Jj 	 &1%<kk$+B]?@@@,,!5# ' 
 

 *!, 	<#%(;;;*-+;)7
 
 
 	
r#   r   )NNN)r   r   r   r;   r*  r   r   r   rD   boolr   r!   r   r?   rF   rG   s   @r$   r"  r"    s             C C C  04/3&*	
 
u|,
 'tn
 d^	

 
u11	2
 
 
 ^
 
 
 
 
r#   r"  z
    Cvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                        e Zd Z fdZe	 	 	 	 d	deej                 deej                 dee         dee         de	e
ef         f
d            Z xZS )
CvtForImageClassificationc                    t                                          |           |j        | _        t          |d          | _        t          j        |j        d                   | _        |j        dk    r%t          j	        |j        d         |j                  nt          j
                    | _        |                                  d S )NF)r&  r   r   )r:   r;   
num_labelsr"  r  r   rb   rM   	layernormr   r   
classifierr%  )r<   r   r=   s     r$   r;   z"CvtForImageClassification.__init__P  s        +Fe<<<f&6r&:;; CIBSVWBWBWBIf&r*F,=>>>]_]h]j]j 	
 	r#   NrV   labelsr  r  r)   c                     ||n| j         j        }|                     |||          }|d         }|d         }| j         j        d         r|                     |          }nP|j        \  }}	}
}|                    ||	|
|z                                ddd          }|                     |          }|                    d          }| 	                    |          }d}|n| j         j
        p| j         j        dk    rd| j         _
        nS| j         j        dk    r7|j        t          j        k    s|j        t          j        k    rd	| j         _
        nd
| j         _
        | j         j
        dk    r\t!                      }| j         j        dk    r1 ||                                |                                          }n |||          }n| j         j
        d	k    rLt%                      } ||                    d| j         j                  |                    d                    }n*| j         j
        d
k    rt'                      } |||          }|s|f|dd         z   }||f|z   n|S t)          |||j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr,  r   r   r   rf   r   
regressionsingle_label_classificationmulti_label_classification)losslogitsr   )r   r-  r  r   r6  r-   rg   rh   r  r7  problem_typer5  r+   r   longr   r	   squeezer   r   r   r   )r<   rV   r8  r  r  outputsr0  r   ri   rL   rj   rk   sequence_output_meanr>  r=  loss_fctr4   s                    r$   r?   z!CvtForImageClassification.forward^  s    &1%<kk$+B]((!5#  
 
 "!*AJ	; $ 	>"nnY77OO6E6K3Jfe-22:|VV[^\\ddefhiklmmO"nn_==O.333::!566{'/;)Q../;DK,,[+a//V\UZ5O5OSYS_chclSlSl/LDK,,/KDK,{'<77"99;)Q..#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB0F G GUWYY)-III,..x// 	FY,F)-)9TGf$$vE3f\c\qrrrrr#   )NNNN)r   r   r   r;   r   r   r   rD   r1  r   r!   r   r?   rF   rG   s   @r$   r3  r3  I  s              04)-/3&*<s <su|,<s &<s 'tn	<s
 d^<s 
u::	;<s <s <s ^<s <s <s <s <sr#   r3  )r3  r"  r  )r%   F)6r   collections.abcr]   dataclassesr   typingr   r   r   r   torch.nnr   r   r	   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_cvtr   
get_loggerr   loggerr   rD   rC   r1  r5   Moduler7   rI   rP   rn   rx   r{   r   r   r   r   r   r   r   r   r  r"  r3  __all__r"   r#   r$   <module>rR     s         ! ! ! ! ! ! " " " " " " " "        A A A A A A A A A A Q Q Q Q Q Q Q Q - - - - - - Q Q Q Q Q Q Q Q , , , , , , , , ( ( ( ( ( ( 
	H	%	%   
B B B B B+ B B  B U\ e T V[Vb    *% % % % %") % % %    BI   $    	   2    RY   (    ry   
 
 
 
 
 
 
 
N N N N Nry N N Nb    BI   "6  6  6  6  6 29 6  6  6 r	 	 	 	 	bi 	 	 	
 
 
 
 
	 
 
 
? ? ? ? ?ry ? ? ?D<' <' <' <' <'ry <' <' <'~
 
 
 
 
 
 
 
8        , 0
 0
 0
 0
 0
! 0
 0
 0
f   Ls Ls Ls Ls Ls 2 Ls Ls Ls^ J
I
Ir#   