
     `i<                     8   d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ej        e          Z G d dej                  Z G d dej                  Z G d dej                  Z  G d dej                  Z!e G d de
                      Z"e G d de"                      Z# ed           G d de"                      Z$ ed           G d de"e                      Z%g d Z&dS )!zPyTorch TextNet model.    )AnyOptionalUnionN)Tensor)PreTrainedModel)ACT2CLS)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)TextNetConfig)logging)BackboneMixin   )auto_docstringc                   H     e Zd Zdef fdZdej        dej        fdZ xZS )TextNetConvLayerconfigc                 D   t                                                       |j        | _        |j        | _        |j        | _        t          |j        t                    r |j        d         dz  |j        d         dz  fn	|j        dz  }t          j        |j        |j        |j        |j        |d          | _        t          j        |j        |j                  | _        t          j                    | _        | j         t)          | j                             | _        d S d S )Nr         F)kernel_sizestridepaddingbias)super__init__stem_kernel_sizer   stem_strider   stem_act_funcactivation_function
isinstancetuplennConv2dstem_num_channelsstem_out_channelsconvBatchNorm2dbatch_norm_eps
batch_normIdentity
activationr   )selfr   r   	__class__s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/textnet/modeling_textnet.pyr   zTextNetConvLayer.__init__*   s   !2(#)#7  &1599.V"a');A)>!)CDD(A- 	 I$$/%
 
 
	 .)A6CXYY+--#/%d&>?AADOOO 0/    hidden_statesreturnc                     |                      |          }|                     |          }|                     |          S N)r(   r+   r-   )r.   r2   s     r0   forwardzTextNetConvLayer.forwardE   s6    		-0066}---r1   )	__name__
__module____qualname__r   r   torchr   r6   __classcell__r/   s   @r0   r   r   )   sq        B} B B B B B B6.U\ .el . . . . . . . .r1   r   c            
       \     e Zd ZdZdededededef
 fdZdej        d	ej        fd
Z	 xZ
S )TextNetRepConvLayera  
    This layer supports re-parameterization by combining multiple convolutional branches
    (e.g., main convolution, vertical, horizontal, and identity branches) during training.
    At inference time, these branches can be collapsed into a single convolution for
    efficiency, as per the re-parameterization paradigm.

    The "Rep" in the name stands for "re-parameterization" (introduced by RepVGG).
    r   in_channelsout_channelsr   r   c                    t                                                       || _        || _        || _        || _        |d         dz
  dz  |d         dz
  dz  f}t          j                    | _        t          j	        |||||d          | _
        t          j        ||j                  | _        |d         dz
  dz  df}d|d         dz
  dz  f}|d         dk    rHt          j	        |||d         df||d          | _        t          j        ||j                  | _        nd\  | _        | _        |d         dk    rHt          j	        ||d|d         f||d          | _        t          j        ||j                  | _        nd\  | _        | _        ||k    r!|dk    rt          j        ||j                  nd | _        d S )Nr   r   r   F)r?   r@   r   r   r   r   )num_featuresepsNN)r   r   num_channelsr@   r   r   r$   ReLUr!   r%   	main_convr)   r*   main_batch_normvertical_convvertical_batch_normhorizontal_convhorizontal_batch_normrbr_identity)
r.   r   r?   r@   r   r   r   vertical_paddinghorizontal_paddingr/   s
            r0   r   zTextNetRepConvLayer.__init__U   s
   '(&NQ&1,{1~/Aa.GH#%799 #%#
 
 
  "~<VMbccc(^a/A5q9+a.1"4!:;q>Q!#')(^Q/(" " "D (*~<U[Uj'k'k'kD$$;E8D 8q>Q#%9')A/*$ $ $D  *,\W]Wl)m)m)mD&&?I<D $"< {**v{{ N9NOOOO 	r1   r2   r3   c                    |                      |          }|                     |          }| j        /|                     |          }|                     |          }||z   }| j        /|                     |          }|                     |          }||z   }| j        |                     |          }||z   }|                     |          S r5   )rG   rH   rI   rJ   rK   rL   rM   r!   )r.   r2   main_outputsvertical_outputshorizontal_outputsid_outs         r0   r6   zTextNetRepConvLayer.forward   s    ~~m44++L99 )#11-@@#778HII'*::L +!%!5!5m!D!D!%!;!;<N!O!O'*<<L(&&}55F'&0L''555r1   )r7   r8   r9   __doc__r   intr   r:   r   r6   r;   r<   s   @r0   r>   r>   K   s         7
} 7
3 7
c 7
`c 7
mp 7
 7
 7
 7
 7
 7
r6U\ 6el 6 6 6 6 6 6 6 6r1   r>   c                   .     e Zd Zdedef fdZd Z xZS )TextNetStager   depthc                    t                                                       |j        |         }|j        |         }t	          |          }|j        |         }|j        |dz            }|g|g|dz
  z  z   }|g|z  }	g }
t          ||	||          D ]"}|
                    t          |g|R             #t          j
        |
          | _        d S )Nr   )r   r   conv_layer_kernel_sizesconv_layer_strideslenhidden_sizeszipappendr>   r$   
ModuleListstage)r.   r   rY   r   r   
num_layersstage_in_channel_sizestage_out_channel_sizer?   r@   rb   stage_configr/   s               r0   r   zTextNetStage.__init__   s    4U;*51%%
 & 3E :!'!4UQY!?,-1G0HJYZN0[[./*<\;OO 	E 	ELLL,VClCCCDDDD]5))


r1   c                 0    | j         D ]} ||          }|S r5   )rb   )r.   hidden_stateblocks      r0   r6   zTextNetStage.forward   s*    Z 	/ 	/E 5..LLr1   )r7   r8   r9   r   rV   r   r6   r;   r<   s   @r0   rX   rX      sZ        *} *S * * * * * *"      r1   rX   c            	       d     e Zd Zdef fdZ	 	 d	dej        dee         dee         de	fdZ
 xZS )
TextNetEncoderr   c                    t                                                       g }t          |j                  }t	          |          D ]%}|                    t          ||                     &t          j        |          | _	        d S r5   )
r   r   r]   r[   ranger`   rX   r$   ra   stages)r.   r   rn   
num_stagesstage_ixr/   s        r0   r   zTextNetEncoder.__init__   sy    788
j)) 	: 	:HMM,vx889999mF++r1   Nrh   output_hidden_statesreturn_dictr3   c                     |g}| j         D ]"} ||          }|                    |           #|s|f}|r||fz   n|S t          ||          S )N)last_hidden_stater2   )rn   r`   r
   )r.   rh   rq   rr   r2   rb   outputs          r0   r6   zTextNetEncoder.forward   s~     &[ 	/ 	/E 5..L  .... 	Q"_F0DP6],,,&P-\ijjjjr1   rD   )r7   r8   r9   r   r   r:   r   r   boolr
   r6   r;   r<   s   @r0   rk   rk      s        ,} , , , , , , 04&*	k klk 'tnk d^	k
 
(k k k k k k k kr1   rk   c                   (    e Zd ZU eed<   dZdZd ZdS )TextNetPreTrainedModelr   textnetpixel_valuesc                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 d S d S t          |t          j                  rF|j        j                            d           |j	        "|j	        j        
                                 d S d S d S )Ng        )meanstdg      ?)r"   r$   Linearr%   weightdatanormal_r   initializer_ranger   zero_r)   fill_)r.   modules     r0   _init_weightsz$TextNetPreTrainedModel._init_weights   s    fry")455 	)M&&CT[5R&SSS{& &&((((( '&// 	)M$$S))){& &&(((((	) 	)&&r1   N)r7   r8   r9   r   __annotations__base_model_prefixmain_input_namer    r1   r0   rx   rx      s<         !$O) ) ) ) )r1   rx   c                        e Zd Z fdZe	 ddedee         dee         dee	e
ee
         f         e	e
         ef         fd            Z xZS )	TextNetModelc                     t                                          |           t          |          | _        t	          |          | _        t          j        d          | _        | 	                                 d S )N)r   r   )
r   r   r   stemrk   encoderr$   AdaptiveAvgPool2dpooler	post_initr.   r   r/   s     r0   r   zTextNetModel.__init__   sa       $V,,	%f--*622r1   Nrz   rq   rr   r3   c                 :   ||n| j         j        }||n| j         j        }|                     |          }|                     |||          }|d         }|                     |          }|s||f}|r||d         fz   n|S t          |||r|d         nd           S )Nrq   rr   r   r   )rt   pooler_outputr2   )r   use_return_dictrq   r   r   r   r   )	r.   rz   rq   rr   rh   encoder_outputsrt   pooled_outputru   s	            r0   r6   zTextNetModel.forward   s     &1%<kk$+B]$8$D  $+Jj 	 yy..,,/CQ\ ' 
 
 ,A.$566 	V'7F5IU6_Q/111vU7/'0DN/!,,$
 
 
 	
r1   rD   )r7   r8   r9   r   r   r   r   rv   r   r#   r   listr   r6   r;   r<   s   @r0   r   r      s             os
 
"
:B4.
^fgk^l
	uS$s)^$eCj2ZZ	[
 
 
 ^
 
 
 
 
r1   r   z
    TextNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	deej                 deej                 dee	         dee	         de
f
d            Z xZS )
TextNetForImageClassificationc                    t                                          |           |j        | _        t          |          | _        t          j        d          | _        t          j                    | _	        |j        dk    r%t          j
        |j        d         |j                  nt          j                    | _        t          j        | j        | j	        g          | _        |                                  d S )N)r   r   r   )r   r   
num_labelsr   ry   r$   r   avg_poolFlattenflattenr~   r^   r,   fcra   
classifierr   r   s     r0   r   z&TextNetForImageClassification.__init__  s        +#F++,V44z||KQK\_`K`K`")F/3V5FGGGfhfqfsfs -(EFF 	r1   Nrz   labelsrq   rr   r3   c                 X   ||n| j         j        }|                     |||          }|d         }| j        D ]} ||          }|                     |          }d}	||                     ||| j                   }	|s|f|dd         z   }
|	|	f|
z   n|
S t          |	||j                  S )al  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> import torch
        >>> import requests
        >>> from transformers import TextNetForImageClassification, TextNetImageProcessor
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(images=image, return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        >>> outputs.logits.shape
        torch.Size([1, 2])
        ```Nr   r   r   )losslogitsr2   )r   r   ry   r   r   loss_functionr   r2   )r.   rz   r   rq   rr   outputsrt   layerr   r   ru   s              r0   r6   z%TextNetForImageClassification.forward&  s    B &1%<kk$+B],,|BVdo,pp#AJ_ 	9 	9E %&7 8 8*++%%ffdkBBD 	DY,F'+'7D7V##VC3f\c\qrrrrr1   )NNNN)r7   r8   r9   r   r   r   r:   FloatTensor
LongTensorrv   r   r6   r;   r<   s   @r0   r   r     s              59-1/3&*0s 0su010s )*0s 'tn	0s
 d^0s 
.0s 0s 0s ^0s 0s 0s 0s 0sr1   r   zP
    TextNet backbone, to be used with frameworks like DETR and MaskFormer.
    c                        e Zd ZdZ fdZe	 d	dedee         dee         de	e
e
         ef         fd            Z xZS )
TextNetBackboneFc                     t                                          |           t                                          |           t          |          | _        |j        | _        |                                  d S r5   )r   r   _init_backboner   ry   r^   rB   r   r   s     r0   r   zTextNetBackbone.__init__b  se       v&&&#F++"/ 	r1   Nrz   rq   rr   r3   c                 l   ||n| j         j        }||n| j         j        }|                     |d|          }|r|j        n|d         }d}t          | j                  D ]\  }}|| j        v r|||         fz  }|s|f}	|r|r|j        n|d         }|	|fz  }	|	S t          ||r|j        ndd          S )a  
        Examples:

        ```python
        >>> import torch
        >>> import requests
        >>> from PIL import Image
        >>> from transformers import AutoImageProcessor, AutoBackbone

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = AutoBackbone.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(image, return_tensors="pt")
        >>> with torch.no_grad():
        >>>     outputs = model(**inputs)
        ```NTr   r   r   )feature_mapsr2   
attentions)	r   r   rq   ry   r2   	enumeratestage_namesout_featuresr	   )
r.   rz   rq   rr   r   r2   r   idxrb   ru   s
             r0   r6   zTextNetBackbone.forwardl  s   . &1%<kk$+B]$8$D  $+Jj 	 ,,|$T_,``1<L--'!*#D$455 	6 	6JC)))s!3 55 	"_F# +9D T 5 5'RS*=**M%3GQ'//T
 
 
 	
r1   rD   )r7   r8   r9   has_attentionsr   r   r   r   rv   r   r#   r	   r6   r;   r<   s   @r0   r   r   Z  s         N     os/
 /
"/
:B4./
^fgk^l/
	uU|^+	,/
 /
 /
 ^/
 /
 /
 /
 /
r1   r   )r   r   rx   r   )'rU   typingr   r   r   r:   torch.nnr$   r   transformersr   transformers.activationsr   transformers.modeling_outputsr	   r
   r   r   1transformers.models.textnet.configuration_textnetr   transformers.utilsr   !transformers.utils.backbone_utilsr   utilsr   
get_loggerr7   loggerModuler   r>   rX   rk   rx   r   r   r   __all__r   r1   r0   <module>r      s     ' ' ' ' ' ' ' ' ' '              ( ( ( ( ( ( , , , , , ,            L K K K K K & & & & & & ; ; ; ; ; ; # # # # # # 
	H	%	%. . . . .ry . . .DW6 W6 W6 W6 W6") W6 W6 W6t    29   0k k k k kRY k k k: ) ) ) ) )_ ) ) )  "
 "
 "
 "
 "
) "
 "
 "
J   @s @s @s @s @s$: @s @s @sF   
=
 =
 =
 =
 =
,m =
 =
 
=
@ i
h
hr1   