
     `iO                        d Z ddlZddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ dd	lmZmZ d
dlmZ  ej        e          ZdedefdZd,deeef         defdZ G d dej                  Z G d dej                  Z G d dej                  Z  G d dej                  Z! G d dej                  Z" G d dej                  Z# G d d ej                  Z$ G d! d"ej                  Z%e G d# d$e                      Z&e G d% d&e&                      Z' ed'(           G d) d*e&                      Z(g d+Z)dS )-zPyTorch EfficientNet model.    N)OptionalUnion)nn   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )EfficientNetConfigconfignum_channelsc                     | j         }|| j        z  }t          |t          ||dz  z             |z  |z            }|d|z  k     r||z  }t          |          S )z<
    Round number of filters based on depth multiplier.
       g?)depth_divisorwidth_coefficientmaxint)r   r   divisornew_dims       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/efficientnet/modeling_efficientnet.pyround_filtersr   %   sk     "GF,,L'3|gk9::gEOPPG |###7w<<    Tkernel_sizeadjustc                     t          | t                    r| | f} | d         dz  | d         dz  f}|r$|d         dz
  |d         |d         dz
  |d         fS |d         |d         |d         |d         fS )aJ  
    Utility function to get the tuple padding value for the depthwise convolution.

    Args:
        kernel_size (`int` or `tuple`):
            Kernel size of the convolution layers.
        adjust (`bool`, *optional*, defaults to `True`):
            Adjusts padding value to apply to right and bottom sides of the input.
    r   r   r   )
isinstancer   )r   r   corrects      r   correct_padr"   4   s     +s## 1"K01~"KNa$78G @
Q
GAJNGAJGG
GAJ
GAJ??r   c                   L     e Zd ZdZdef fdZdej        dej        fdZ xZ	S )EfficientNetEmbeddingszL
    A module that corresponds to the stem module of the original work.
    r   c                 |   t                                                       t          |d          | _        t	          j        d          | _        t	          j        |j        | j        dddd          | _	        t	          j
        | j        |j        |j        	          | _        t          |j                 | _        d S )
N    )r   r   r   r   paddingr   r   validFr   strider(   bias)epsmomentum)super__init__r   out_dimr   	ZeroPad2dr(   Conv2dr   convolutionBatchNorm2dbatch_norm_epsbatch_norm_momentum	batchnormr   
hidden_act
activationselfr   	__class__s     r   r0   zEfficientNetEmbeddings.__init__M   s    $VR00|L99991QPW^c
 
 
 &:OZ`Ztuuu !23r   pixel_valuesreturnc                     |                      |          }|                     |          }|                     |          }|                     |          }|S N)r(   r4   r8   r:   )r<   r>   featuress      r   forwardzEfficientNetEmbeddings.forwardX   sM    <<--##H-->>(++??8,,r   )
__name__
__module____qualname____doc__r   r0   torchTensorrC   __classcell__r=   s   @r   r$   r$   H   su         	41 	4 	4 	4 	4 	4 	4EL U\        r   r$   c                   .     e Zd Z	 	 	 	 	 	 	 d fd	Z xZS )EfficientNetDepthwiseConv2dr   r   r   Tzerosc	                 f    ||z  }	t                                          ||	|||||||	  	         d S )N)	in_channelsout_channelsr   r+   r(   dilationgroupsr,   padding_mode)r/   r0   )r<   rP   depth_multiplierr   r+   r(   rR   r,   rT   rQ   r=   s             r   r0   z$EfficientNetDepthwiseConv2d.__init__b   sV     #%55#%#% 	 
	
 
	
 
	
 
	
 
	
r   )r   r   r   r   r   TrN   )rD   rE   rF   r0   rJ   rK   s   @r   rM   rM   a   sT         
 
 
 
 
 
 
 
 
 
r   rM   c                   X     e Zd ZdZdedededef fdZdej        dej	        fd	Z
 xZS )
EfficientNetExpansionLayerz_
    This corresponds to the expansion phase of each block in the original implementation.
    r   in_dimr1   r+   c                     t                                                       t          j        ||ddd          | _        t          j        ||j                  | _        t          |j	                 | _
        d S )Nr   sameFrP   rQ   r   r(   r,   )num_featuresr-   )r/   r0   r   r3   expand_convr5   r6   	expand_bnr   r9   
expand_act)r<   r   rX   r1   r+   r=   s        r   r0   z#EfficientNetExpansionLayer.__init__   so    9 
 
 
 W&BWXXX !23r   hidden_statesr?   c                     |                      |          }|                     |          }|                     |          }|S rA   )r]   r^   r_   r<   r`   s     r   rC   z"EfficientNetExpansionLayer.forward   s=    ((77}5566r   )rD   rE   rF   rG   r   r   r0   rH   FloatTensorrI   rC   rJ   rK   s   @r   rW   rW   {   s         
41 
43 
4 
4VY 
4 
4 
4 
4 
4 
4U%6 5<        r   rW   c            
       \     e Zd ZdZdededededef
 fdZdej	        d	ej
        fd
Z xZS )EfficientNetDepthwiseLayerzk
    This corresponds to the depthwise convolution phase of each block in the original implementation.
    r   rX   r+   r   adjust_paddingc                 v   t                                                       || _        | j        dk    rdnd}t          ||          }t	          j        |          | _        t          ||||d          | _        t	          j	        ||j
        |j                  | _        t          |j                 | _        d S )	Nr   r)   rZ   )r   r'   Fr*   r\   r-   r.   )r/   r0   r+   r"   r   r2   depthwise_conv_padrM   depthwise_convr5   r6   r7   depthwise_normr   r9   depthwise_act)	r<   r   rX   r+   r   rf   conv_padr(   r=   s	           r   r0   z#EfficientNetDepthwiseLayer.__init__   s     	"kQ..77Fk.AAA"$,w"?"?"?9FHSX
 
 
 !nV%:VE_
 
 
 $F$56r   r`   r?   c                     | j         dk    r|                     |          }|                     |          }|                     |          }|                     |          }|S )Nr   )r+   ri   rj   rk   rl   rb   s     r   rC   z"EfficientNetDepthwiseLayer.forward   sa    ;! 33MBBM++M::++M::**=99r   rD   rE   rF   rG   r   r   boolr0   rH   rc   rI   rC   rJ   rK   s   @r   re   re      s         7"7 7 	7
 7 7 7 7 7 7 7,	U%6 	5< 	 	 	 	 	 	 	 	r   re   c            	       Z     e Zd ZdZddedededef fdZdej	        d	ej
        fd
Z xZS )EfficientNetSqueezeExciteLayerzl
    This corresponds to the Squeeze and Excitement phase of each block in the original implementation.
    Fr   rX   
expand_dimexpandc                    t                                                       |r|n|| _        t          dt	          ||j        z                      | _        t          j        d          | _	        t          j
        | j        | j        dd          | _        t          j
        | j        | j        dd          | _        t          |j                 | _        t          j                    | _        d S )Nr   )output_sizerZ   )rP   rQ   r   r(   )r/   r0   dimr   r   squeeze_expansion_ratiodim_ser   AdaptiveAvgPool2dsqueezer3   reducert   r   r9   
act_reduceSigmoid
act_expand)r<   r   rX   rs   rt   r=   s        r   r0   z'EfficientNetSqueezeExciteLayer.__init__   s    !'3::V!S&*H!HIIJJ+:::i	
 
 
 i	
 
 
 !!23*,,r   r`   r?   c                    |}|                      |          }|                     |          }|                     |          }|                     |          }|                     |          }t          j        ||          }|S rA   )r{   r|   r}   rt   r   rH   mul)r<   r`   inputss      r   rC   z&EfficientNetSqueezeExciteLayer.forward   ss    ]33M2266M2266	&-88r   )Fro   rK   s   @r   rr   rr      s         ' '1 '3 'C 'Y] ' ' ' ' ' '*
U%6 
5< 
 
 
 
 
 
 
 
r   rr   c                   n     e Zd ZdZdedededededef fdZd	e	j
        d
e	j
        de	j        fdZ xZS )EfficientNetFinalBlockLayerz[
    This corresponds to the final phase of each block in the original implementation.
    r   rX   r1   r+   	drop_rateid_skipc                     t                                                       |dk    o| | _        t          j        ||ddd          | _        t          j        ||j        |j                  | _	        t          j
        |          | _        d S )Nr   rZ   Fr[   rh   p)r/   r0   apply_dropoutr   r3   project_convr5   r6   r7   
project_bnDropoutdropout)r<   r   rX   r1   r+   r   r   r=   s          r   r0   z$EfficientNetFinalBlockLayer.__init__   s     	#q[8[I 
 
 
 . f&;fF`
 
 
 zI...r   
embeddingsr`   r?   c                     |                      |          }|                     |          }| j        r|                     |          }||z   }|S rA   )r   r   r   r   )r<   r   r`   s      r   rC   z#EfficientNetFinalBlockLayer.forward   sR    ))-8866 	7 LL77M)J6Mr   rD   rE   rF   rG   r   r   floatrp   r0   rH   rc   rI   rC   rJ   rK   s   @r   r   r      s         /(/25/@C/MP/]b/mq/ / / / / /"%"3 EDU Z_Zf        r   r   c                   l     e Zd ZdZdededededededed	ed
ef fdZde	j
        de	j        fdZ xZS )EfficientNetBlocka  
    This corresponds to the expansion and depthwise convolution phase of each block in the original implementation.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
        in_dim (`int`):
            Number of input channels.
        out_dim (`int`):
            Number of output channels.
        stride (`int`):
            Stride size to be used in convolution layers.
        expand_ratio (`int`):
            Expand ratio to set the output dimensions for the expansion and squeeze-excite layers.
        kernel_size (`int`):
            Kernel size for the depthwise convolution layer.
        drop_rate (`float`):
            Dropout rate to be used in the final phase of each block.
        id_skip (`bool`):
            Whether to apply dropout and sum the final hidden states with the input embeddings during the final phase
            of each block. Set to `True` for the first block of each stage.
        adjust_padding (`bool`):
            Whether to apply padding to only right and bottom side of the input kernel before the depthwise convolution
            operation, set to `True` for inputs with odd input sizes.
    r   rX   r1   r+   expand_ratior   r   r   rf   c
                    t                                                       || _        | j        dk    | _        ||z  }
| j        rt	          |||
|          | _        t          || j        r|
n||||	          | _        t          |||
| j                  | _	        t          || j        r|
n|||||          | _        d S )Nr   )r   rX   r1   r+   )r   rX   r+   r   rf   )r   rX   rs   rt   )r   rX   r1   r+   r   r   )r/   r0   r   rt   rW   	expansionre   rj   rr   squeeze_exciter   
projection)r<   r   rX   r1   r+   r   r   r   r   rf   expand_in_dimr=   s              r   r0   zEfficientNetBlock.__init__  s     	('1,-; 	7fmF  DN 9$(K;==V#)
 
 
 =&]4;
 
 
 6$(K;==V
 
 
r   r`   r?   c                     |}| j         dk    r|                     |          }|                     |          }|                     |          }|                     ||          }|S )Nr   )r   r   rj   r   r   )r<   r`   r   s      r   rC   zEfficientNetBlock.forwardH  sg    "
!! NN=99M++M:: ++M::
MBBr   r   rK   s   @r   r   r     s         4'
"'
 '
 	'

 '
 '
 '
 '
 '
 '
 '
 '
 '
 '
 '
R
U%6 
5< 
 
 
 
 
 
 
 
r   r   c            	       h     e Zd ZdZdef fdZ	 	 ddej        dee	         dee	         d	e
fd
Z xZS )EfficientNetEncoderz
    Forward propagates the embeddings through each EfficientNet block.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
    r   c                     t                                                       | _        |j         _         fdt	          |j                  }t          fd|j        D                       }d}g }t          |          D ]}t          ||j        |                   }t          ||j
        |                   }|j        |         }	|j        |         }
|j        |         }t           |j        |                             D ]d}|dk    }|dk    rdn|	}	|dk    r|n|}||j        v}|j        |z  |z  }t!          ||||	|
||||	  	        }|                    |           |dz  }et%          j        |           _        t%          j        |t          |d          ddd	           _        t%          j        |j        |j        |j        
           _        t8          |j                  _        d S )Nc                 V    t          t          j        j        | z                      S rA   )r   mathceildepth_coefficient)repeatsr<   s    r   round_repeatsz3EfficientNetEncoder.__init__.<locals>.round_repeatsc  s#    ty!7'!ABBCCCr   c              3   .   K   | ]} |          V  d S rA    ).0nr   s     r   	<genexpr>z/EfficientNetEncoder.__init__.<locals>.<genexpr>h  s-      LLaq))LLLLLLr   r   r   )	r   rX   r1   r+   r   r   r   r   rf   i   rZ   Fr[   rh   )r/   r0   r   r   lenrP   sumnum_block_repeatsranger   rQ   strideskernel_sizesexpand_ratiosdepthwise_paddingdrop_connect_rater   appendr   
ModuleListblocksr3   top_convr5   
hidden_dimr6   r7   top_bnr   r9   top_activation)r<   r   num_base_blocks
num_blockscurr_block_numr   irX   r1   r+   r   r   jr   rf   r   blockr   r=   s   `                @r   r0   zEfficientNetEncoder.__init__^  sE   !'!9	D 	D 	D 	D 	D f011LLLL63KLLLLL
'' 	$ 	$A"66+=a+@AAF#FF,?,BCCG^A&F -a0K!/2L==)A!)DEEFF $ $q&!ee$%EEv!/v7O!O"4~E
R	)!!#! +!-'##1
 
 
 e$$$!#'$* mF++	&vt44
 
 
 n*0EPVPj
 
 
 %V%67r   FTr`   output_hidden_statesreturn_dictr?   c                 $   |r|fnd }| j         D ]} ||          }|r||fz  }|                     |          }|                     |          }|                     |          }|st	          d ||fD                       S t          ||          S )Nc              3      K   | ]}||V  	d S rA   r   )r   vs     r   r   z.EfficientNetEncoder.forward.<locals>.<genexpr>  s"      XXq!-----XXr   )last_hidden_stater`   )r   r   r   r   tupler   )r<   r`   r   r   all_hidden_statesr   s         r   rC   zEfficientNetEncoder.forward  s     1EN],,$[ 	6 	6E!E-00M# 6!m%55!m44M22++M:: 	YXX]4E$FXXXXXX-++
 
 
 	
r   )FT)rD   rE   rF   rG   r   r0   rH   rc   r   rp   r   rC   rJ   rK   s   @r   r   r   U  s         581 58 58 58 58 58 58t 05&*	
 
(
 'tn
 d^	

 
(
 
 
 
 
 
 
 
r   r   c                   <    e Zd ZU eed<   dZdZg Zdej	        fdZ
dS )EfficientNetPreTrainedModelr   efficientnetr>   modulec                    t          |t          j        t          j        t          j        f          rR|j        j                            d| j        j	                   |j
        "|j
        j                                         dS dS dS )zInitialize the weightsg        )meanstdN)r    r   Linearr3   r5   weightdatanormal_r   initializer_ranger,   zero_)r<   r   s     r   _init_weightsz)EfficientNetPreTrainedModel._init_weights  s}    fry")R^DEE 	) M&&CT[5R&SSS{& &&(((((	) 	) '&r   N)rD   rE   rF   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   Moduler   r   r   r   r   r     sP         &$O)BI ) ) ) ) ) )r   r   c                        e Zd Zdef fdZe	 	 	 d	deej                 dee	         dee	         de
eef         fd            Z xZS )
EfficientNetModelr   c                    t                                          |           || _        t          |          | _        t          |          | _        |j        dk    r!t          j	        |j
        d          | _        nC|j        dk    r!t          j        |j
        d          | _        nt          d|j                   |                                  d S )Nr   T)	ceil_moder   z2config.pooling must be one of ['mean', 'max'] got )r/   r0   r   r$   r   r   encoderpooling_typer   	AvgPool2dr   pooler	MaxPool2d
ValueErrorpooling	post_initr;   s     r   r0   zEfficientNetModel.__init__  s       088*622 &((,v'8DIIIDKK E)),v'8DIIIDKKbRXR`bbccc 	r   Nr>   r   r   r?   c                    ||n| j         j        }||n| j         j        }|t          d          |                     |          }|                     |||          }|d         }|                     |          }|                    |j        d d                   }|s||f|dd          z   S t          |||j
                  S )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputr`   )r   r   use_return_dictr   r   r   r   reshapeshaper	   r`   )r<   r>   r   r   embedding_outputencoder_outputsr   pooled_outputs           r   rC   zEfficientNetModel.forward  s     %9$D  $+Jj 	 &1%<kk$+B]?@@@??<88,,!5# ' 
 
 ,A.$566%--m.A"1".EFF 	L%}58KKK7/')7
 
 
 	
r   )NNN)rD   rE   rF   r   r0   r   r   rH   rc   rp   r   r   r	   rC   rJ   rK   s   @r   r   r     s        1      "  59/3&*	"
 "
u01"
 'tn"
 d^	"

 
u>>	?"
 "
 "
 ^"
 "
 "
 "
 "
r   r   z
    EfficientNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g.
    for ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	deej                 deej                 dee	         dee	         de
eef         f
d            Z xZS )
"EfficientNetForImageClassificationc                    t                                          |           |j        | _        || _        t	          |          | _        t          j        |j                  | _	        | j        dk    rt          j
        |j        | j                  nt          j                    | _        |                                  d S )Nr   r   )r/   r0   
num_labelsr   r   r   r   r   dropout_rater   r   r   Identity
classifierr   r;   s     r   r0   z+EfficientNetForImageClassification.__init__   s        +-f55zF$7888KO?]^K^K^")F$5tGGGdfdodqdq 	r   Nr>   labelsr   r   r?   c                 j   ||n| j         j        }|                     |||          }|r|j        n|d         }|                     |          }|                     |          }d}||                     ||| j                   }|s|f|dd         z   }	||f|	z   n|	S t          |||j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )losslogitsr`   )	r   r   r   r   r   r   loss_functionr
   r`   )
r<   r>   r   r   r   outputsr   r   r   outputs
             r   rC   z*EfficientNetForImageClassification.forward  s     &1%<kk$+B]##LG[it#uu1<L--'!*]33//%%ffdkBBD 	FY,F)-)9TGf$$vE3!/
 
 
 	
r   )NNNN)rD   rE   rF   r0   r   r   rH   rc   
LongTensorrp   r   r   r
   rC   rJ   rK   s   @r   r   r     s        
 
 
 
 
  59-1/3&*!
 !
u01!
 )*!
 'tn	!

 d^!
 
u::	;!
 !
 !
 ^!
 !
 !
 !
 !
r   r   )r   r   r   )T)*rG   r   typingr   r   rH   r   activationsr   modeling_outputsr   r	   r
   modeling_utilsr   utilsr   r   configuration_efficientnetr   
get_loggerrD   loggerr   r   r   rp   r"   r   r$   r3   rM   rW   re   rr   r   r   r   r   r   r   __all__r   r   r   <module>r	     s   " !  " " " " " " " "        ! ! ! ! ! !         
 . - - - - - , , , , , , , , : : : : : : 
	H	%	%, C    @ @U3:. @ @ @ @ @(    RY   2
 
 
 
 
") 
 
 
4       4$ $ $ $ $ $ $ $N$ $ $ $ $RY $ $ $N    ")   BN N N N N	 N N NbW
 W
 W
 W
 W
") W
 W
 W
t ) ) ) ) )/ ) ) )  5
 5
 5
 5
 5
3 5
 5
 5
p   /
 /
 /
 /
 /
)D /
 /
 /
d e
d
dr   