
     `i                      d Z ddlmZ ddlZddlmZ ddlZddlZ	ddl
mZ ddlmZmZ ddlmZmZmZmZmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZ  ej        e           Z!dZ"dZ#d Z$d Z%	 dLdMdZ&dNdOdZ' G d d ej(        j)                  Z* G d! d"ej(        j+                  Z, G d# d$ej(        j)                  Z- G d% d&ej(        j)                  Z. G d' d(ej(        j)                  Z/ G d) d*ej(        j)                  Z0 G d+ d,ej(        j)                  Z1 G d- d.ej(        j)                  Z2 G d/ d0e2          Z3 G d1 d2ej(        j)                  Z4 G d3 d4ej(        j)                  Z5 G d5 d6ej(        j)                  Z6 G d7 d8ej(        j)                  Z7 G d9 d:ej(        j)                  Z8 G d; d<ej(        j)                  Z9 G d= d>ej(        j)                  Z:e G d? d@ej(        j)                              Z; G dA dBe          Z<dCZ=dDZ> edEe=           G dF dGe<                      Z? edHe=           G dI dJe<                      Z@g dKZAdS )PzTensorFlow Hubert model.    )annotationsN)Any   )get_tf_activation)TFBaseModelOutputTFCausalLMOutput)TFPreTrainedModelget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)add_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )HubertConfigr   g    חc                    t           j                            t           j                            t          |           dd                     }t           j                            | |z   |          \  }}|S )z
    Categorical sampling without replacement is currently not implemented. The gumbel-max trick will do for now - see
    https://github.com/tensorflow/tensorflow/issues/9260 for more info
    r   r   )tfmathlograndomuniformr   nntop_k)distributionnum_samplesz_indicess        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/hubert/modeling_tf_hubert.py_sample_without_replacementr$   5   sY    
 
RY&&z,'?'?AFF	G	GGA\A-{;;JAwN    c           
        t          |          }t          j        t          j        t          j        t          j        |d                   d          |          ddg          }t          j        t          j        |t          j        |ddg          gd                    }t          j        |t          j        | dg          |          S )zT
    Scatter function as in PyTorch with indices in format (batch_dim, indices)
    r   axisr   )	r   r   reshapebroadcast_toexpand_dimsrange	transposeconcat
scatter_nd)valuesbatch_indicesoutput_shapeindices_shapebroad_casted_batch_dimspair_indicess         r#    _scatter_values_on_batch_indicesr7   @   s     }--M j
rxa0@'A'AKKK][[^_ac]d  <	+BBJ}_`bd^eDfDf*gij k kllL=rz&2$'?'?NNNr%   shapetuple[int, int]	mask_probfloatmask_lengthint	min_masksreturn	tf.Tensorc           	     T   | \  }}|dk     rt          d          t          j                            ||d| d| d           |t          j        |t          j                  z  |z  t          j                            d          z   }t          j        ||          }t          j        |t          j	                  }t          j
                            ||z  |          }t          j        |          }t          j        ||ft          j	                  }t          j        |||dz
  z
  f          }t          ||          }	t          j        |	d	          }	t          j        |	dd|f          }	t          j        |	|||z  f          }	t          j        |          t          j        t          j        d
d
f         }
t          j        |
||df          }
t          j        |
|||z  f          }
|	|
z   }	t+          t          j        |	          |	t          j        |                    }|S )a  
    Computes random mask spans for a given shape

    Args:
        shape: the shape for which to compute masks.
            should be of size 2 where first element is batch size and 2nd is timesteps
        attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
        mask_prob:
            probability for each token to be chosen as start of the span to be masked. this will be multiplied by
            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
        mask_length: size of the mask
        min_masks: minimum number of masked spans

    Adapted from [fairseq's
    data_utils.py](https://github.com/pytorch/fairseq/blob/e0788f7007a8473a76db573985031f3c94201e79/fairseq/data/data_utils.py#L376).
    r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `messager   dtyper'   N)
ValueErrorr   	debuggingassert_lesscastfloat32r   r   maximumint32r   minimumsqueezezerosonesr$   r,   tiler*   r-   newaxisr7   	ones_liker8   )r8   r:   r<   r>   
batch_sizesequence_lengthnum_masked_spansspec_aug_maskuniform_distspec_aug_mask_idxsoffsetss              r#   _compute_mask_indicesr]   P   s4   . #(JQABBBL6^i 6 6#26 6 6	     !27?BJ#G#GG+UXZXaXiXijnXoXooz"2I>>w/:: w+'EGWXXz"233 Hj/:"(KKKM 7J;?(KLMML 5\CSTT (:B??!3aK5HII$6EUXcEc8deeh{##BJ
AAA$=>Ggg
,<a@AAGj:/?+/M"NOOG+g5 5
'((*<bh}>U>U M r%   masktgt_len
int | Nonec                    t          |           d         }||n|}t          j        d          }t          j        | |j                  } t          j        | ddddddf         dd|df          }||z
  t          z  S )z_
    Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
    r   Ng      ?rF   )r   r   constantrK   rG   rS   LARGE_NEGATIVE)r^   r_   src_lenone_cstexpanded_masks        r#   _expand_maskrg      s     q!G ,gg'Gk#G74w}---DGDD$!12Q7A4FGGMm#~55r%   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 d*d+ fdZ fdZd Z fdZd Zd Z	d  Z
d! Zd" Zd# Zd$ Zd% Zd& Zd' Zd( Zd) Z xZS ),TFHubertGroupNormzp
    From tensorflow-addons https://www.tensorflow.org/addons/api_docs/python/tfa/layers/GroupNormalization
        r'   MbP?TrQ   rR   Ngroupsr=   r)   epsilonr;   centerboolscalebeta_initializerkeras.initializers.Initializergamma_initializerbeta_regularizerkeras.regularizers.Regularizergamma_regularizerbeta_constraintkeras.constraints.Constraintgamma_constraintc                f    t                      j        di | d| _        || _        || _        || _        || _        || _        t          j	        
                    |          | _        t          j	        
                    |          | _        t          j        
                    |          | _        t          j        
                    |	          | _        t          j        
                    |
          | _        t          j        
                    |          | _        |                                  d S )NT )super__init__supports_maskingrl   r)   rm   rn   rp   r   initializersgetrq   rs   regularizersrt   rv   constraintsrw   ry   _check_axis)selfrl   r)   rm   rn   rp   rq   rs   rt   rv   rw   ry   kwargs	__class__s                r#   r}   zTFHubertGroupNorm.__init__   s     	""6""" $	
 % 2 6 67G H H!&!3!7!78I!J!J % 2 6 67G H H!&!3!7!78I!J!J$044_EE % 1 5 56F G Gr%   c                T   |                      |           |                     |           |                     |           |                     |           |                     |           |                     |           d| _        t                                          |           d S NT)	_check_if_input_shape_is_none'_set_number_of_groups_for_instance_norm_check_size_of_dimensions_create_input_spec_add_gamma_weight_add_beta_weightbuiltr|   buildr   input_shaper   s     r#   r   zTFHubertGroupNorm.build   s    **;77744[AAA&&{333,,,{+++k***
k"""""r%   c                2   t           j                            |          }t          j        |          }|                     |||          \  }}|                     ||          }|| j                 | j        z  dk    }|st          j	        ||          }n|}|S Nr   )
r   backend	int_shaper   r8   _reshape_into_groups_apply_normalizationr)   rl   r*   )	r   inputsr   tensor_input_shapereshaped_inputsgroup_shapenormalized_inputsis_instance_normoutputss	            r#   callzTFHubertGroupNorm.call   s    m--f55Xf--'+'@'@Vh'i'i$ 55o{SS'	2dkAaG 	(j!24FGGGG'Gr%   c                6   | j         | j        | j        | j        | j        t
          j                            | j                  t
          j                            | j	                  t
          j
                            | j                  t
          j
                            | j                  t
          j                            | j                  t
          j                            | j                  d}t!                                                      }i ||S )N)rl   r)   rm   rn   rp   rq   rs   rt   rv   rw   ry   )rl   r)   rm   rn   rp   r   r   	serializerq   rs   r   rt   rv   r   rw   ry   r|   
get_config)r   configbase_configr   s      r#   r   zTFHubertGroupNorm.get_config   s    kI|kZ % 2 < <T=R S S!&!3!=!=d>T!U!U % 2 < <T=R S S!&!3!=!=d>T!U!U$0::4;OPP % 1 ; ;D<Q R R
 
 gg((**(+(((r%   c                    |S Nr{   r   r   s     r#   compute_output_shapez&TFHubertGroupNorm.compute_output_shape   s    r%   c                f   fdt          t          |                    D             }|| j                 | j        z  dk    }|sj|| j                 | j        z  || j        <   |                    | j        | j                   t          j        |          }t          j        ||          }||fS ||fS )Nc                     g | ]
}|         S r{   r{   ).0ir   s     r#   
<listcomp>z:TFHubertGroupNorm._reshape_into_groups.<locals>.<listcomp>   s    NNN)!,NNNr%   r   )r-   lenr)   rl   insertr   stackr*   )r   r   r   r   r   r   r   s      `   r#   r   z&TFHubertGroupNorm._reshape_into_groups   s    NNNNeC<L<L6M6MNNN'	2dkAaG 	'%0%;t{%JK	"ty$+666(;//K j==O"K//;&&r%   c                $   t           j                            |          }t          t	          dt          |                              }|| j                 | j        z  dk    }|s| j        dk    rdn	| j        dz
  }n| j        dk    rdn	| j        dz
  }|                    |           t          j
                            ||d          \  }}|                     |          \  }	}
t          j
                            ||||	|
| j                  }|S )Nr   r'   T)keepdims)meanvariancerp   offsetvariance_epsilon)r   r   r   listr-   r   r)   rl   popr   r   moments_get_reshaped_weightsbatch_normalizationrm   )r   r   r   r   group_reduction_axesr   r)   r   r   gammabetar   s               r#   r   z&TFHubertGroupNorm._apply_normalization  s   m--o>>#E!S-=-=$>$>??'	2dkAaG 	<b22di!mDDb22di!mD  &&&8LW[\\h00==tE55!\ 6 
 
 ! r%   c                    |                      |          }d }d }| j        rt          j        | j        |          }| j        rt          j        | j        |          }||fS r   )_create_broadcast_shaperp   r   r*   r   rn   r   )r   r   broadcast_shaper   r   s        r#   r   z'TFHubertGroupNorm._get_reshaped_weights  sc    66{CC: 	<Jtz?;;E; 	::di99Dd{r%   c                    || j                  }|:t          dt          | j                   z   dz   t          |          z   dz             d S )NzAxis z\ of input tensor should have a defined dimension but the layer received an input with shape .)r)   rH   strr   r   dims      r#   r   z/TFHubertGroupNorm._check_if_input_shape_is_none)  sd    $)$;di..!pq k""# 	   ;r%   c                H    || j                  }| j        dk    r	|| _        d S d S Nr'   )r)   rl   r   s      r#   r   z9TFHubertGroupNorm._set_number_of_groups_for_instance_norm4  s.    $)$;"DKKK r%   c                :   || j                  }|| j        k     r:t          dt          | j                  z   dz   t          |          z   dz             || j        z  dk    r:t          dt          | j                  z   dz   t          |          z   dz             d S )NzNumber of groups (z.) cannot be more than the number of channels ().r   z0) must be a multiple of the number of channels ()r)   rl   rH   r   r   s      r#   r   z+TFHubertGroupNorm._check_size_of_dimensions:  s    $)$$dk""#BC c(( 	   !!$dk""#DE c(( 	   "!r%   c                :    | j         dk    rt          d          d S )Nr   zdYou are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead)r)   rH   r   s    r#   r   zTFHubertGroupNorm._check_axisN  s)    9>>v   >r%   c                    || j                  }t          j                            t	          |          | j         |i          | _        d S )N)ndimaxes)r)   r   layers	InputSpecr   
input_specr   s      r#   r   z$TFHubertGroupNorm._create_input_specT  s>    $)$,00c+6F6FdiY\M]0^^r%   c                    || j                  }|f}| j        r0|                     |d| j        | j        | j                  | _        d S d | _        d S )Nr   r8   nameinitializerregularizer
constraint)r)   rp   
add_weightrs   rv   ry   r   r   r   r   r8   s       r#   r   z#TFHubertGroupNorm._add_gamma_weightX  sb    $)$: 		 2 20 )  DJJJ DJJJr%   c                    || j                  }|f}| j        r0|                     |d| j        | j        | j                  | _        d S d | _        d S )Nr   r   )r)   rn   r   rq   rt   rw   r   r   s       r#   r   z"TFHubertGroupNorm._add_beta_weightg  sb    $)$; 		 1 1/ (  DIII DIIIr%   c                    dgt          |          z  }|| j                 | j        z  dk    }|s>|| j                 | j        z  || j        <   |                    | j        | j                   n| j        || j        <   |S r   )r   r)   rl   r   )r   r   r   r   s       r#   r   z)TFHubertGroupNorm._create_broadcast_shapev  s    #K 0 00'	2dkAaG 	5)4TY)?4;)NODI&""49dk::::)-ODI&r%   )rj   r'   rk   TTrQ   rR   NNNN)rl   r=   r)   r=   rm   r;   rn   ro   rp   ro   rq   rr   rs   rr   rt   ru   rv   ru   rw   rx   ry   rx   )__name__
__module____qualname____doc__r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   s   @r#   ri   ri      sg         ;B<B;?<@8<9=      <	# 	# 	# 	# 	#   ) ) ) ) )"  
' 
' 
'! ! !.	 	 		 	 	    (  _ _ _          r%   ri   c                  B     e Zd ZdZ fdZd Zd Z fdZ fdZ xZ	S )TFHubertWeightNormConv1DzeAdapted from https://www.tensorflow.org/probability/api_docs/python/tfp/layers/weight_norm/WeightNormc           
          t                      j        d|||dddd| || _        d| _        t	          j        ddg          | _        d S )	NvalidT	he_normal)filterskernel_sizerl   paddinguse_biasbias_initializer   r   r   r{   )r|   r}   explicit_paddingfilter_axisr   rb   kernel_norm_axes)r   r   r   rl   r   r   r   s         r#   r}   z!TFHubertWeightNormConv1D.__init__  sr     	
#(	
 	
 	
 	
 	
 !1 "QF 3 3r%   c                    t          j        t          j        t          j        | j                  | j                            }| j                            |ddt           j        t           j        f                    dS )z"Set the norm of the weight vector.r(   N)	r   sqrt
reduce_sumsquareweight_vr   weight_gassignrT   )r   kernel_norms     r#   
_init_normz#TFHubertWeightNormConv1D._init_norm  s_    gbmBIdm,D,D4K`aaabb[BJ
)BCDDDDDr%   c                    t           j                            | j        | j                  t          j        | j                  z  }t          j        |          | _        dS )zGenerate normalized weights.r(   N)r   r   l2_normalizer   r   r.   r   kernel)r   r   s     r#   _normalize_kernelz*TFHubertWeightNormConv1D._normalize_kernel  sK    ##DM8M#NNQSQ]^b^kQlQlll6**r%   c                   | j         st                                          |           t          j        t          j        | j                  dd          | _        | j        | _        |                     dt          | j        j
        | j                           ddfd| j        j        d          | _        |                                  |                     d| j        fd	d
          | _        d S d S )Nr   T)r   	trainabler   r   rR   )r   r8   r   rG   r  biasrQ   )r   r8   r   r  )r   r|   r   r   Variabler.   r   r   r   r=   r8   r   rG   r   r   r   r  r   s     r#   r   zTFHubertWeightNormConv1D.build  s    z 	qGGMM+&&&+bl4;&?&?j\`aaaDK KDM OO4=.t/?@AA1aH"m) ,  DM OOVDL?X_koppDIII	q 	qr%   c                    |                                   t          j        |d| j        | j        fdf          }t	                                          |          }|S )N)r   r   )r   r   padr   r|   r   )r   r   padded_inputsoutputr   s       r#   r   zTFHubertWeightNormConv1D.call  sU     	   v1FH]0^`f'ghhm,,r%   )
r   r   r   r   r}   r   r   r   r   r   r   s   @r#   r   r     s        oo4 4 4 4 4E E E
+ + +
q q q q q"	 	 	 	 	 	 	 	 	r%   r   c                  2     e Zd Zdd fd
ZddZddZ xZS )TFHubertNoLayerNormConvLayerr   r   r   layer_idr=   r   r   r?   Nonec                V    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t          |j                  | _        d S )Nr   r   convr   r   stridesr   r   r{   )r|   r}   conv_dimin_conv_dimout_conv_dimr   r   Conv1Dconv_kernelconv_stride	conv_biasr  r   feat_extract_activation
activationr   r   r
  r   r   s       r#   r}   z%TFHubertNoLayerNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	 ,F,JKKr%   hidden_statesr@   c                Z    |                      |          }|                     |          }|S r   )r  r  r   r  s     r#   r   z!TFHubertNoLayerNormConvLayer.call  s*    		-0066r%   Nc                    | j         rd S d| _         t          | dd           [t          j        | j        j                  5  | j                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S NTr  )r   getattrr   
name_scoper  r   r   r  r   s     r#   r   z"TFHubertNoLayerNormConvLayer.build  s    : 	F
4&&2ty~.. @ @	tT-= >???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 32s    #A00A47A4r   r   r   r
  r=   r   r   r?   r  r  r@   r?   r@   r   r   r   r   r}   r   r   r   r   s   @r#   r	  r	    sy        L L L L L L L   
@ @ @ @ @ @ @ @r%   r	  c                  2     e Zd Zdd fd
ZddZddZ xZS )TFHubertLayerNormConvLayerr   r   r   r
  r=   r   r   r?   r  c                    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t
          j                            d|j                  | _        t          |j                  | _        d S )Nr   r   r  r  
layer_norm)r   rm   r{   )r|   r}   r  r  r  r   r   r  r  r  r  r  LayerNormalizationlayer_norm_epsr(  r   r  r  r  s       r#   r}   z#TFHubertLayerNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	  ,99|U[Uj9kk+F,JKKr%   r  r@   c                    |                      |          }|                     |          }|                     |          }|S r   r  r(  r  r  s     r#   r   zTFHubertLayerNormConvLayer.call  ;    		-006666r%   Nc                   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j	        g           d d d            d S # 1 swxY w Y   d S d S NTr  r(  
r   r  r   r   r  r   r   r  r(  r  r   s     r#   r   z TFHubertLayerNormConvLayer.build     : 	F
4&&2ty~.. @ @	tT-= >???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @4t,,8t344 G G%%tT43D&EFFFG G G G G G G G G G G G G G G G G G 98$    #A//A36A3)#CC Cr!  r"  r#  r   r$  r   s   @r#   r&  r&    sy        L L L L L L L   	G 	G 	G 	G 	G 	G 	G 	Gr%   r&  c                  2     e Zd Zdd fd
ZddZddZ xZS )TFHubertGroupNormConvLayerr   r   r   r
  r=   r   r   r?   r  c                    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t          |j                  | _        t          | j        |j        d          | _        d S )Nr   r   r  r  r(  )rl   rm   r   r{   )r|   r}   r  r  r  r   r   r  r  r  r  r  r   r  r  ri   r*  r(  r  s       r#   r}   z#TFHubertGroupNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	 ,F,JKK+43DfNcjvwwwr%   r  r@   c                    |                      |          }|                     |          }|                     |          }|S r   r,  r  s     r#   r   zTFHubertGroupNormConvLayer.call  r-  r%   Nc                   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j	        g           d d d            d S # 1 swxY w Y   d S d S r/  r0  r   s     r#   r   z TFHubertGroupNormConvLayer.build  r1  r2  r!  r"  r#  r   r$  r   s   @r#   r4  r4    sy        x x x x x x x   	G 	G 	G 	G 	G 	G 	G 	Gr%   r4  c                  0     e Zd Zd fdZdd
ZddZ xZS )TFHubertPositionalConvEmbeddingr   r   r   r   r?   r  c                    t                      j        di | t          |j        |j        |j        |j        dz  d          | _        t          |j                  | _        t          |j
                  | _        || _        d S )Nr   r  )r   r   rl   r   r   r{   )r|   r}   r   hidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsr  TFHubertSamePadLayerr   r   r  r  r   r   r   r   r   s      r#   r}   z(TFHubertPositionalConvEmbedding.__init__   s    ""6""",&67#;q@
 
 
	 ,F,JKK+F,JKKr%   r  r@   c                    |                      |          }|                     |          }|                     |          }|S r   )r  r   r  r  s     r#   r   z$TFHubertPositionalConvEmbedding.call-  s;    		-00]3366r%   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r  )	r   r  r   r   r  r   r   r   r;  r   s     r#   r   z%TFHubertPositionalConvEmbedding.build3  s    : 	F
4&&2ty~.. G G	tT[-D EFFFG G G G G G G G G G G G G G G G G G 32s    (A55A9<A9r   r   r   r   r?   r  r#  r   r$  r   s   @r#   r9  r9    sm                G G G G G G G Gr%   r9  c                  $     e Zd Z fdZd Z xZS )r>  c                ^     t                      j        di | |dz  dk    rdnd| _        d S )Nr   r   r   r{   )r|   r}   num_pad_remove)r   r<  r   r   s      r#   r}   zTFHubertSamePadLayer.__init__>  sA    ""6"""#:Q#>!#C#Caar%   c                J    | j         dk    r|d d d | j          d d f         }|S )Nr   )rE  r  s     r#   r   zTFHubertSamePadLayer.callB  s;    "")!!!-C0C/C-CQQQ*FGMr%   )r   r   r   r}   r   r   r   s   @r#   r>  r>  =  sL        K K K K K      r%   r>  c                  .     e Zd Zd fdZd Zdd
Z xZS )TFHubertFeatureEncoderr   r   r   r   r?   r  c                ^    t                      j        di | j        dk    r;t          ddd           gfdt	          j        dz
            D             z   }nDj        dk    r!fdt	          j                  D             }nt          d	j         d
          || _        d S )Ngroupr   conv_layers.r
  r   c           	     F    g | ]}t          |d z   d|d z              S )r   rK  rL  )r	  r   r   r   s     r#   r   z3TFHubertFeatureEncoder.__init__.<locals>.<listcomp>M  sR     g g g -Va!eJ`YZ]^Y^J`J`aaag g gr%   r   layerc                :    g | ]}t          |d |           S )rK  rL  )r&  rN  s     r#   r   z3TFHubertFeatureEncoder.__init__.<locals>.<listcomp>R  sB        +6ADVSTDVDVWWW  r%   z`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']r{   )r|   r}   feat_extract_normr4  r-   num_feat_extract_layersrH   conv_layers)r   r   r   rS  r   s    `  r#   r}   zTFHubertFeatureEncoder.__init__I  s   ""6"""#w..5fqOa^_OaOabbbc g g g gv=ABBg g g KK %00   v=>>  KK
 t1Ittt   'r%   c                Z    t          j        |d          }| j        D ]} ||          }|S r   )r   r,   rS  )r   input_valuesr  
conv_layers       r#   r   zTFHubertFeatureEncoder.call\  s<    |R88* 	6 	6J&J}55MMr%   Nc                    | j         rd S d| _         | j        D ]H}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S r   )r   rS  r   r   r   r   )r   r   rV  s      r#   r   zTFHubertFeatureEncoder.buildb  s    : 	F
* 	' 	'Jz// ' '  &&&' ' ' ' ' ' ' ' ' ' ' ' ' ' '	' 	's   AA	A	rB  r   r$  r   s   @r#   rH  rH  H  s`        ' ' ' ' ' '&  ' ' ' ' ' ' ' 'r%   rH  c                       e Zd Z fdZ xZS )TFHubertFeatureExtractorc                     t                      j        |fi | t          j        d| j        j         d| j        j        d         j         dt                     d S )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)r|   r}   warningswarnr   r   	__bases__FutureWarningr?  s      r#   r}   z!TFHubertFeatureExtractor.__init__l  s    **6***E$.1 E EN,Q/8E E E 		
 	
 	
 	
 	
r%   )r   r   r   r}   r   r   s   @r#   rY  rY  k  s8        
 
 
 
 
 
 
 
 
r%   rY  c                  2     e Zd Zd fdZddd
ZddZ xZS )TFHubertFeatureProjectionr   r   c                p    t                      j        di | t          j                            |j        d          | _        t          j                            |j        t          |j
                  dd          | _        t          j                            |j                  | _        || _        d S )Nr(  rm   r   rQ   
projectionunitskernel_initializerr   r   )rater{   )r|   r}   r   r   r)  r*  r(  Denser;  r
   initializer_rangerc  Dropoutfeat_proj_dropoutdropoutr   r?  s      r#   r}   z"TFHubertFeatureProjection.__init__w  s    ""6""",99&BW^j9kk,,,$.v/GHH$	 - 
 
 |++1I+JJr%   Fr  r@   trainingro   r?   c                    |                      |          }|                     |          }|                     ||          }|S Nrm  )r(  rc  rl  r   r  rm  s      r#   r   zTFHubertFeatureProjection.call  s?    6666]XFFr%   Nc                   | j         rd S d| _         t          | dd           ct          j        | j        j                  5  | j                            d d | j        j        d         g           d d d            n# 1 swxY w Y   t          | dd           ft          j        | j	        j                  5  | j	                            d d | j        j        d         g           d d d            d S # 1 swxY w Y   d S d S )NTr(  r'   rc  )
r   r  r   r   r(  r   r   r   r  rc  r   s     r#   r   zTFHubertFeatureProjection.build  s   : 	F
4t,,8t344 N N%%tT4;3G3K&LMMMN N N N N N N N N N N N N N N4t,,8t344 N N%%tT4;3G3K&LMMMN N N N N N N N N N N N N N N N N N 98s$    .A::A>A>4.C//C36C3r   r   Fr  r@   rm  ro   r?   r@   r   r$  r   s   @r#   r`  r`  v  sr                 	N 	N 	N 	N 	N 	N 	N 	Nr%   r`  c                  P     e Zd ZdZ	 	 	 d!d" fdZd#dZ	 	 	 	 	 d$d%dZd&d Z xZS )'TFHubertAttentionz6Multi-headed attention from "Attention Is All You Need        FT	embed_dimr=   	num_headsrl  r;   
is_decoderro   r  c                r    t                      j        d
i | || _        || _        t          j                            |          | _        ||z  | _        | j        |z  | j        k    rt          d| j         d| d          | j        dz  | _
        || _        t          j                            ||d          | _        t          j                            ||d          | _        t          j                            ||d          | _        t          j                            ||d	          | _        d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: r   g      k_proj)r   r   q_projv_projout_projr{   )r|   r}   ry  rz  r   r   rj  rl  head_dimrH   scalingr{  rh  r}  r~  r  r  )r   ry  rz  rl  r{  r  r   r   s          r#   r}   zTFHubertAttention.__init__  s1    	""6"""""|++G44!Y.MI%$.883dn 3 3%.3 3 3   }d*$l((T(QQl((T(QQl((T(QQ**9t**UUr%   tensorr@   seq_lenbszc           	     n    t          j        t          j        |||| j        | j        f          d          S )Nr   r   r   r   )r   r.   r*   rz  r  )r   r  r  r  s       r#   _shapezTFHubertAttention._shape  s.    |BJvWdndm/\]]_klllr%   Nr  key_value_statestf.Tensor | Nonepast_key_valuetuple[tuple[tf.Tensor]] | Noneattention_masklayer_head_maskrm  bool | Noner?   "tuple[tf.Tensor, tf.Tensor | None]c           
     	   |du}t          |          \  }}	}
|                     |          | j        z  }|r||d         }|d         }n>|rU|                     |                     |          d|          }|                     |                     |          d|          }n||                     |                     |          d|          }|                     |                     |          d|          }t          j        |d         |gd          }t          j        |d         |gd          }nT|                     |                     |          d|          }|                     |                     |          d|          }| j        r||f}|| j	        z  d| j
        f}t          j        |                     ||	|          |          }t          j        ||          }t          j        ||          }t          |          d         }t          j        ||d          }t          j                            t          |          || j	        z  |	|gd	|| j	        z  |	|f d
t          |                      |t          j                            t          |          |d|	|gd|d|	|f d
t          |                      t          j        ||j                  }t          j        ||| j	        |	|f          |z   }t          j        ||| j	        z  |	|f          }t#          |d          }|t          j                            t          |          | j	        gd| j	         d
t          |                      t          j        |d          t          j        ||| j	        |	|f          z  }t          j        ||| j	        z  |	|f          }|                     ||          }t          j        ||          }t          j                            t          |          || j	        z  |	| j
        gd|| j	        |	| j
        f d
t          |                      t          j        t          j        ||| j	        |	| j
        f          d          }t          j        |||	|
f          }|                     |          }t          j        ||| j	        |	|f          }|||fS )z#Input shape: Batch x Time x ChannelNr   r   r'   r   r(   T)transpose_bz$Attention weights should be of size z	, but is rC   z!Attention mask should be of size rF   z/Head mask for a single layer should be of size )r   r'   r   r   rp  z `attn_output` should be of size r  )r   r~  r  r  r}  r  r   r/   r{  rz  r  r*   matmulrI   assert_equalrK   rG   r   rl  r.   r  )r   r  r  r  r  r  rm  is_cross_attentionr  r_   ry  query_states
key_statesvalue_states
proj_shaperd   attn_weights
attn_probsattn_outputs                      r#   r   zTFHubertAttention.call  s5    .T9",]";";Wi {{=11DL@ 	L."<'*J)!,LL 	LT[[1A%B%BBLLJ;;t{{3C'D'Db#NNLL'T[[%?%?SIIJ;;t{{='A'A2sKKLN1$5z#BKKKJ9nQ&7%FQOOOLL T[[%?%?SIIJ;;t{{='A'A2sKKL? 	8 ),7NDN*B>
z$++lGS"I"I:VVZ
J77
z,
;;Z((+yztLLL
!!|$$4>!7G4/dn8LgW^7_ / /|,,/ /	 	" 	
 	
 	
 %L%%>**a'*5a'8R 5 5">225 5	 &     W^<;MNNNN:lS$.'SZ4[\\_mmL:lS4>5I7T[4\]]L%l<<<&L%%?++ 6t~ 6 6"?336 6	 &    :o}EE
sDNGWEI I L :lS4>5I7T[4\]]L\\,\BB
i
L99
!!{##4>!7DM:.CRVR_3` . .{++. .	 	" 	
 	
 	
 lJ{S$.'4=$QRRT`
 
 jsGY.GHHmmK00"$*\CQXZa;b"c"cL.88r%   c                t   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j	        j                  5  | j	                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j
        j                  5  | j
                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr}  r~  r  r  )r   r  r   r   r}  r   r   ry  r~  r  r  r   s     r#   r   zTFHubertAttention.build/  s   : 	F
44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @4T**6t}122 B B##T4$@AAAB B B B B B B B B B B B B B B B B B 76sH    #A//A36A3)#CCC#EEE;#F++F/2F/)rx  FT)
ry  r=   rz  r=   rl  r;   r{  ro   r  ro   )r  r@   r  r=   r  r=   )NNNNF)r  r@   r  r  r  r  r  r  r  r  rm  r  r?   r  r   )	r   r   r   r   r}   r  r   r   r   r   s   @r#   rw  rw    s        @@  V V V V V V V8m m m m .29=+/,0 %t9 t9 t9 t9 t9lB B B B B B B Br%   rw  c                  2     e Zd Zd fdZddd
ZddZ xZS )TFHubertFeedForwardr   r   c                    t                      j        di | t          j                            |j                  | _        t          j                            |j        t          |j
                  dd          | _        t          |j                  | _        t          j                            |j        t          |j
                  dd          | _        t          j                            |j                  | _        || _        d S )NrQ   intermediate_denserd  output_denser{   )r|   r}   r   r   rj  activation_dropoutintermediate_dropoutrh  intermediate_sizer
   ri  r  r   
hidden_actintermediate_act_fnr;  r  hidden_dropoutoutput_dropoutr   r?  s      r#   r}   zTFHubertFeedForward.__init__C  s    ""6"""$)L$8$89R$S$S!"',"4"4*.v/GHH$%	 #5 #
 #
 $5V5F#G#G !L..$.v/GHH$	 / 
 
 $l2263HIIr%   Fr  r@   rm  ro   r?   c                    |                      |          }|                     |          }|                     ||          }|                     |          }|                     ||          }|S ro  )r  r  r  r  r  rq  s      r#   r   zTFHubertFeedForward.callY  sq    //>>00??11-(1SS))-88++MH+MMr%   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j        j
        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r  )r   r  r   r   r  r   r   r   r;  r  r  r   s     r#   r   zTFHubertFeedForward.buildb  s   : 	F
4-t44@t6;<< U U'--tT4;;R.STTTU U U U U U U U U U U U U U U4..:t0566 U U!''tT[5R(STTTU U U U U U U U U U U U U U U U U U ;:s$    (A44A8;A8.(C##C'*C'rs  rt  ru  r   r$  r   s   @r#   r  r  B  sr             ,    	U 	U 	U 	U 	U 	U 	U 	Ur%   r  c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )TFHubertEncoderLayerr   r   c                    t                      j        d	i | t          |j        |j        |j        dd          | _        t          j        	                    |j
                  | _        t          j                            |j        d          | _        t          |d          | _        t          j                            |j        d          | _        || _        d S 
NF	attention)ry  rz  rl  r{  r   r(  rb  feed_forwardr   final_layer_normr{   r|   r}   rw  r;  num_attention_headsattention_dropoutr  r   r   rj  r  rl  r)  r*  r(  r  r  r  r   r?  s      r#   r}   zTFHubertEncoderLayer.__init__p      ""6"""*(0,
 
 
 |++F,ABB,99&BW^j9kk/^LLL % ? ?H]dv ? w wr%   NFr  r@   r  r  output_attentionsr  rm  ro   r?   tuple[tf.Tensor]c                   |}|                      |||          \  }}}|                     ||          }||z   }|                     |          }||                     |          z   }|                     |          }|f}|r||fz  }|S N)r  rm  rp  )r  rl  r(  r  r  	r   r  r  r  rm  attn_residualr  r!   r   s	            r#   r   zTFHubertEncoderLayer.call  s     &)-.8 *8 *
 *
&|Q ]XFF%566%(9(9-(H(HH--m<< " 	'&Gr%   c                h   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S NTr  r(  r  r  r   r  r   r   r  r   r   r(  r   r;  r  r  r   s     r#   r   zTFHubertEncoderLayer.build     : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M4..:t0566 . .!''---. . . . . . . . . . . . . . .4+T22>t49:: S S%++T49P,QRRRS S S S S S S S S S S S S S S S S S ?>H    A''A+.A+!(CCCD66D:=D:0(F%%F),F)rs  NFF
r  r@   r  r  r  r  rm  ro   r?   r  r   r$  r   s   @r#   r  r  o  s}             $ ,0).    2S S S S S S S Sr%   r  c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )#TFHubertEncoderLayerStableLayerNormr   r   c                    t                      j        d	i | t          |j        |j        |j        dd          | _        t          j        	                    |j
                  | _        t          j                            |j        d          | _        t          |d          | _        t          j                            |j        d          | _        || _        d S r  r  r?  s      r#   r}   z,TFHubertEncoderLayerStableLayerNorm.__init__  r  r%   NFr  r@   r  r  r  r  rm  ro   r?   r  c                   |}|                      |          }|                     |||          \  }}}|                     ||          }||z   }||                     |                     |                    z   }|f}|r||fz  }|S r  )r(  r  rl  r  r  r  s	            r#   r   z(TFHubertEncoderLayerStableLayerNorm.call  s     &66)-.8 *8 *
 *
&|Q ]XFF%5%(9(9$:O:OP]:^:^(_(__ " 	'&Gr%   c                h   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S r  r  r   s     r#   r   z)TFHubertEncoderLayerStableLayerNorm.build  r  r  rs  r  r  r   r$  r   s   @r#   r  r    s}             $ ,0).    .S S S S S S S Sr%   r  c                  <     e Zd Zd fdZ	 	 	 	 	 dddZddZ xZS )TFHubertEncoderr   r   c                h    t                      j        di | | _        t          d          | _        t
          j                            j        d          | _	        t
          j        
                    j                  | _        fdt          j                  D             | _        d S )Npos_conv_embedr  r(  rb  c                8    g | ]}t          d |           S zlayers.r  )r  rN  s     r#   r   z,TFHubertEncoder.__init__.<locals>.<listcomp>  s-    ppp1*6!FFFpppr%   r{   r|   r}   r   r9  r  r   r   r)  r*  r(  rj  r  rl  r-   num_hidden_layersrO  r?  s    ` r#   r}   zTFHubertEncoder.__init__  s    ""6"""=fK[\\\,99&BW^j9kk|++F,ABBppppPUV\VnPoPoppp


r%   NFTr  r@   r  r  r  r  output_hidden_statesreturn_dictrm  r?   $TFBaseModelOutput | tuple[tf.Tensor]c                b   |rdnd }|rdnd }|(|t          j        |d          z  }t          |          }nd }|                     |          }	||	z   }|                     |          }|                     ||          }t          | j                  D ]e\  }
}|r||fz   }t          j	        
                    dd          }|r|| j        j        k     r@ |||||          }|d         }|r||d         fz   }f|r||fz   }|st          d |||fD                       S t          |||          S )	Nr{   r'   rp  r   r   r  r  r  rm  c              3     K   | ]}||V  	d S r   r{   r   vs     r#   	<genexpr>z'TFHubertEncoder.call.<locals>.<genexpr>  (      mmq_`_l_l_l_l_lmmr%   last_hidden_stater  
attentions)r   r,   rg   r  r(  rl  	enumeraterO  npr   r   r   	layerdroptupler   r   r  r  r  r  r  rm  all_hidden_statesall_self_attentionsposition_embeddingsr   layer_moduledropout_probabilitylayer_outputss                 r#   r   zTFHubertEncoder.call  s    #7@BBD$5?bb4%)BN>2,N,NNM).99NN!N"11-@@%(;;66]XFF(44 	P 	POA|# I$58H$H! #%)"3"3Aq"9"9 04;3HHH(L+-"3!	  M *!,M  P&9]1=M<O&O#   	E 1]4D D 	nmm]4EGZ$[mmmmmm ++*
 
 
 	
r%   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           P| j
        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S NTr  r(  rO  r   r  r   r   r  r   r   r(  r   r;  rO  r   r   rO  s      r#   r   zTFHubertEncoder.build%     : 	F
4)400<t2788 0 0#))$///0 0 0 0 0 0 0 0 0 0 0 0 0 0 04t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &6    A''A+.A+!(CCCD55D9	<D9	rs  NFFTFr  r@   r  r  r  r  r  r  r  r  rm  r  r?   r  r   r$  r   s   @r#   r  r    s        q q q q q q ,0).,1#' %5
 5
 5
 5
 5
n& & & & & & & &r%   r  c                  <     e Zd Zd fdZ	 	 	 	 	 dddZddZ xZS )TFHubertEncoderStableLayerNormr   r   c                h    t                      j        di | | _        t          d          | _        t
          j                            j        d          | _	        t
          j        
                    j                  | _        fdt          j                  D             | _        d S )Nr  r  r(  rb  c                8    g | ]}t          d |           S r  )r  rN  s     r#   r   z;TFHubertEncoderStableLayerNorm.__init__.<locals>.<listcomp>=  s:     
 
 
PQ/]q]]KKK
 
 
r%   r{   r  r?  s    ` r#   r}   z'TFHubertEncoderStableLayerNorm.__init__7  s    ""6"""=fK[\\\,99&BW^j9kk|++F,ABB
 
 
 
UZ[a[sUtUt
 
 



r%   NFTr  r@   r  r  r  r  r  r  rm  r?   r  c                b   |rdnd }|rdnd }|(|t          j        |d          z  }t          |          }nd }|                     |          }	||	z   }|                     ||          }t          | j                  D ]e\  }
}|r||fz   }t          j        	                    dd          }|r|| j
        j        k     r@ |||||          }|d         }|r||d         fz   }f|                     |          }|r||fz   }|st          d |||fD                       S t          |||          S )	Nr{   r'   rp  r   r   r  c              3     K   | ]}||V  	d S r   r{   r  s     r#   r  z6TFHubertEncoderStableLayerNorm.call.<locals>.<genexpr>q  r  r%   r  )r   r,   rg   r  rl  r  rO  r  r   r   r   r  r(  r  r   r  s                 r#   r   z#TFHubertEncoderStableLayerNorm.callA  s    #7@BBD$5?bb4%)BN>2,N,NNM).99NN!N"11-@@%(;;]XFF(44 	P 	POA|# I$58H$H! #%)"3"3Aq"9"9 04;3HHH(L+-"3!	  M *!,M  P&9]1=M<O&O#66 	E 1]4D D 	nmm]4EGZ$[mmmmmm ++*
 
 
 	
r%   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           P| j
        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S r  r  r  s      r#   r   z$TFHubertEncoderStableLayerNorm.buildx  r  r  rs  r  r  r   r$  r   s   @r#   r  r  6  s|        
 
 
 
 
 
 ,0).,1#' %5
 5
 5
 5
 5
n& & & & & & & &r%   r  c                  j     e Zd ZeZd fdZddZddZdd dZe		 	 	 	 	 	 	 	 	 d!d"d            Z
 xZS )#TFHubertMainLayerr   r   c                
    t                      j        di | || _        t          |d          | _        t          |d          | _        |j        rt          |d          | _	        d S t          |d          | _	        d S )Nfeature_extractorr  feature_projectionencoderr{   )r|   r}   r   rH  r  r`  r  do_stable_layer_normr  r  r  r?  s      r#   r}   zTFHubertMainLayer.__init__  s    ""6"""!7EX!Y!Y!Y";FI]"^"^"^& 	C9&yQQQDLLL*6	BBBDLLLr%   Nc                   |                      | j        j        fddd          | _        | j        rd S d| _        t          | dd           Pt          j        | j        j	                  5  | j        
                    d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j	                  5  | j        
                    d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j	                  5  | j        
                    d            d d d            d S # 1 swxY w Y   d S d S )Nr   Tmasked_spec_embed)r8   r   r  r   r  r  r  )r   r   r;  r  r   r  r   r   r  r   r   r  r  r   s     r#   r   zTFHubertMainLayer.build  s   !%;*,)tZm "1 "
 "
 : 	F
4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 34-t44@t6;<< 4 4'--d3334 4 4 4 4 4 4 4 4 4 4 4 4 4 44D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 65s6   )BBB
C11C58C5+EEEinput_lengthsr@   c                z    d }t          | j        j        | j        j                  D ]\  }} ||||          }|S )zH
        Computes the output length of the convolutional layers
        c                    | |z
  |z  dz   S r   r{   )input_lengthr   strides      r#   _conv_out_lengthzLTFHubertMainLayer._get_feat_extract_output_lengths.<locals>._conv_out_length  s     !;.69A==r%   )zipr   r  r  )r   r  r  r   r  s        r#    _get_feat_extract_output_lengthsz2TFHubertMainLayer._get_feat_extract_output_lengths  sY    
	> 	> 	>
 $'t{'>@W#X#X 	Q 	QK,,]KPPMMr%   r  mask_time_indicesr  c                V   t          |          \  }}}t          | j        dd          s|S |ot          j        t          j        |ddddt          j        f         t          j                  | j        t          j        t          j        ddf         |          }n| j        j	        dk    rt          ||f| j        j	        | j        j        d          }t          j        t          j        |ddddt          j        f         t          j                  | j        t          j        t          j        ddf         |          }| j        j        dk    rUt          ||f| j        j        | j        j                  }t          j        |ddt          j        ddf         |d          }|S )z
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://huggingface.co/papers/1904.08779).
        apply_spec_augmentTNr   r   )r:   r<   r>   )r:   r<   )r   r  r   r   whererK   rT   ro   r  mask_time_probr]   mask_time_lengthmask_feature_probmask_feature_length)r   r  r
  rV   rW   r;  mask_feature_indicess          r#   _mask_hidden_statesz%TFHubertMainLayer._mask_hidden_states  s   
 4>m3L3L0
O[ t{$8$?? 	!  (H)!!!QQQ
*:;RWEE&rz2:qqq'@A MM ['!++ 5_-+4 K8	! ! ! H)!!!QQQ
*:;RWEE&rz2:qqq'@A M ;(1,,#8[)+7 K;$ $ $ 
 H%9!!!RZ:J%K]\]^^Mr%   FrU  r  token_type_idsposition_ids	head_maskinputs_embedsr  r  r  r  rm  ro   r   r   c                D   |                      t          j        |t          j                  |
          }|W|                     t          j        |d                    }t          j        |t          |          d         |j                  }| 	                    ||
          }|
                    d          }|
r|                     ||          }|                     |||||	|
          }|d         }|	s|f|dd          z   S t          ||j        |j        	          S )
Nrp  r'   r   )maxlenrG   r
  )r
  )r  r  r  r  rm  r   r  )r  r   rK   rL   r	  r   sequence_maskr   rG   r  r   r  r  r   r  r  )r   rU  r  r  r  r  r  r  r  r  rm  r   r  output_lengthsr
  encoder_outputss                   r#   r   zTFHubertMainLayer.call  sJ    ..rw|RZ/P/P[c.dd%!BB2=Q_acCdCdeeN-z-'@'@'C=K^  N ///QQ"JJ':;; 	i 44]Vg4hhM,,)/!5# ' 
 
 (* 	:!#oabb&999 +)7&1
 
 
 	
r%   rs  r   )r  r@   )r  r@   r
  r  	NNNNNNNNF)rU  r@   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rm  ro   r   r   )r   r   r   r   config_classr}   r   r	  r  r   r   r   r   s   @r#   r  r    s        L	C 	C 	C 	C 	C 	C) ) ) )$   * * * * *X  ,0+/)-&**..215#'/
 /
 /
 /
 ]/
 /
 /
 /
 /
r%   r  c                  D     e Zd ZdZeZdZdZed             Z	 fdZ
 xZS )TFHubertPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    hubertrU  c                    t          j        dt           j        d          t          j        dt           j        d          t          j        dt           j        d          dS )N)Ni>  rU  r  )NNr  r  )rU  r  r  )r   
TensorSpecrL   rN   r   s    r#   input_signaturez'TFHubertPreTrainedModel.input_signature!  sW     M-.YYY mL"(IYZZZ mL"(IYZZZ
 
 	
r%   c                     t                      j        |g|R i | t                              d| j        j         d           d S )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPU)r|   r}   loggerwarningr   r   r   r   r   r   r   s       r#   r}   z TFHubertPreTrainedModel.__init__)  si    3&333F333E( E E E	
 	
 	
 	
 	
r%   )r   r   r   r   r   r  base_model_prefixmain_input_namepropertyr$  r}   r   r   s   @r#   r   r     sm         
  L $O
 
 X

 
 
 
 
 
 
 
 
r%   r   a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_values` only and nothing else: `model(input_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_values, attention_mask])` or `model([input_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_values": input_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`HubertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a	  
    Args:
        input_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]` `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`np.ndarray` or `tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_values` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `input_values` indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False``):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zbThe bare TFHubert Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Zd fdZ ee           eee          e		 	 	 	 	 	 	 	 	 ddd                                    Z
ddZ xZS )TFHubertModelr   r   c                |     t                      j        |g|R i | || _        t          |d          | _        d S )Nr!  r  )r|   r}   r   r  r!  r)  s       r#   r}   zTFHubertModel.__init__  sI    3&333F333'X>>>r%   output_typer  NFrU  r@   r  r  r  r  r  r  r  r  r  r  rm  ro   r?   r  c                    |r|n| j         j        }|r|n| j         j        }|	r|	n| j         j        }	|                     |||||||||	|

  
        }|S )a!  

        Returns:

        Example:

        ```python
        >>> from transformers import AutoProcessor, TFHubertModel
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
        >>> model = TFHubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


        >>> def map_to_array(example):
        ...     example["speech"] = example["audio"]["array"]
        ...     return example


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> hidden_states = model(input_values).last_hidden_state
        ```
rU  r  r  r  r  r  r  r  r  rm  )r   r  r  r  r!  )r   rU  r  r  r  r  r  r  r  r  rm  r   s               r#   r   zTFHubertModel.call  s    T 8Lq33QUQ\Qq1Be--He%0Mkkdk6M++%))%'/!5#  
 
 r%   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr!  )r   r  r   r   r!  r   r   r   s     r#   r   zTFHubertModel.build  s    : 	F
44((4t{/00 ( (!!$'''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( 54s    A((A,/A,rs  r  )rU  r@   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rm  ro   r?   r  r   )r   r   r   r}   r   HUBERT_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr   r   r   r   r   s   @r#   r.  r.    s        
? ? ? ? ? ?
 +*+BCC+<?[[[ ,0+/)-&**.)-,0#'8 8 8 8 ] \[ DC8t( ( ( ( ( ( ( (r%   r.  zfTFHubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                       e Zd Zd fdZd Zd Z ee           ee	e
          e	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFHubertForCTCr   r   c                j    t                      j        |g|R i | t          |d          | _        t          j                            |j                  | _        t          j        	                    |j
        d          | _        t          |d          r|j        r|j        n|j        | _        d S )Nr!  r  lm_headadd_adapter)r|   r}   r  r!  r   r   rj  final_dropoutrl  rh  
vocab_sizer:  hasattrr;  output_hidden_sizer;  r)  s       r#   r}   zTFHubertForCTC.__init__  s    3&333F333'X>>>|++F,@AA|))&*;))LL)0)G)GvFL^vF%%djdv 	r%   c                b    t          j        dt                     |                                  dS )z
        Calling this function will disable the gradient computation for the feature encoder so that its parameters will
        not be updated during training.
        zThe method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.N)r[  r\  r^  freeze_feature_encoderr   s    r#   freeze_feature_extractorz'TFHubertForCTC.freeze_feature_extractor  s;    
 	Q	
 	
 	

 	##%%%%%r%   c                (    d| j         j        _        dS )z
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        FN)r!  r  r  r   s    r#   rA  z%TFHubertForCTC.freeze_feature_encoder  s    
 38%///r%   r0  NFrU  r@   r  r  r  r  r  r  r  r  labelsr  r  rm  r?   #TFCausalLMOutput | tuple[tf.Tensor]c                   |>t          j        |          | j        j        k    rt	          d| j        j                   |                     ||||||||	|
|
  
        }|d         }|                     ||          }|                     |          }|-||nt          j        |t           j	                  }| j        
                    t          j        |d                    }t          j        |dk    t           j                  }t          j        |d          }t           j                            ||||| j        j        d	
          }| j        j        dk    r)t          j        |          }t          j        |d          }| j        j        dk    r)t          j        |          }t          j        |d          }nd}|
s|f|dd         z   }||f|z   n|S t)          |||j        |j                  S )a  
        labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_values` docstring) Tokens with indices set to `-100` are ignored (masked),
            the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Returns:

        Example:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoProcessor, TFHubertForCTC
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
        >>> model = TFHubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")


        >>> def map_to_array(example):
        ...     example["speech"] = example["audio"]["array"]
        ...     return example


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> logits = model(input_values).logits
        >>> predicted_ids = tf.argmax(logits, axis=-1)

        >>> transcription = processor.decode(predicted_ids[0])

        >>> # compute loss
        >>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST"

        >>> # Pass the transcription as text to encode labels
        >>> labels = processor(text=transcription, return_tensors="tf").input_values

        >>> loss = model(input_values, labels=labels).loss
        ```Nz$Label values must be <= vocab_size: r3  r   rp  rF   r'   r(   F)logitsrD  logit_lengthlabel_lengthblank_indexlogits_time_majorsumrE   r   r   )lossrG  r  r  )r   
reduce_maxr   r=  rH   r!  rl  r:  rU   rL   r	  r   rK   rN   r   ctc_losspad_token_idctc_loss_reductionr*   reduce_meanr   r  r  )r   rU  r  r  r  r  r  r  rD  r  r  rm  r   r  rG  r  labels_masktarget_lengthsrM  r  s                       r#   r   zTFHubertForCTC.call  s   t "-"7"74;;Q"Q"Q\DKDZ\\]]]++%))%'/!5#  
 
  
]XFFm,,"0"<",|cecmBnBnBn  !KHHWelnIoIoIoppM '&A+rx88K];R@@@N5>>*+ K4"' "  D {-66}T**z$--{-77~d++z$--D 	FY,F)-)9TGf$$vE!/)	
 
 
 	
r%   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr!  r:  )	r   r  r   r   r!  r   r   r:  r?  r   s     r#   r   zTFHubertForCTC.build{  sq   : 	F
44((4t{/00 ( (!!$'''( ( ( ( ( ( ( ( ( ( ( ( ( ( (4D))5t|011 J J""D$0G#HIIIJ J J J J J J J J J J J J J J J J J 65s$    A''A+.A+!#CCCrs  )
NNNNNNNNNF)rU  r@   r  r  r  r  r  r  r  r  r  r  r  r  rD  r  r  r  r  r  rm  r  r?   rE  r   )r   r   r   r}   rB  rA  r   r5  r   r   r6  r   r   r   r   r   s   @r#   r8  r8    s        

 
 
 
 
 

& 
& 
&8 8 8 +*+BCC+;/ZZZ ,0+/)-&**.)-#',0#' %q
 q
 q
 q
 ] [Z DCq
f	J 	J 	J 	J 	J 	J 	J 	Jr%   r8  )r8  r.  r   r!  )
r8   r9   r:   r;   r<   r=   r>   r=   r?   r@   r   )r^   r@   r_   r`   )Br   
__future__r   r[  typingr   numpyr  
tensorflowr   activations_tfr   modeling_tf_outputsr   r   modeling_tf_utilsr	   r
   r   r   r   tf_utilsr   r   utilsr   r   r   r   configuration_hubertr   
get_loggerr   r'  r6  rc   r$   r7   r]   rg   r   Layerri   r  r   r	  r&  r4  r9  r>  rH  rY  r`  rw  r  r  r  r  r  r  r   HUBERT_START_DOCSTRINGr5  r.  r8  __all__r{   r%   r#   <module>rd     sR     " " " " " "                / / / / / / F F F F F F F F              3 2 2 2 2 2 2 2            / . . . . . 
	H	%	%    O O O( 	G G G G GV
6 
6 
6 
6 
6U U U U U* U U Ur5 5 5 5 5u|2 5 5 5r@ @ @ @ @5<#5 @ @ @<G G G G G!3 G G GFG G G G G!3 G G GFG G G G Gel&8 G G G<    5<-    '  '  '  '  'U\/  '  '  'F
 
 
 
 
5 
 
 
N N N N N 2 N N NBgB gB gB gB gB* gB gB gBV)U )U )U )U )U%,, )U )U )UZ8S 8S 8S 8S 8S5<- 8S 8S 8Sx6S 6S 6S 6S 6S%,*< 6S 6S 6StM& M& M& M& M&el( M& M& M&bO& O& O& O& O&U\%7 O& O& O&d K
 K
 K
 K
 K
* K
 K
 K
\
 
 
 
 
/ 
 
 
4( T5 p h I( I( I( I( I(+ I( I(	 I(X p ]J ]J ]J ]J ]J, ]J ]J	 ]J@ I
H
Hr%   