
     `i2                   ,   d Z ddlmZ ddlZddlmZ ddlmZ ddlZ	ddl
ZddlmZ ddlmZmZmZ dd	lmZmZmZmZmZ dd
lmZmZ ddlmZmZmZmZm Z  ddl!m"Z"  ej#        e$          Z%dZ&dZ'dZ(dZ)e G d de                      Z*d Z+d Z,	 dSdTdZ-dUdVd#Z. G d$ d%ej/        j0                  Z1 G d& d'ej/        j2                  Z3 G d( d)ej/        j0                  Z4 G d* d+ej/        j0                  Z5 G d, d-ej/        j0                  Z6 G d. d/ej/        j0                  Z7 G d0 d1ej/        j0                  Z8 G d2 d3ej/        j0                  Z9 G d4 d5e9          Z: G d6 d7ej/        j0                  Z; G d8 d9ej/        j0                  Z< G d: d;ej/        j0                  Z= G d< d=ej/        j0                  Z> G d> d?ej/        j0                  Z? G d@ dAej/        j0                  Z@ G dB dCej/        j0                  ZAe G dD dEej/        j0                              ZB G dF dGe          ZCdHZDdIZE edJeD           G dK dLeC                      ZF edMeD           G dN dOeC                      ZG G dP dQeC          ZHg dRZIdS )WzTensorFlow Wav2Vec2 model.    )annotationsN)	dataclass)Any   )get_tf_activation)TFBaseModelOutputTFCausalLMOutputTFSequenceClassifierOutput)TFPreTrainedModelget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)ModelOutputadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )Wav2Vec2Config   zfacebook/wav2vec2-base-960hr   g    חc                  L    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dS )	TFWav2Vec2BaseModelOutputa1  
    Output type of [`TFWav2Vec2BaseModelOutput`], with potential hidden states and attentions.

    Args:
        last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        extract_features (`tf.Tensor` of shape `(batch_size, sequence_length, conv_dim[-1])`):
            Sequence of extracted feature vectors of the last convolutional layer of the model.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Ntf.Tensor | Nonelast_hidden_stateextract_featuresztuple[tf.Tensor] | Nonehidden_states
attentions)	__name__
__module____qualname____doc__r   __annotations__r   r   r         /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/wav2vec2/modeling_tf_wav2vec2.pyr   r   :   sd          * +/....)------1M1111*.J......r'   r   c                    t           j                            t           j                            t          |           dd                     }t           j                            | |z   |          \  }}|S )z
    Categorical sampling without replacement is currently not implemented. The gumbel-max trick will do for now - see
    https://github.com/tensorflow/tensorflow/issues/9260 for more info
    r   r   )tfmathlograndomuniformr   nntop_k)distributionnum_samplesz_indicess        r(   _sample_without_replacementr6   W   sY    
 
RY&&z,'?'?AFF	G	GGA\A-{;;JAwNr'   c           
        t          |          }t          j        t          j        t          j        t          j        |d                   d          |          ddg          }t          j        t          j        |t          j        |ddg          gd                    }t          j        |t          j        | dg          |          S )zT
    Scatter function as in PyTorch with indices in format (batch_dim, indices)
    r   axisr   )	r   r*   reshapebroadcast_toexpand_dimsrange	transposeconcat
scatter_nd)valuesbatch_indicesoutput_shapeindices_shapebroad_casted_batch_dimspair_indicess         r(    _scatter_values_on_batch_indicesrH   a   s     }--M j
rxa0@'A'AKKK][[^_ac]d  <	+BBJ}_`bd^eDfDf*gij k kllL=rz&2$'?'?NNNr'   shapetuple[int, int]	mask_probfloatmask_lengthint	min_masksreturn	tf.Tensorc           	     T   | \  }}|dk     rt          d          t          j                            ||d| d| d           |t          j        |t          j                  z  |z  t          j                            d          z   }t          j        ||          }t          j        |t          j	                  }t          j
                            ||z  |          }t          j        |          }t          j        ||ft          j	                  }t          j        |||dz
  z
  f          }t          ||          }	t          j        |	d	          }	t          j        |	dd|f          }	t          j        |	|||z  f          }	t          j        |          t          j        t          j        d
d
f         }
t          j        |
||df          }
t          j        |
|||z  f          }
|	|
z   }	t+          t          j        |	          |	t          j        |                    }|S )a  
    Computes random mask spans for a given shape

    Args:
        shape: the shape for which to compute masks.
            should be of size 2 where first element is batch size and 2nd is timesteps
        attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
        mask_prob:
            probability for each token to be chosen as start of the span to be masked. this will be multiplied by
            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
        mask_length: size of the mask
        min_masks: minimum number of masked spans

    Adapted from [fairseq's
    data_utils.py](https://github.com/pytorch/fairseq/blob/e0788f7007a8473a76db573985031f3c94201e79/fairseq/data/data_utils.py#L376).
    r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `messager   dtyper8   N)
ValueErrorr*   	debuggingassert_lesscastfloat32r-   r.   maximumint32r+   minimumsqueezezerosonesr6   r=   tiler;   r>   newaxisrH   	ones_likerI   )rI   rK   rM   rO   
batch_sizesequence_lengthnum_masked_spansspec_aug_maskuniform_distspec_aug_mask_idxsoffsetss              r(   _compute_mask_indicesrn   p   s4   . #(JQABBBL6^i 6 6#26 6 6	     !27?BJ#G#GG+UXZXaXiXijnXoXooz"2I>>w/:: w+'EGWXXz"233 Hj/:"(KKKM 7J;?(KLMML 5\CSTT (:B??!3aK5HII$6EUXcEc8deeh{##BJ
AAA$=>Ggg
,<a@AAGj:/?+/M"NOOG+g5 5
'((*<bh}>U>U M r'   masktgt_len
int | Nonec                    t          |           d         }||n|}t          j        d          }t          j        | |j                  } t          j        | ddddddf         dd|df          }||z
  t          z  S )z_
    Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
    r   Ng      ?rW   )r   r*   constantr\   rX   rd   LARGE_NEGATIVE)ro   rp   src_lenone_cstexpanded_masks        r(   _expand_maskrx      s     q!G ,gg'Gk#G74w}---DGDD$!12Q7A4FGGMm#~55r'   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 d*d+ fdZ fdZd Z fdZd Zd Z	d  Z
d! Zd" Zd# Zd$ Zd% Zd& Zd' Zd( Zd) Z xZS ),TFWav2Vec2GroupNormzp
    From tensorflow-addons https://www.tensorflow.org/addons/api_docs/python/tfa/layers/GroupNormalization
        r8   MbP?Trb   rc   NgroupsrN   r:   epsilonrL   centerboolscalebeta_initializerkeras.initializers.Initializergamma_initializerbeta_regularizerkeras.regularizers.Regularizergamma_regularizerbeta_constraintkeras.constraints.Constraintgamma_constraintc                f    t                      j        di | d| _        || _        || _        || _        || _        || _        t          j	        
                    |          | _        t          j	        
                    |          | _        t          j        
                    |          | _        t          j        
                    |	          | _        t          j        
                    |
          | _        t          j        
                    |          | _        |                                  d S )NTr&   )super__init__supports_maskingr}   r:   r~   r   r   r   initializersgetr   r   regularizersr   r   constraintsr   r   _check_axis)selfr}   r:   r~   r   r   r   r   r   r   r   r   kwargs	__class__s                r(   r   zTFWav2Vec2GroupNorm.__init__   s     	""6""" $	
 % 2 6 67G H H!&!3!7!78I!J!J % 2 6 67G H H!&!3!7!78I!J!J$044_EE % 1 5 56F G Gr'   c                T   |                      |           |                     |           |                     |           |                     |           |                     |           |                     |           d| _        t                                          |           d S )NT)	_check_if_input_shape_is_none'_set_number_of_groups_for_instance_norm_check_size_of_dimensions_create_input_spec_add_gamma_weight_add_beta_weightbuiltr   buildr   input_shaper   s     r(   r   zTFWav2Vec2GroupNorm.build   s    **;77744[AAA&&{333,,,{+++k***
k"""""r'   c                2   t           j                            |          }t          j        |          }|                     |||          \  }}|                     ||          }|| j                 | j        z  dk    }|st          j	        ||          }n|}|S Nr   )
r   backend	int_shaper*   rI   _reshape_into_groups_apply_normalizationr:   r}   r;   )	r   inputsr   tensor_input_shapereshaped_inputsgroup_shapenormalized_inputsis_instance_normoutputss	            r(   callzTFWav2Vec2GroupNorm.call   s    m--f55Xf--'+'@'@Vh'i'i$ 55o{SS'	2dkAaG 	(j!24FGGGG'Gr'   c                6   | j         | j        | j        | j        | j        t
          j                            | j                  t
          j                            | j	                  t
          j
                            | j                  t
          j
                            | j                  t
          j                            | j                  t
          j                            | j                  d}t!                                                      }i ||S )N)r}   r:   r~   r   r   r   r   r   r   r   r   )r}   r:   r~   r   r   r   r   	serializer   r   r   r   r   r   r   r   r   
get_config)r   configbase_configr   s      r(   r   zTFWav2Vec2GroupNorm.get_config  s    kI|kZ % 2 < <T=R S S!&!3!=!=d>T!U!U % 2 < <T=R S S!&!3!=!=d>T!U!U$0::4;OPP % 1 ; ;D<Q R R
 
 gg((**(+(((r'   c                    |S Nr&   r   r   s     r(   compute_output_shapez(TFWav2Vec2GroupNorm.compute_output_shape  s    r'   c                f   fdt          t          |                    D             }|| j                 | j        z  dk    }|sj|| j                 | j        z  || j        <   |                    | j        | j                   t          j        |          }t          j        ||          }||fS ||fS )Nc                     g | ]
}|         S r&   r&   ).0ir   s     r(   
<listcomp>z<TFWav2Vec2GroupNorm._reshape_into_groups.<locals>.<listcomp>  s    NNN)!,NNNr'   r   )r>   lenr:   r}   insertr*   stackr;   )r   r   r   r   r   r   r   s      `   r(   r   z(TFWav2Vec2GroupNorm._reshape_into_groups  s    NNNNeC<L<L6M6MNNN'	2dkAaG 	'%0%;t{%JK	"ty$+666(;//K j==O"K//;&&r'   c                $   t           j                            |          }t          t	          dt          |                              }|| j                 | j        z  dk    }|s| j        dk    rdn	| j        dz
  }n| j        dk    rdn	| j        dz
  }|                    |           t          j
                            ||d          \  }}|                     |          \  }	}
t          j
                            ||||	|
| j                  }|S )Nr   r8   T)keepdims)meanvariancer   offsetvariance_epsilon)r   r   r   listr>   r   r:   r}   popr*   r/   moments_get_reshaped_weightsbatch_normalizationr~   )r   r   r   r   group_reduction_axesr   r:   r   r   gammabetar   s               r(   r   z(TFWav2Vec2GroupNorm._apply_normalization&  s   m--o>>#E!S-=-=$>$>??'	2dkAaG 	<b22di!mDDb22di!mD  &&&8LW[\\h00==tE55!\ 6 
 
 ! r'   c                    |                      |          }d }d }| j        rt          j        | j        |          }| j        rt          j        | j        |          }||fS r   )_create_broadcast_shaper   r*   r;   r   r   r   )r   r   broadcast_shaper   r   s        r(   r   z)TFWav2Vec2GroupNorm._get_reshaped_weights=  sc    66{CC: 	<Jtz?;;E; 	::di99Dd{r'   c                    || j                  }|:t          dt          | j                   z   dz   t          |          z   dz             d S )NzAxis z\ of input tensor should have a defined dimension but the layer received an input with shape .)r:   rY   strr   r   dims      r(   r   z1TFWav2Vec2GroupNorm._check_if_input_shape_is_noneH  sd    $)$;di..!pq k""# 	   ;r'   c                H    || j                  }| j        dk    r	|| _        d S d S Nr8   )r:   r}   r   s      r(   r   z;TFWav2Vec2GroupNorm._set_number_of_groups_for_instance_normS  s.    $)$;"DKKK r'   c                :   || j                  }|| j        k     r:t          dt          | j                  z   dz   t          |          z   dz             || j        z  dk    r:t          dt          | j                  z   dz   t          |          z   dz             d S )NzNumber of groups (z.) cannot be more than the number of channels ().r   z0) must be a multiple of the number of channels ()r:   r}   rY   r   r   s      r(   r   z-TFWav2Vec2GroupNorm._check_size_of_dimensionsY  s    $)$$dk""#BC c(( 	   !!$dk""#DE c(( 	   "!r'   c                :    | j         dk    rt          d          d S )Nr   zdYou are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead)r:   rY   r   s    r(   r   zTFWav2Vec2GroupNorm._check_axism  s)    9>>v   >r'   c                    || j                  }t          j                            t	          |          | j         |i          | _        d S )N)ndimaxes)r:   r   layers	InputSpecr   
input_specr   s      r(   r   z&TFWav2Vec2GroupNorm._create_input_specs  s>    $)$,00c+6F6FdiY\M]0^^r'   c                    || j                  }|f}| j        r0|                     |d| j        | j        | j                  | _        d S d | _        d S )Nr   rI   nameinitializerregularizer
constraint)r:   r   
add_weightr   r   r   r   r   r   r   rI   s       r(   r   z%TFWav2Vec2GroupNorm._add_gamma_weightw  sb    $)$: 		 2 20 )  DJJJ DJJJr'   c                    || j                  }|f}| j        r0|                     |d| j        | j        | j                  | _        d S d | _        d S )Nr   r   )r:   r   r   r   r   r   r   r   s       r(   r   z$TFWav2Vec2GroupNorm._add_beta_weight  sb    $)$; 		 1 1/ (  DIII DIIIr'   c                    dgt          |          z  }|| j                 | j        z  dk    }|s>|| j                 | j        z  || j        <   |                    | j        | j                   n| j        || j        <   |S r   )r   r:   r}   r   )r   r   r   r   s       r(   r   z+TFWav2Vec2GroupNorm._create_broadcast_shape  s    #K 0 00'	2dkAaG 	5)4TY)?4;)NODI&""49dk::::)-ODI&r'   )r{   r8   r|   TTrb   rc   NNNN)r}   rN   r:   rN   r~   rL   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r!   r"   r#   r$   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   s   @r(   rz   rz      sg         ;B<B;?<@8<9=      <	# 	# 	# 	# 	#   ) ) ) ) )"  
' 
' 
'! ! !.	 	 		 	 	    (  _ _ _          r'   rz   c                  B     e Zd ZdZ fdZd Zd Z fdZ fdZ xZ	S )TFWav2Vec2WeightNormConv1DzeAdapted from https://www.tensorflow.org/probability/api_docs/python/tfp/layers/weight_norm/WeightNormc           
          t                      j        d|||dddd| || _        d| _        t	          j        ddg          | _        d S )	NvalidT	he_normal)filterskernel_sizer}   paddinguse_biasbias_initializerr   r   r   r&   )r   r   explicit_paddingfilter_axisr*   rs   kernel_norm_axes)r   r   r   r}   r   r   r   s         r(   r   z#TFWav2Vec2WeightNormConv1D.__init__  sr     	
#(	
 	
 	
 	
 	
 !1 "QF 3 3r'   c                    t          j        t          j        t          j        | j                  | j                            }| j                            |ddt           j        t           j        f                    dS )z"Set the norm of the weight vector.r9   N)	r*   sqrt
reduce_sumsquareweight_vr   weight_gassignre   )r   kernel_norms     r(   
_init_normz%TFWav2Vec2WeightNormConv1D._init_norm  s_    gbmBIdm,D,D4K`aaabb[BJ
)BCDDDDDr'   c                    t           j                            | j        | j                  t          j        | j                  z  }t          j        |          | _        dS )zGenerate normalized weights.r9   N)r*   r/   l2_normalizer  r   r?   r  kernel)r   r  s     r(   _normalize_kernelz,TFWav2Vec2WeightNormConv1D._normalize_kernel  sK    ##DM8M#NNQSQ]^b^kQlQlll6**r'   c                   | j         st                                          |           t          j        t          j        | j                  dd          | _        | j        | _        |                     dt          | j        j
        | j                           ddfd| j        j        d          | _        |                                  |                     d| j        fd	d
          | _        d S d S )Nr  T)r   	trainabler  r   rc   )r   rI   r   rX   r  biasrb   )r   rI   r   r  )r   r   r   r*   Variabler?   r  r  r   rN   rI   r   rX   r  r  r   r  r   s     r(   r   z TFWav2Vec2WeightNormConv1D.build  s    z 	qGGMM+&&&+bl4;&?&?j\`aaaDK KDM OO4=.t/?@AA1aH"m) ,  DM OOVDL?X_koppDIII	q 	qr'   c                    |                                   t          j        |d| j        | j        fdf          }t	                                          |          }|S )N)r   r   )r	  r*   padr   r   r   )r   r   padded_inputsoutputr   s       r(   r   zTFWav2Vec2WeightNormConv1D.call  sU     	   v1FH]0^`f'ghhm,,r'   )
r!   r"   r#   r$   r   r  r	  r   r   r   r   s   @r(   r   r     s        oo4 4 4 4 4E E E
+ + +
q q q q q"	 	 	 	 	 	 	 	 	r'   r   c                  2     e Zd Zdd fd
ZddZddZ xZS )TFWav2Vec2NoLayerNormConvLayerr   r   r   layer_idrN   r   r   rP   Nonec                V    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t          |j                  | _        d S )Nr   r   convr   r   stridesr   r   r&   )r   r   conv_dimin_conv_dimout_conv_dimr   r   Conv1Dconv_kernelconv_stride	conv_biasr  r   feat_extract_activation
activationr   r   r  r   r   s       r(   r   z'TFWav2Vec2NoLayerNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	 ,F,JKKr'   r   rQ   c                Z    |                      |          }|                     |          }|S r   )r  r"  r   r   s     r(   r   z#TFWav2Vec2NoLayerNormConvLayer.call  s*    		-0066r'   Nc                    | j         rd S d| _         t          | dd           [t          j        | j        j                  5  | j                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S NTr  )r   getattrr*   
name_scoper  r   r   r  r   s     r(   r   z$TFWav2Vec2NoLayerNormConvLayer.build  s    : 	F
4&&2ty~.. @ @	tT-= >???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 32s    #A00A47A4r   r   r   r  rN   r   r   rP   r  r   rQ   rP   rQ   r   r!   r"   r#   r   r   r   r   r   s   @r(   r  r    sy        L L L L L L L   
@ @ @ @ @ @ @ @r'   r  c                  2     e Zd Zdd fd
ZddZddZ xZS )TFWav2Vec2LayerNormConvLayerr   r   r   r  rN   r   r   rP   r  c                    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t
          j                            d|j                  | _        t          |j                  | _        d S )Nr   r   r  r  
layer_norm)r   r~   r&   )r   r   r  r  r  r   r   r  r  r  r   r  LayerNormalizationlayer_norm_epsr1  r   r!  r"  r#  s       r(   r   z%TFWav2Vec2LayerNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	  ,99|U[Uj9kk+F,JKKr'   r   rQ   c                    |                      |          }|                     |          }|                     |          }|S r   r  r1  r"  r%  s     r(   r   z!TFWav2Vec2LayerNormConvLayer.call  ;    		-006666r'   Nc                   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j	        g           d d d            d S # 1 swxY w Y   d S d S NTr  r1  
r   r(  r*   r)  r  r   r   r  r1  r  r   s     r(   r   z"TFWav2Vec2LayerNormConvLayer.build     : 	F
4&&2ty~.. @ @	tT-= >???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @4t,,8t344 G G%%tT43D&EFFFG G G G G G G G G G G G G G G G G G 98$    #A//A36A3)#CC Cr*  r+  r,  r   r-  r   s   @r(   r/  r/    sy        L L L L L L L   	G 	G 	G 	G 	G 	G 	G 	Gr'   r/  c                  2     e Zd Zdd fd
ZddZddZ xZS )TFWav2Vec2GroupNormConvLayerr   r   r   r  rN   r   r   rP   r  c                    t                      j        di | |dk    r|j        |         nd| _        |j        |         | _        t
          j                            | j        |j        |         |j	        |         |j
        d          | _        t          |j                  | _        t          | j        |j        d          | _        d S )Nr   r   r  r  r1  )r}   r~   r   r&   )r   r   r  r  r  r   r   r  r  r  r   r  r   r!  r"  rz   r3  r1  r#  s       r(   r   z%TFWav2Vec2GroupNormConvLayer.__init__  s    ""6"""8@16?844!"OH5L''%*84&x0% ( 
 
	 ,F,JKK-$f.C,
 
 
r'   r   rQ   c                    |                      |          }|                     |          }|                     |          }|S r   r5  r%  s     r(   r   z!TFWav2Vec2GroupNormConvLayer.call)  r6  r'   Nc                   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j	        g           d d d            d S # 1 swxY w Y   d S d S r8  r9  r   s     r(   r   z"TFWav2Vec2GroupNormConvLayer.build/  r:  r;  r*  r+  r,  r   r-  r   s   @r(   r=  r=    sr        
 
 
 
 
 
 
"   	G 	G 	G 	G 	G 	G 	G 	Gr'   r=  c                  0     e Zd Zd fdZdd
ZddZ xZS )!TFWav2Vec2PositionalConvEmbeddingr   r   r   r   rP   r  c                    t                      j        di | t          |j        |j        |j        |j        dz  d          | _        t          |j                  | _        t          |j
                  | _        || _        d S )Nr   r  )r   r   r}   r   r   r&   )r   r   r   hidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsr  TFWav2Vec2SamePadLayerr   r   r!  r"  r   r   r   r   r   s      r(   r   z*TFWav2Vec2PositionalConvEmbedding.__init__<  s    ""6""".&67#;q@
 
 
	 .f.LMM+F,JKKr'   r   rQ   c                    |                      |          }|                     |          }|                     |          }|S r   )r  r   r"  r%  s     r(   r   z&TFWav2Vec2PositionalConvEmbedding.callI  s;    		-00]3366r'   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r'  )	r   r(  r*   r)  r  r   r   r   rD  r   s     r(   r   z'TFWav2Vec2PositionalConvEmbedding.buildO  s    : 	F
4&&2ty~.. G G	tT[-D EFFFG G G G G G G G G G G G G G G G G G 32s    (A55A9<A9r   r   r   r   rP   r  r,  r   r-  r   s   @r(   rB  rB  ;  sm                G G G G G G G Gr'   rB  c                  $     e Zd Z fdZd Z xZS )rG  c                ^     t                      j        di | |dz  dk    rdnd| _        d S )Nr   r   r   r&   )r   r   num_pad_remove)r   rE  r   r   s      r(   r   zTFWav2Vec2SamePadLayer.__init__Y  sA    ""6"""#:Q#>!#C#Caar'   c                J    | j         dk    r|d d d | j          d d f         }|S )Nr   )rN  r%  s     r(   r   zTFWav2Vec2SamePadLayer.call]  s;    "")!!!-C0C/C-CQQQ*FGMr'   )r!   r"   r#   r   r   r   r   s   @r(   rG  rG  X  sL        K K K K K      r'   rG  c                  .     e Zd Zd fdZd Zdd
Z xZS )TFWav2Vec2FeatureEncoderr   r   r   r   rP   r  c                ^    t                      j        di | j        dk    r;t          ddd           gfdt	          j        dz
            D             z   }nDj        dk    r!fdt	          j                  D             }nt          d	j         d
          || _        d S )Ngroupr   conv_layers.r  r   c           	     F    g | ]}t          |d z   d|d z              S )r   rT  rU  )r  r   r   r   s     r(   r   z5TFWav2Vec2FeatureEncoder.__init__.<locals>.<listcomp>h  sR     i i i /vALb[\_`[`LbLbccci i ir'   r   layerc                :    g | ]}t          |d |           S )rT  rU  )r/  rW  s     r(   r   z5TFWav2Vec2FeatureEncoder.__init__.<locals>.<listcomp>m  sB        -VaFXUVFXFXYYY  r'   z`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']r&   )r   r   feat_extract_normr=  r>   num_feat_extract_layersrY   conv_layers)r   r   r   r\  r   s    `  r(   r   z!TFWav2Vec2FeatureEncoder.__init__d  s   ""6"""#w..7Qc`aQcQcddde i i i iv=ABBi i i KK %00   v=>>  KK
 t1Ittt   'r'   c                Z    t          j        |d          }| j        D ]} ||          }|S r   )r*   r=   r\  )r   input_valuesr   
conv_layers       r(   r   zTFWav2Vec2FeatureEncoder.callw  s<    |R88* 	6 	6J&J}55MMr'   Nc                    | j         rd S d| _         t          | dd           P| j        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTr\  )r   r(  r\  r*   r)  r   r   )r   r   r_  s      r(   r   zTFWav2Vec2FeatureEncoder.build}  s    : 	F
4--9". + +
]:?33 + +$$T***+ + + + + + + + + + + + + + + :9+ +s   A&&A*	-A*	rK  r   r-  r   s   @r(   rQ  rQ  c  s`        ' ' ' ' ' '&  + + + + + + + +r'   rQ  c                       e Zd Z fdZ xZS )TFWav2Vec2FeatureExtractorc                     t                      j        |fi | t          j        d| j        j         d| j        j        d         j         dt                     d S )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)r   r   warningswarnr   r!   	__bases__FutureWarningrH  s      r(   r   z#TFWav2Vec2FeatureExtractor.__init__  s    **6***E$.1 E EN,Q/8E E E 		
 	
 	
 	
 	
r'   )r!   r"   r#   r   r   r   s   @r(   rb  rb    s8        
 
 
 
 
 
 
 
 
r'   rb  c                  2     e Zd Zd fdZddd
ZddZ xZS )TFWav2Vec2FeatureProjectionr   r   c                p    t                      j        di | t          j                            |j        d          | _        t          j                            |j        t          |j
                  dd          | _        t          j                            |j                  | _        || _        d S )Nr1  r~   r   rb   
projectionunitskernel_initializerr   r   )rater&   )r   r   r   r   r2  r3  r1  DenserD  r   initializer_rangerl  Dropoutfeat_proj_dropoutdropoutr   rH  s      r(   r   z$TFWav2Vec2FeatureProjection.__init__  s    ""6""",99&BW^j9kk,,,$.v/GHH$	 - 
 
 |++1I+JJr'   Fr   rQ   trainingr   rP   c                    |                      |          }|                     |          }|                     ||          }||fS Nrv  )r1  rl  ru  )r   r   rv  norm_hidden_statess       r(   r   z TFWav2Vec2FeatureProjection.call  sG    !__];;(:;;]XFF000r'   Nc                   | j         rd S d| _         t          | dd           ct          j        | j        j                  5  | j                            d d | j        j        d         g           d d d            n# 1 swxY w Y   t          | dd           ft          j        | j	        j                  5  | j	                            d d | j        j        d         g           d d d            d S # 1 swxY w Y   d S d S )NTr1  r8   rl  )
r   r(  r*   r)  r1  r   r   r   r  rl  r   s     r(   r   z!TFWav2Vec2FeatureProjection.build  s   : 	F
4t,,8t344 N N%%tT4;3G3K&LMMMN N N N N N N N N N N N N N N4t,,8t344 N N%%tT4;3G3K&LMMMN N N N N N N N N N N N N N N N N N 98s$    .A::A>A>4.C//C36C3r   r   Fr   rQ   rv  r   rP   rQ   r   r-  r   s   @r(   ri  ri    sr             1 1 1 1 1	N 	N 	N 	N 	N 	N 	N 	Nr'   ri  c                  P     e Zd ZdZ	 	 	 d!d" fdZd#dZ	 	 	 	 	 d$d%dZd&d Z xZS )'TFWav2Vec2Attentionz6Multi-headed attention from "Attention Is All You Need        FT	embed_dimrN   	num_headsru  rL   
is_decoderr   r  c                r    t                      j        d
i | || _        || _        t          j                            |          | _        ||z  | _        | j        |z  | j        k    rt          d| j         d| d          | j        dz  | _
        || _        t          j                            ||d          | _        t          j                            ||d          | _        t          j                            ||d          | _        t          j                            ||d	          | _        d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: r   g      k_proj)r   r   q_projv_projout_projr&   )r   r   r  r  r   r   rs  ru  head_dimrY   scalingr  rq  r  r  r  r  )r   r  r  ru  r  r  r   r   s          r(   r   zTFWav2Vec2Attention.__init__  s1    	""6"""""|++G44!Y.MI%$.883dn 3 3%.3 3 3   }d*$l((T(QQl((T(QQl((T(QQ**9t**UUr'   tensorrQ   seq_lenbszc           	     n    t          j        t          j        |||| j        | j        f          d          S )Nr   r   r   r   )r*   r?   r;   r  r  )r   r  r  r  s       r(   _shapezTFWav2Vec2Attention._shape  s.    |BJvWdndm/\]]_klllr'   Nr   key_value_statesr   past_key_valuetuple[tuple[tf.Tensor]] | Noneattention_masklayer_head_maskrv  bool | NonerP   "tuple[tf.Tensor, tf.Tensor | None]c           
     	   |du}t          |          \  }}	}
|                     |          | j        z  }|r||d         }|d         }n>|rU|                     |                     |          d|          }|                     |                     |          d|          }n||                     |                     |          d|          }|                     |                     |          d|          }t          j        |d         |gd          }t          j        |d         |gd          }nT|                     |                     |          d|          }|                     |                     |          d|          }| j        r||f}|| j	        z  d| j
        f}t          j        |                     ||	|          |          }t          j        ||          }t          j        ||          }t          |          d         }t          j        ||d          }t          j                            t          |          || j	        z  |	|gd	|| j	        z  |	|f d
t          |                      |t          j                            t          |          |d|	|gd|d|	|f d
t          |                      t          j        ||j                  }t          j        ||| j	        |	|f          |z   }t          j        ||| j	        z  |	|f          }t#          |d          }|t          j                            t          |          | j	        gd| j	         d
t          |                      t          j        |d          t          j        ||| j	        |	|f          z  }t          j        ||| j	        z  |	|f          }|                     ||          }t          j        ||          }t          j                            t          |          || j	        z  |	| j
        gd|| j	        |	| j
        f d
t          |                      t          j        t          j        ||| j	        |	| j
        f          d          }t          j        |||	|
f          }|                     |          }t          j        ||| j	        |	|f          }|||fS )z#Input shape: Batch x Time x ChannelNr   r   r8   r   r9   T)transpose_bz$Attention weights should be of size z	, but is rT   z!Attention mask should be of size rW   z/Head mask for a single layer should be of size )r   r8   r   r   ry  z `attn_output` should be of size r  )r   r  r  r  r  r  r*   r@   r  r  r  r;   matmulrZ   assert_equalr\   rX   r   ru  r?   r  )r   r   r  r  r  r  rv  is_cross_attentionr  rp   r  query_states
key_statesvalue_states
proj_shaperu   attn_weights
attn_probsattn_outputs                      r(   r   zTFWav2Vec2Attention.call  s5    .T9",]";";Wi {{=11DL@ 	L."<'*J)!,LL 	LT[[1A%B%BBLLJ;;t{{3C'D'Db#NNLL'T[[%?%?SIIJ;;t{{='A'A2sKKLN1$5z#BKKKJ9nQ&7%FQOOOLL T[[%?%?SIIJ;;t{{='A'A2sKKL? 	8 ),7NDN*B>
z$++lGS"I"I:VVZ
J77
z,
;;Z((+yztLLL
!!|$$4>!7G4/dn8LgW^7_ / /|,,/ /	 	" 	
 	
 	
 %L%%>**a'*5a'8R 5 5">225 5	 &     W^<;MNNNN:lS$.'SZ4[\\_mmL:lS4>5I7T[4\]]L%l<<<&L%%?++ 6t~ 6 6"?336 6	 &    :o}EE
sDNGWEI I L :lS4>5I7T[4\]]L\\,\BB
i
L99
!!{##4>!7DM:.CRVR_3` . .{++. .	 	" 	
 	
 	
 lJ{S$.'4=$QRRT`
 
 jsGY.GHHmmK00"$*\CQXZa;b"c"cL.88r'   c                t   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j	        j                  5  | j	                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j
        j                  5  | j
                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r  r  r  )r   r(  r*   r)  r  r   r   r  r  r  r  r   s     r(   r   zTFWav2Vec2Attention.buildK  s   : 	F
44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @44((4t{/00 @ @!!4t~">???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @4T**6t}122 B B##T4$@AAAB B B B B B B B B B B B B B B B B B 76sH    #A//A36A3)#CCC#EEE;#F++F/2F/)r  FT)
r  rN   r  rN   ru  rL   r  r   r  r   )r  rQ   r  rN   r  rN   )NNNNF)r   rQ   r  r   r  r  r  r   r  r   rv  r  rP   r  r   )	r!   r"   r#   r$   r   r  r   r   r   r   s   @r(   r  r    s        @@  V V V V V V V8m m m m .29=+/,0 %t9 t9 t9 t9 t9lB B B B B B B Br'   r  c                  2     e Zd Zd fdZddd
ZddZ xZS )TFWav2Vec2FeedForwardr   r   c                    t                      j        di | t          j                            |j                  | _        t          j                            |j        t          |j
                  dd          | _        t          |j                  | _        t          j                            |j        t          |j
                  dd          | _        t          j                            |j                  | _        || _        d S )Nrb   intermediate_denserm  output_denser&   )r   r   r   r   rs  activation_dropoutintermediate_dropoutrq  intermediate_sizer   rr  r  r   
hidden_actintermediate_act_fnrD  r  hidden_dropoutoutput_dropoutr   rH  s      r(   r   zTFWav2Vec2FeedForward.__init__^  s    ""6"""$)L$8$89R$S$S!"',"4"4*.v/GHH$%	 #5 #
 #
 $5V5F#G#G !L..$.v/GHH$	 / 
 
 $l2263HIIr'   Fr   rQ   rv  r   rP   c                    |                      |          }|                     |          }|                     ||          }|                     |          }|                     ||          }|S rx  )r  r  r  r  r  )r   r   rv  s      r(   r   zTFWav2Vec2FeedForward.callt  sq    //>>00??11-(1SS))-88++MH+MMr'   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j        j
        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r  )r   r(  r*   r)  r  r   r   r   rD  r  r  r   s     r(   r   zTFWav2Vec2FeedForward.build}  s   : 	F
4-t44@t6;<< U U'--tT4;;R.STTTU U U U U U U U U U U U U U U4..:t0566 U U!''tT[5R(STTTU U U U U U U U U U U U U U U U U U ;:s$    (A44A8;A8.(C##C'*C'r|  r}  r~  r   r-  r   s   @r(   r  r  ]  sr             ,    	U 	U 	U 	U 	U 	U 	U 	Ur'   r  c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )TFWav2Vec2EncoderLayerr   r   c                    t                      j        d	i | t          |j        |j        |j        dd          | _        t          j        	                    |j
                  | _        t          j                            |j        d          | _        t          |d          | _        t          j                            |j        d          | _        || _        d S 
NF	attention)r  r  ru  r  r   r1  rk  feed_forwardr   final_layer_normr&   r   r   r  rD  num_attention_headsattention_dropoutr  r   r   rs  r  ru  r2  r3  r1  r  r  r  r   rH  s      r(   r   zTFWav2Vec2EncoderLayer.__init__      ""6""",(0,
 
 
 |++F,ABB,99&BW^j9kk1&~NNN % ? ?H]dv ? w wr'   NFr   rQ   r  r   output_attentionsr  rv  r   rP   tuple[tf.Tensor]c                   |}|                      |||          \  }}}|                     ||          }||z   }|                     |          }||                     |          z   }|                     |          }|f}|r||fz  }|S N)r  rv  ry  )r  ru  r1  r  r  	r   r   r  r  rv  attn_residualr  r4   r   s	            r(   r   zTFWav2Vec2EncoderLayer.call  s     &)-.8 *8 *
 *
&|Q ]XFF%566%(9(9-(H(HH--m<< " 	'&Gr'   c                h   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S NTr  r1  r  r  r   r(  r*   r)  r  r   r   r1  r   rD  r  r  r   s     r(   r   zTFWav2Vec2EncoderLayer.build     : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M4..:t0566 . .!''---. . . . . . . . . . . . . . .4+T22>t49:: S S%++T49P,QRRRS S S S S S S S S S S S S S S S S S ?>H    A''A+.A+!(CCCD66D:=D:0(F%%F),F)r|  NFF
r   rQ   r  r   r  r  rv  r   rP   r  r   r-  r   s   @r(   r  r    s}             $ ,0).    2S S S S S S S Sr'   r  c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )%TFWav2Vec2EncoderLayerStableLayerNormr   r   c                    t                      j        d	i | t          |j        |j        |j        dd          | _        t          j        	                    |j
                  | _        t          j                            |j        d          | _        t          |d          | _        t          j                            |j        d          | _        || _        d S r  r  rH  s      r(   r   z.TFWav2Vec2EncoderLayerStableLayerNorm.__init__  r  r'   NFr   rQ   r  r   r  r  rv  r   rP   r  c                   |}|                      |          }|                     |||          \  }}}|                     ||          }||z   }||                     |                     |                    z   }|f}|r||fz  }|S r  )r1  r  ru  r  r  r  s	            r(   r   z*TFWav2Vec2EncoderLayerStableLayerNorm.call  s     &66)-.8 *8 *
 *
&|Q ]XFF%5%(9(9$:O:OP]:^:^(_(__ " 	'&Gr'   c                h   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S r  r  r   s     r(   r   z+TFWav2Vec2EncoderLayerStableLayerNorm.build  r  r  r|  r  r  r   r-  r   s   @r(   r  r    s}             $ ,0).    .S S S S S S S Sr'   r  c                  <     e Zd Zd fdZ	 	 	 	 	 dddZddZ xZS )TFWav2Vec2Encoderr   r   c                h    t                      j        di | | _        t          d          | _        t
          j                            j        d          | _	        t
          j        
                    j                  | _        fdt          j                  D             | _        d S )Npos_conv_embedr  r1  rk  c                8    g | ]}t          d |           S zlayers.r  )r  rW  s     r(   r   z.TFWav2Vec2Encoder.__init__.<locals>.<listcomp>  s-    rrrQ,V-A--HHHrrrr'   r&   r   r   r   rB  r  r   r   r2  r3  r1  rs  r  ru  r>   num_hidden_layersrX  rH  s    ` r(   r   zTFWav2Vec2Encoder.__init__  s    ""6"""?M]^^^,99&BW^j9kk|++F,ABBrrrrRWX^XpRqRqrrr


r'   NFTr   rQ   r  r   r  r  output_hidden_statesreturn_dictrv  rP   $TFBaseModelOutput | tuple[tf.Tensor]c                b   |rdnd }|rdnd }|(|t          j        |d          z  }t          |          }nd }|                     |          }	||	z   }|                     |          }|                     ||          }t          | j                  D ]e\  }
}|r||fz   }t          j	        
                    dd          }|r|| j        j        k     r@ |||||          }|d         }|r||d         fz   }f|r||fz   }|st          d |||fD                       S t          |||          S )	Nr&   r8   ry  r   r   r   r  r  rv  c              3     K   | ]}||V  	d S r   r&   r   vs     r(   	<genexpr>z)TFWav2Vec2Encoder.call.<locals>.<genexpr>6  (      mmq_`_l_l_l_l_lmmr'   r   r   r    )r*   r=   rx   r  r1  ru  	enumeraterX  npr-   r.   r   	layerdroptupler   r   r   r  r  r  r  rv  all_hidden_statesall_self_attentionsposition_embeddingsr   layer_moduledropout_probabilitylayer_outputss                 r(   r   zTFWav2Vec2Encoder.call  s    #7@BBD$5?bb4%)BN>2,N,NNM).99NN!N"11-@@%(;;66]XFF(44 	P 	POA|# I$58H$H! #%)"3"3Aq"9"9 04;3HHH(L+-"3!	  M *!,M  P&9]1=M<O&O#   	E 1]4D D 	nmm]4EGZ$[mmmmmm ++*
 
 
 	
r'   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           P| j
        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S NTr  r1  rX  r   r(  r*   r)  r  r   r   r1  r   rD  rX  r   r   rX  s      r(   r   zTFWav2Vec2Encoder.build=     : 	F
4)400<t2788 0 0#))$///0 0 0 0 0 0 0 0 0 0 0 0 0 0 04t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &6    A''A+.A+!(CCCD55D9	<D9	r|  NFFTFr   rQ   r  r   r  r  r  r  r  r  rv  r  rP   r  r   r-  r   s   @r(   r  r    s        s s s s s s ,0).,1#' %5
 5
 5
 5
 5
n& & & & & & & &r'   r  c                  <     e Zd Zd fdZ	 	 	 	 	 dddZddZ xZS ) TFWav2Vec2EncoderStableLayerNormr   r   c                h    t                      j        di | | _        t          d          | _        t
          j                            j        d          | _	        t
          j        
                    j                  | _        fdt          j                  D             | _        d S )Nr  r  r1  rk  c                8    g | ]}t          d |           S r  )r  rW  s     r(   r   z=TFWav2Vec2EncoderStableLayerNorm.__init__.<locals>.<listcomp>T  s:     
 
 
RS1&}}}MMM
 
 
r'   r&   r  rH  s    ` r(   r   z)TFWav2Vec2EncoderStableLayerNorm.__init__N  s    ""6"""?M]^^^,99&BW^j9kk|++F,ABB
 
 
 
W\]c]uWvWv
 
 



r'   NFTr   rQ   r  r   r  r  r  r  rv  rP   r  c                b   |rdnd }|rdnd }|(|t          j        |d          z  }t          |          }nd }|                     |          }	||	z   }|                     ||          }t          | j                  D ]e\  }
}|r||fz   }t          j        	                    dd          }|r|| j
        j        k     r@ |||||          }|d         }|r||d         fz   }f|                     |          }|r||fz   }|st          d |||fD                       S t          |||          S )	Nr&   r8   ry  r   r   r  c              3     K   | ]}||V  	d S r   r&   r  s     r(   r  z8TFWav2Vec2EncoderStableLayerNorm.call.<locals>.<genexpr>  r  r'   r  )r*   r=   rx   r  ru  r  rX  r  r-   r.   r   r  r1  r  r   r  s                 r(   r   z%TFWav2Vec2EncoderStableLayerNorm.callX  s    #7@BBD$5?bb4%)BN>2,N,NNM).99NN!N"11-@@%(;;]XFF(44 	P 	POA|# I$58H$H! #%)"3"3Aq"9"9 04;3HHH(L+-"3!	  M *!,M  P&9]1=M<O&O#66 	E 1]4D D 	nmm]4EGZ$[mmmmmm ++*
 
 
 	
r'   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           P| j
        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S r  r  r  s      r(   r   z&TFWav2Vec2EncoderStableLayerNorm.build  r  r  r|  r  r  r   r-  r   s   @r(   r  r  M  s|        
 
 
 
 
 
 ,0).,1#' %5
 5
 5
 5
 5
n& & & & & & & &r'   r  c                  j     e Zd ZeZd fdZddZddZdd dZe		 	 	 	 	 	 	 	 	 d!d"d            Z
 xZS )#TFWav2Vec2MainLayerr   r   c                
    t                      j        di | || _        t          |d          | _        t          |d          | _        |j        rt          |d          | _	        d S t          |d          | _	        d S )Nfeature_extractorr  feature_projectionencoderr&   )r   r   r   rQ  r  ri  r  do_stable_layer_normr  r  r  rH  s      r(   r   zTFWav2Vec2MainLayer.__init__  s    ""6"""!9&GZ![![!["=fK_"`"`"`& 	E;FSSSDLLL,V)DDDDLLLr'   Nc                   | j         rd S d| _         | j        j        dk    s| j        j        dk    r)|                     | j        j        fddd          | _        t          | dd           Pt          j	        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j	        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j	        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )	NTr  r.   masked_spec_embedrI   r   r  r   r  r  r  )r   r   mask_time_probmask_feature_probr   rD  r  r(  r*   r)  r  r   r   r  r  r   s     r(   r   zTFWav2Vec2MainLayer.build  s:   : 	F
;%++t{/Ls/R/R%)__{.0iSW^q &5 & &D" 4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 34-t44@t6;<< 4 4'--d3334 4 4 4 4 4 4 4 4 4 4 4 4 4 44D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 65s6   	B00B47B4*DDDE33E7:E7input_lengthsrQ   c                z    d }t          | j        j        | j        j                  D ]\  }} ||||          }|S )H
        Computes the output length of the convolutional layers
        c                    | |z
  |z  dz   S r   r&   input_lengthr   strides      r(   _conv_out_lengthzNTFWav2Vec2MainLayer._get_feat_extract_output_lengths.<locals>._conv_out_length  s     !;.69A==r'   )zipr   r  r  )r   r  r  r   r  s        r(    _get_feat_extract_output_lengthsz4TFWav2Vec2MainLayer._get_feat_extract_output_lengths  sY    
	> 	> 	>
 $'t{'>@W#X#X 	Q 	QK,,]KPPMMr'   r   mask_time_indicesr   c                V   t          |          \  }}}t          | j        dd          s|S |ot          j        t          j        |ddddt          j        f         t          j                  | j        t          j        t          j        ddf         |          }n| j        j	        dk    rt          ||f| j        j	        | j        j        d          }t          j        t          j        |ddddt          j        f         t          j                  | j        t          j        t          j        ddf         |          }| j        j        dk    rUt          ||f| j        j        | j        j                  }t          j        |ddt          j        ddf         |d          }|S )z
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://huggingface.co/papers/1904.08779).
        apply_spec_augmentTNr   r   )rK   rM   rO   )rK   rM   )r   r(  r   r*   wherer\   re   r   r  r	  rn   mask_time_lengthr
  mask_feature_length)r   r   r  rg   rh   rD  mask_feature_indicess          r(   _mask_hidden_statesz'TFWav2Vec2MainLayer._mask_hidden_states  s   
 4>m3L3L0
O[ t{$8$?? 	!  (H)!!!QQQ
*:;RWEE&rz2:qqq'@A MM ['!++ 5_-+4 K8	! ! ! H)!!!QQQ
*:;RWEE&rz2:qqq'@A M ;(1,,#8[)+7 K;$ $ $ 
 H%9!!!RZ:J%K]\]^^Mr'   Fr^  r  token_type_idsposition_ids	head_maskinputs_embedsr  r  r  r  rv  r   r   r   c                N   |                      t          j        |t          j                  |
          }|W|                     t          j        |d                    }t          j        |t          |          d         |j                  }| 	                    ||
          \  }}|
                    d          }|
r|                     ||          }|                     |||||	|
          }|d         }|	s||f|dd          z   S t          |||j        |j        	          S )
Nry  r8   r   )maxlenrX   r  )r  r  r  r  r  rv  r   )r   r   r   r    )r  r*   r\   r]   r  r   sequence_maskr   rX   r  r   r  r  r   r   r    )r   r^  r  r  r  r  r   r  r  r  rv  r   r   output_lengthsr   r  encoder_outputss                    r(   r   zTFWav2Vec2MainLayer.call  s^     11"',
2S2S^f1gg %!BB2=Q_acCdCdeeN-z2B'C'CA'FN^Nd  N +/*A*ABR]e*A*f*f''"JJ':;; 	i 44]Vg4hhM,,)/!5# ' 
 
 (* 	K!#34qrr7JJJ(+-)7&1	
 
 
 	
r'   r|  r   )r  rQ   )r   rQ   r  r   	NNNNNNNNF)r^  rQ   r  r   r  r   r  r   r  r   r   r   r  r  r  r  r  r  rv  r   r   r   )r!   r"   r#   r   config_classr   r   r  r  r   r   r   r   s   @r(   r   r     s        !L	E 	E 	E 	E 	E 	E) ) ) )$   * * * * *X  ,0+/)-&**.)-,0#'1
 1
 1
 1
 ]1
 1
 1
 1
 1
r'   r   c                  n     e Zd ZdZeZdZdZed             Z	ed             Z
 fdZddZ	 dddZ xZS )TFWav2Vec2PreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    wav2vec2r^  c                    t          j        dt           j        d          t          j        dt           j        d          dS )N)NNr^  r  r  r^  r  )r*   
TensorSpecr]   r   s    r(   input_signaturez)TFWav2Vec2PreTrainedModel.input_signature:  s@     M,
XXX mL"*K[\\\
 
 	
r'   c                    t           j                            dt           j                  t          j        dt           j                  dS )N)r   i  )rI   rX   r-  )r*   r-   r.   r]   rc   r   s    r(   dummy_inputsz&TFWav2Vec2PreTrainedModel.dummy_inputsA  sA     I--HBJ-OO gHBJGGG
 
 	
r'   c                     t                      j        |g|R i | t                              d| j        j         d           d S )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPU)r   r   loggerwarningr   r!   r   r   r   r   r   s       r(   r   z"TFWav2Vec2PreTrainedModel.__init__H  si    3&333F333E( E E E	
 	
 	
 	
 	
r'   Nc                   || j         j        n|}d }t          | j         j        | j         j                  D ]\  }} ||||          }|r3t          | j         j                  D ]} ||d| j         j                  }|S )r  Nc                N    t           j                            | |z
  |          dz   S r   )r*   r+   floordivr  s      r(   r  zTTFWav2Vec2PreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_lengthU  s$    7##L;$>GG!KKr'   r   )r   add_adapterr  r  r  r>   num_adapter_layersadapter_stride)r   r  r:  r  r   r  r4   s          r(   r  z:TFWav2Vec2PreTrainedModel._get_feat_extract_output_lengthsO  s     2=1Ddk--+	L 	L 	L $'t{'>@W#X#X 	Q 	QK,,]KPPMM 	_4;9:: _ _ 0 04;C] ^ ^r'   feature_vector_lengthrN   r  rQ   c                   t           j                            |d          d d df         }|                     ||          }t          j        |t           j                  }t          j        |          d         }t          j        ||f|j        d          }t          j	        |t          j
        t          j        |          |dz
  gd          t          j        |g|j                  	          }t          j        |dg          }t          j        |d          }t          j        |dg          }t          j        |t           j                  }|S )
Nr8   r9   )r:  r   r  )rX   r   r   rW   )r5   updates)r*   r+   cumsumr  r\   r_   rI   rb   rX   tensor_scatter_nd_updater   r>   rc   reverser   )r   r=  r  r:  non_padded_lengthsr%  rg   s          r(   "_get_feature_vector_attention_maskz<TFWav2Vec2PreTrainedModel._get_feature_vector_attention_mask`  sE     W^^N^DDQQQUK>>?Q_j>kk::Xn--a0
./~7KRb
 
 
 4Hbhz22NQ4FGaPPPGZL0DEEE
 
 

 N">>>>;;;N">>>99r'   r   )r=  rN   r  rQ   )r!   r"   r#   r$   r   r(  base_model_prefixmain_input_namepropertyr/  r1  r   r  rD  r   r   s   @r(   r*  r*  0  s         
 "L"$O
 
 X
 
 
 X

 
 
 
 
   $ RV        r'   r*  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_values` only and nothing else: `model(input_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_values, attention_mask])` or `model([input_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_values": input_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`Wav2Vec2Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a	  
    Args:
        input_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]` `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`np.ndarray` or `tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_values` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `input_values` indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False``):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare TFWav2Vec2 Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Zd fdZ ee           eee          e		 	 	 	 	 	 	 	 	 ddd                                    Z
ddZ xZS )TFWav2Vec2Modelr   r   c                |     t                      j        |g|R i | || _        t          |d          | _        d S )Nr+  r  )r   r   r   r   r+  r6  s       r(   r   zTFWav2Vec2Model.__init__  sI    3&333F333+FDDDr'   output_typer(  NFr^  rQ   r  r   r  r  r  r   r  r  r  r  rv  r   rP   r  c                    |r|n| j         j        }|r|n| j         j        }|	r|	n| j         j        }	|                     |||||||||	|

  
        }|S )a  

        Returns:

        Example:

        ```python
        >>> from transformers import AutoProcessor, TFWav2Vec2Model
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        >>> model = TFWav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")


        >>> def map_to_array(example):
        ...     example["speech"] = example["audio"]["array"]
        ...     return example


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> hidden_states = model(input_values).last_hidden_state
        ```
r^  r  r  r  r  r   r  r  r  rv  )r   r  r  r  r+  )r   r^  r  r  r  r  r   r  r  r  rv  r   s               r(   r   zTFWav2Vec2Model.call  s    T 8Lq33QUQ\Qq1Be--He%0Mkkdk6M--%))%'/!5#   
 
 r'   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr+  )r   r(  r*   r)  r+  r   r   r   s     r(   r   zTFWav2Vec2Model.build!  s    : 	F
4T**6t}122 * *##D)))* * * * * * * * * * * * * * * * * * 76s    A((A,/A,r|  r'  )r^  rQ   r  r   r  r   r  r   r  r   r   r   r  r  r  r  r  r  rv  r   rP   r  r   )r!   r"   r#   r   r   WAV2VEC2_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr   r   r   r   r   s   @r(   rI  rI    s        
E E E E E E
 +*+DEE+<?[[[ ,0+/)-&**.)-,0#'8 8 8 8 ] \[ FE8t* * * * * * * *r'   rI  zhTFWav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                       e Zd Zd fdZd Zd Ze ee           e	e
e          	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFWav2Vec2ForCTCr   r   c                j    t                      j        |g|R i | t          |d          | _        t          j                            |j                  | _        t          j        	                    |j
        d          | _        t          |d          r|j        r|j        n|j        | _        d S )Nr+  r  lm_headr:  )r   r   r   r+  r   r   rs  final_dropoutru  rq  
vocab_sizerU  hasattrr:  output_hidden_sizerD  r6  s       r(   r   zTFWav2Vec2ForCTC.__init__/  s    3&333F333+FDDD|++F,@AA|))&*;))LL)0)G)GvFL^vF%%djdv 	r'   c                b    t          j        dt                     |                                  dS z
        Calling this function will disable the gradient computation for the feature encoder so that its parameters will
        not be updated during training.
        zThe method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.Nrd  re  rg  freeze_feature_encoderr   s    r(   freeze_feature_extractorz)TFWav2Vec2ForCTC.freeze_feature_extractor9  ;    
 	Q	
 	
 	

 	##%%%%%r'   c                (    d| j         j        _        dS z
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        FNr+  r  r  r   s    r(   r]  z'TFWav2Vec2ForCTC.freeze_feature_encoderE      
 5:'111r'   rK  NFr^  rQ   r  r   r  r  r  r   r  r  labelsr  r  rv  rP   #TFCausalLMOutput | tuple[tf.Tensor]c                   |>t          j        |          | j        j        k    rt	          d| j        j                   |                     ||||||||	|
|
  
        }|d         }|                     ||          }|                     |          }|||nt          j        |t           j	                  }| j        
                    t          j        |d                    }t          j        |dk    t           j                  }t          j        |d          }t           j                            ||||| j        j        d	
          }| j        j        dk    rt          j        |          }| j        j        dk    rt          j        |          }t          j        |d          }nd}|
s|f|t(          d         z   }||f|z   n|S t+          |||j        |j                  S )a  
        labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_values` docstring) Tokens with indices set to `-100` are ignored (masked),
            the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Returns:

        Example:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoProcessor, TFWav2Vec2ForCTC
        >>> from datasets import load_dataset
        >>> from torchcodec.decoders import AudioDecoder

        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        >>> model = TFWav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")


        >>> def map_to_array(example):
        ...     example["speech"] = example["audio"]["array"]
        ...     return example


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> logits = model(input_values).logits
        >>> predicted_ids = tf.argmax(logits, axis=-1)

        >>> transcription = processor.decode(predicted_ids[0])

        >>> # compute loss
        >>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST"

        >>> # Pass transcription as `text` to encode labels
        >>> labels = processor(text=transcription, return_tensors="tf").input_ids

        >>> loss = model(input_values, labels=labels).loss
        ```Nz$Label values must be <= vocab_size: rN  r   ry  rW   r8   r9   F)logitsrd  logit_lengthlabel_lengthblank_indexlogits_time_majorsumr   rV   lossrg  r   r    )r*   
reduce_maxr   rW  rY   r+  ru  rU  rf   r]   r  r   r\   r_   r/   ctc_losspad_token_idctc_loss_reductionreduce_meanr;   _HIDDEN_STATES_START_POSITIONr	   r   r    )r   r^  r  r  r  r  r   r  rd  r  r  rv  r   r   rg  r  labels_masktarget_lengthsrn  r  s                       r(   r   zTFWav2Vec2ForCTC.callL  s   v "-"7"74;;Q"Q"Q\DKDZ\\]]]--%))%'/!5#   
 
  
]XFFm,,"0"<",|cecmBnBnBn  !MJJ2=YgnpKqKqKqrrM '&A+rx88K];R@@@N5>>*+ K4"' "  D {-66}T**{-77~d++:dD))DDD 	FY)F)G)G!HHF)-)9TGf$$vE!/)	
 
 
 	
r'   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr+  rU  )	r   r(  r*   r)  r+  r   r   rU  rY  r   s     r(   r   zTFWav2Vec2ForCTC.build  sq   : 	F
4T**6t}122 * *##D)))* * * * * * * * * * * * * * *4D))5t|011 J J""D$0G#HIIIJ J J J J J J J J J J J J J J J J J 65s$    A''A+.A+!#CCCr|  )
NNNNNNNNNF)r^  rQ   r  r   r  r   r  r   r  r   r   r   r  r  rd  r   r  r  r  r  rv  r  rP   re  r   )r!   r"   r#   r   r^  r]  r   r   rP  r   r	   rQ  r   r   r   r   s   @r(   rS  rS  *  s        

 
 
 
 
 

& 
& 
&: : : **+DEE+;/ZZZ ,0+/)-&**.)-#',0#' %r
 r
 r
 r
 [Z FE ]r
h	J 	J 	J 	J 	J 	J 	J 	Jr'   rS  c                  ^     e Zd Z fdZd Zd Zd Ze	 	 	 	 	 	 ddd            ZddZ	 xZ
S )#TFWav2Vec2ForSequenceClassificationc                "   t                                          |           t          |d          | _        |j        dz   | _        t          j        |                                           5  |j	        r$| 
                    | j        fddd          | _        d d d            n# 1 swxY w Y   || _        t          j                            |j        d	          | _        t          j                            |j        d d
          | _        d S )Nr+  r  r   rc   Tlayer_weightsr  	projector)rn  r   
classifier)rn  r"  r   )r   r   r   r+  r  
num_layersr*   r)  _name_scopeuse_weighted_layer_sumr   r{  r   r   r   rq  classifier_proj_sizer|  
num_labelsr}  )r   r   r   s     r(   r   z,TFWav2Vec2ForSequenceClassification.__init__  s4      +FDDD 2Q6]4++--.. 	 	, %)__?,&DWf &5 & &"	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 ++&2MT_+``,,,63DQU\h,iis   .,B&&B*-B*c                b    t          j        dt                     |                                  dS r[  r\  r   s    r(   r^  z<TFWav2Vec2ForSequenceClassification.freeze_feature_extractor  r_  r'   c                (    d| j         j        _        dS ra  rb  r   s    r(   r]  z:TFWav2Vec2ForSequenceClassification.freeze_feature_encoder  rc  r'   c                2    | j         j        D ]	}d|_        
dS )z
        Calling this function will disable the gradient computation for the base model so that its parameters will not
        be updated during training. Only the classification head will be updated.
        FN)r+  r   r  )r   rX  s     r(   freeze_base_modelz5TFWav2Vec2ForSequenceClassification.freeze_base_model  s+    
 ]) 	$ 	$E#EOO	$ 	$r'   NFr^  rQ   r  r   r  r  r  r  rd  rv  r   rP   -TFSequenceClassifierOutput | tuple[tf.Tensor]c           	        ||n| j         j        }| j         j        rdn|}|                     ||||||          }| j         j        rx|t                   }	t          j        |	d          }	t
          j                            | j	        d          }
t          j
        |	t          j        |
g d          z  d          }	n|d         }	|                     |	          }	|t          j        |	d          }n|                     t          |	          d         |          }t          j        ||	j                  }t          j        |	t          j        |d                    }	t          j        t          j
        |	d          t          j        t          j
        |d          d                    }|                     |          }d }|_t,          j                            d          } |t          j        |dg          t          j        |d| j         j        g                    }|s|f|t          d          z   }||f|z   n|S t5          |||j        |j        	          S )
NTr#  r   r9   r8   )r8   r   r   r   )from_logitsrm  )r   use_return_dictr  r+  rt  r*   r   r/   softmaxr{  r   r;   r|  rs  rD  r   r\   rX   multiplyr=   divider}  r   lossesSparseCategoricalCrossentropyr  r
   r   r    )r   r^  r  r  r  r  rd  rv  r   r   norm_weightspooled_outputpadding_maskpadding_mask_floatrg  rn  loss_fnr  s                     r(   r   z(TFWav2Vec2ForSequenceClassification.call  sp    &1%<kk$+B]'+{'IcttOc--)/!5#   
 
 ;- 	'#$ABMH];;;M5==);"=EELM-"*\S]S]S]:^:^*^efgggMM#AJM}55!N=qAAAMMBB:mC\C\]^C_aoppL!#}7J!K!KKr~FX_a7b7b7bccMIm!444bnR]SelmEnEnEnuv6w6w6w M //l@@T@RRG72:frd33RZT[McHd5e5effD 	FY)F)G)G!HHF)-)9TGf$$vE)!/)	
 
 
 	
r'   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j
        j                  5  | j
                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr+  r|  r}  )r   r(  r*   r)  r+  r   r   r|  r   rD  r}  r  r   s     r(   r   z)TFWav2Vec2ForSequenceClassification.build0  s"   : 	F
4T**6t}122 * *##D)))* * * * * * * * * * * * * * *4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L4t,,8t344 V V%%tT4;3S&TUUUV V V V V V V V V V V V V V V V V V 98s6    A''A+.A+!(CCC(EEE)NNNNNF)r^  rQ   r  r   r  r  r  r  r  r  rd  r   rv  r   rP   r  r   )r!   r"   r#   r   r^  r]  r  r   r   r   r   r   s   @r(   ry  ry    s        j j j j j
& 
& 
&: : :$ $ $  ,0)-,0#'#'5
 5
 5
 5
 ]5
nV V V V V V V Vr'   ry  )rS  rI  r*  ry  r*  )
rI   rJ   rK   rL   rM   rN   rO   rN   rP   rQ   r   )ro   rQ   rp   rq   )Jr$   
__future__r   rd  dataclassesr   typingr   numpyr  
tensorflowr*   activations_tfr   modeling_tf_outputsr   r	   r
   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   r   r   r   r   configuration_wav2vec2r   
get_loggerr!   r4  rt  _CHECKPOINT_FOR_DOCrQ  rt   r   r6   rH   rn   rx   r   Layerrz   r  r   r  r/  r=  rB  rG  rQ  rb  ri  r  r  r  r  r  r  r   r*  WAV2VEC2_START_DOCSTRINGrP  rI  rS  ry  __all__r&   r'   r(   <module>r     s   !   " " " " " "  ! ! ! ! ! !               / / / / / / b b b b b b b b b b              3 2 2 2 2 2 2 2              3 2 2 2 2 2 
	H	%	% !" 3 "  / / / / / / / /8  O O O& 	G G G G GV
6 
6 
6 
6 
6U U U U U%,, U U Up5 5 5 5 5!4 5 5 5p@ @ @ @ @U\%7 @ @ @:G G G G G5<#5 G G GD!G !G !G !G !G5<#5 !G !G !GHG G G G G(: G G G:    U\/   !+ !+ !+ !+ !+u|1 !+ !+ !+H
 
 
 
 
!9 
 
 
N N N N N%,"4 N N NBgB gB gB gB gB%,, gB gB gBT)U )U )U )U )UEL. )U )U )UX8S 8S 8S 8S 8SU\/ 8S 8S 8Sv6S 6S 6S 6S 6SEL,> 6S 6S 6SrM& M& M& M& M&* M& M& M&`O& O& O& O& O&u|'9 O& O& O&d M
 M
 M
 M
 M
%,, M
 M
 M
`E E E E E 1 E E EP( T5 p j I* I* I* I* I*/ I* I*	 I*X r ^J ^J ^J ^J ^J0 ^J ^J	 ^JBmV mV mV mV mV*C mV mV mV` v
u
ur'   