
    Pi3                     (   d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZmZ g dZdddddej        dededededej        fdZddddddddddej        ddej        dededee         d ee         d!ed"e d#eded$ed%ee         d&ededej        fd'Z!d(dd)d*dd+d,ej        d-ed.ed/ee"         d0ed1edej        fd2Z#d(dd)d*dd+d,ej        d-ed.ed/ee"         d0ed1ededej        fd3Z$dS )4zFeature inversion    N   )
get_fftlib)ParameterError)
griffinlim)db_to_power)tiny)filters)nnls	expand_to)	DTypeLike)AnyOptional)_WindowSpec_PadModeSTFT)mel_to_stftmel_to_audiomfcc_to_melmfcc_to_audioi"V  i   g       @srn_fftpowerMr   r   r   kwargsreturnc                    t          j        d||| j        d         | j        d|}t	          ||           }t          j        |d|z  |          S )a  Approximate STFT magnitude from a Mel power spectrogram.

    Parameters
    ----------
    M : np.ndarray [shape=(..., n_mels, n), non-negative]
        The spectrogram as produced by `feature.melspectrogram`
    sr : number > 0 [scalar]
        sampling rate of the underlying signal
    n_fft : int > 0 [scalar]
        number of FFT components in the resulting STFT
    power : float > 0 [scalar]
        Exponent for the magnitude melspectrogram
    **kwargs : additional keyword arguments for Mel filter bank parameters
    fmin : float >= 0 [scalar]
        lowest frequency (in Hz)
    fmax : float >= 0 [scalar]
        highest frequency (in Hz).
        If `None`, use ``fmax = sr / 2.0``
    htk : bool [scalar]
        use HTK formula instead of Slaney
    norm : {None, 'slaney', or number} [scalar]
        If 'slaney', divide the triangular mel weights by the width of
        the mel band (area normalization).
        If numeric, use `librosa.util.normalize` to normalize each filter
        by to unit l_p norm. See `librosa.util.normalize` for a full
        description of supported norm values (including `+-np.inf`).
        Otherwise, leave all the triangles aiming for a peak value of 1.0
    dtype : np.dtype
        The data type of the output basis.
        By default, uses 32-bit (single-precision) floating point.

    Returns
    -------
    S : np.ndarray [shape=(..., n_fft, t), non-negative]
        An approximate linear magnitude spectrogram

    See Also
    --------
    librosa.feature.melspectrogram
    librosa.stft
    librosa.filters.mel
    librosa.util.nnls

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> S = librosa.util.abs2(librosa.stft(y))
    >>> mel_spec = librosa.feature.melspectrogram(S=S, sr=sr)
    >>> S_inv = librosa.feature.inverse.mel_to_stft(mel_spec, sr=sr)

    Compare the results visually

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=3, sharex=True, sharey=True)
    >>> img = librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max, top_db=None),
    ...                          y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].set(title='Original STFT')
    >>> ax[0].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(S_inv, ref=np.max, top_db=None),
    ...                          y_axis='log', x_axis='time', ax=ax[1])
    >>> ax[1].set(title='Reconstructed STFT')
    >>> ax[1].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(S_inv - S),
    ...                                                  ref=S.max(), top_db=None),
    ...                          vmax=0, y_axis='log', x_axis='time', cmap='magma', ax=ax[2])
    >>> ax[2].set(title='Residual error (dB)')
    >>> fig.colorbar(img, ax=ax, format="%+2.f dB")
    )r   r   n_melsdtype      ?)out )r	   melshaper   r
   npr   )r   r   r   r   r   	mel_basisinverses          k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/librosa/feature/inverse.pyr   r      se    Z  U172;ag AG I 9a  G8GS5[g6666    hannTconstant    )r   r   
hop_length
win_lengthwindowcenterpad_moder   n_iterlengthr   r-   r.   r/   r0   r1   r2   r3   r   c                V    t          | f|||d|}t          ||	|||||||
|
  
        S )a&
  Invert a mel power spectrogram to audio using Griffin-Lim.

    This is primarily a convenience wrapper for:

        >>> S = librosa.feature.inverse.mel_to_stft(M)
        >>> y = librosa.griffinlim(S)

    Parameters
    ----------
    M : np.ndarray [shape=(..., n_mels, n), non-negative]
        The spectrogram as produced by `feature.melspectrogram`
    sr : number > 0 [scalar]
        sampling rate of the underlying signal
    n_fft : int > 0 [scalar]
        number of FFT components in the resulting STFT
    hop_length : None or int > 0
        The hop length of the STFT.  If not provided, it will default to ``n_fft // 4``
    win_length : None or int > 0
        The window length of the STFT.  By default, it will equal ``n_fft``
    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        A window specification as supported by `stft` or `istft`
    center : boolean
        If `True`, the STFT is assumed to use centered frames.
        If `False`, the STFT is assumed to use left-aligned frames.
    pad_mode : string
        If ``center=True``, the padding mode to use at the edges of the signal.
        By default, STFT uses zero padding.
    power : float > 0 [scalar]
        Exponent for the magnitude melspectrogram
    n_iter : int > 0
        The number of iterations for Griffin-Lim
    length : None or int > 0
        If provided, the output ``y`` is zero-padded or clipped to exactly ``length``
        samples.
    dtype : np.dtype
        Real numeric type for the time-domain signal.  Default is 32-bit float.
    **kwargs : additional keyword arguments for Mel filter bank parameters
    fmin : float >= 0 [scalar]
        lowest frequency (in Hz)
    fmax : float >= 0 [scalar]
        highest frequency (in Hz).
        If `None`, use ``fmax = sr / 2.0``
    htk : bool [scalar]
        use HTK formula instead of Slaney
    norm : {None, 'slaney', or number} [scalar]
        If 'slaney', divide the triangular mel weights by the width of
        the mel band (area normalization).
        If numeric, use `librosa.util.normalize` to normalize each filter
        by to unit l_p norm. See `librosa.util.normalize` for a full
        description of supported norm values (including `+-np.inf`).
        Otherwise, leave all the triangles aiming for a peak value of 1.0

    Returns
    -------
    y : np.ndarray [shape(..., n,)]
        time-domain signal reconstructed from ``M``

    See Also
    --------
    librosa.griffinlim
    librosa.feature.melspectrogram
    librosa.filters.mel
    librosa.feature.inverse.mel_to_stft
    r   )	r2   r-   r.   r   r/   r0   r   r3   r1   )r   r   )r   r   r   r-   r.   r/   r0   r1   r   r2   r3   r   r   stfts                 r(   r   r   n   sX    ` qDRuEDDVDDD   r)      orthor    r   dct_typenormrefliftermfccr   r9   r:   r;   r<   c                ~   |dk    r| j         d         }t          j        dd|z   | j                  }t	          || j        d          }d|dz  t          j        t          j        |z  |z            z  z   }t          j        t          j	        |          t          j
        |j                  j        k               rt          j        dt          d	           | |t          |           z   z  } n|dk    rt!          d
          t#                      }	|	                    | d|||          }
t'          |
|          }|S )aW  Invert Mel-frequency cepstral coefficients to approximate a Mel power
    spectrogram.

    This inversion proceeds in two steps:

        1. The inverse DCT is applied to the MFCCs
        2. `librosa.db_to_power` is applied to map the dB-scaled result to a power spectrogram

    Parameters
    ----------
    mfcc : np.ndarray [shape=(..., n_mfcc, n)]
        The Mel-frequency cepstral coefficients
    n_mels : int > 0
        The number of Mel frequencies
    dct_type : {1, 2, 3}
        Discrete cosine transform (DCT) type
        By default, DCT type-2 is used.
    norm : None or 'ortho'
        If ``dct_type`` is `2 or 3`, setting ``norm='ortho'`` uses an orthonormal
        DCT basis.
        Normalization is not supported for `dct_type=1`.
    ref : float
        Reference power for (inverse) decibel calculation
    lifter : number >= 0
        If ``lifter>0``, apply inverse liftering (inverse cepstral filtering)::
            M[n, :] <- M[n, :] / (1 + sin(pi * (n + 1) / lifter) * lifter / 2)

    Returns
    -------
    M : np.ndarray [shape=(..., n_mels, n)]
        An approximate Mel power spectrum recovered from ``mfcc``

    Warns
    -----
    UserWarning
        due to critical values in lifter array that invokes underflow.

    See Also
    --------
    librosa.feature.mfcc
    librosa.feature.melspectrogram
    scipy.fft.dct
    r   r      )r   )ndimaxesg      ?z@lifter array includes critical values that may invoke underflow.r   )messagecategory
stacklevelz1MFCC to mel lifter must be a non-negative number.)axistyper:   n)r;   )r$   r%   aranger   r   r@   sinpianyabsfinfoepswarningswarnUserWarningr   r   r   idctr   )r=   r   r9   r:   r;   r<   n_mfccidxlifter_sinefftlogmelmelspecs               r(   r   r      s4   h zzBi1v:TZ888$)"555&3,f0D)E)EEE 6"&%%1B(C(C(GGHH 	MZ$    {T$ZZ/0	1PQQQ
,,CXXd(XHHF%f#666GNr)   c                F    t          | |||||          }t          |fi |S )a  Convert Mel-frequency cepstral coefficients to a time-domain audio signal

    This function is primarily a convenience wrapper for the following steps:

        1. Convert mfcc to Mel power spectrum (`mfcc_to_mel`)
        2. Convert Mel power spectrum to time-domain audio (`mel_to_audio`)

    Parameters
    ----------
    mfcc : np.ndarray [shape=(..., n_mfcc, n)]
        The Mel-frequency cepstral coefficients
    n_mels : int > 0
        The number of Mel frequencies
    dct_type : {1, 2, 3}
        Discrete cosine transform (DCT) type
        By default, DCT type-2 is used.
    norm : None or 'ortho'
        If ``dct_type`` is `2 or 3`, setting ``norm='ortho'`` uses an orthonormal
        DCT basis.
        Normalization is not supported for ``dct_type=1``.
    ref : float
        Reference power for (inverse) decibel calculation
    lifter : number >= 0
        If ``lifter>0``, apply inverse liftering (inverse cepstral filtering)::
            M[n, :] <- M[n, :] / (1 + sin(pi * (n + 1) / lifter)) * lifter / 2
    **kwargs : additional keyword arguments to pass through to `mel_to_audio`
    M : np.ndarray [shape=(..., n_mels, n), non-negative]
        The spectrogram as produced by `feature.melspectrogram`
    sr : number > 0 [scalar]
        sampling rate of the underlying signal
    n_fft : int > 0 [scalar]
        number of FFT components in the resulting STFT
    hop_length : None or int > 0
        The hop length of the STFT.  If not provided, it will default to ``n_fft // 4``
    win_length : None or int > 0
        The window length of the STFT.  By default, it will equal ``n_fft``
    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        A window specification as supported by `stft` or `istft`
    center : boolean
        If `True`, the STFT is assumed to use centered frames.
        If `False`, the STFT is assumed to use left-aligned frames.
    pad_mode : string
        If ``center=True``, the padding mode to use at the edges of the signal.
        By default, STFT uses zero padding.
    power : float > 0 [scalar]
        Exponent for the magnitude melspectrogram
    n_iter : int > 0
        The number of iterations for Griffin-Lim
    length : None or int > 0
        If provided, the output ``y`` is zero-padded or clipped to exactly ``length``
        samples.
    dtype : np.dtype
        Real numeric type for the time-domain signal.  Default is 32-bit float.
    **kwargs : additional keyword arguments for Mel filter bank parameters
    fmin : float >= 0 [scalar]
        lowest frequency (in Hz)
    fmax : float >= 0 [scalar]
        highest frequency (in Hz).
        If `None`, use ``fmax = sr / 2.0``
    htk : bool [scalar]
        use HTK formula instead of Slaney

    Returns
    -------
    y : np.ndarray [shape=(..., n)]
        A time-domain signal reconstructed from `mfcc`

    See Also
    --------
    mfcc_to_mel
    mel_to_audio
    librosa.feature.mfcc
    librosa.griffinlim
    scipy.fft.dct
    r8   )r   r   )r=   r   r9   r:   r;   r<   r   mel_specs           r(   r   r     s>    j VhTs6  H ++F+++r)   )%__doc__rO   numpyr%   core.fftr   util.exceptionsr   core.spectrumr   r   
util.utilsr    r	   utilr
   r   numpy.typingr   typingr   r   _typingr   r   __all__ndarrayfloatintr   float32boolr   strr   r   r"   r)   r(   <module>rm      s          ! ! ! ! ! ! , , , , , , & & & & & & ' ' ' ' ' '             " " " " " " " " " " " " " "                 / / / / / / / /
I
I
I U7 U7 U7	zU7 	U7 	U7
 U7 U7 ZU7 U7 U7 U7v  $ $ ' z] ] ]	z] 	] 	]
 ] ] ] ] ] ] ] SM] ] ] Z] ] ] ]F !K K K
*K K 	K
 3-K 
K K ZK K K Kb !Y, Y, Y,
*Y, Y, 	Y,
 3-Y, 
Y, Y, Y, ZY, Y, Y, Y, Y, Y,r)   