
    *`i/                     n   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlZd dlmZmZmZmZ erd dlmZ  ej        e          Z e            r*d dlZ ej                    Z edd	 eD                       Zn edd
d
i          Z e            rd dlZd ej                                        D             Z  G d d          Z!de"ej#        z  de"ej#        z  fdZ$dej#        dej#        fdZ%dej#        dej#        dej#        fdZ&ede'de'de"de"de'dej#        fd            Z(dS )    N)Enum)cache)Path)TYPE_CHECKING)assert_soundfile_installedassert_soxr_installedis_soundfile_installedis_soxr_installed)RawAudioAudioFormatc                     i | ]}||S  r   ).0format_names     h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/mistral_common/audio.py
<dictcomp>r       s    &e&e&eK{K&e&e&e    nonec                 @    g | ]}|j                                         S r   )valuelower)r   vs     r   
<listcomp>r   '   s"    TTTa!'--//TTTr   c                   6   e Zd Zdej        dededdfdZdefdZddZ	e
defd	            Zeddededd fd            Zeddededd fd            Zeddededd fd            Zeddededd fd            ZddededefdZedd            ZdeddfdZdS ) Audioaudio_arraysampling_rateformatreturnNc                 X    || _         || _        || _        |                                  dS )a  Initialize an Audio instance with audio data, sampling rate, and format.

        Args:
            audio_array: The audio data as a numpy array.
            sampling_rate: The sampling rate of the audio in Hz.
            format: The format of the audio file.
        N)r   r   r   _check_valid)selfr   r   r   s       r   __init__zAudio.__init__+   s3     '*r   c                 l    d| j          dt          | j                  | j         z  dd| j        j         S )NzAudio - sampling_rate=z Hz, duration=z.2fz	s, shape=)r   lenr   shaper"   s    r   __repr__zAudio.__repr__8   sQ    .T%7 . .D,--0BBK. .%+. .	
r   c                 @   t          | j        t          j                  s!J t	          t          j                              | j        j        dk    sJ d| j        j                    t                       | j        t          v sJ d| j        dt                      d S )N   zself.audio_array.ndim=zself.format= not in EXPECTED_FORMAT_VALUES=)	
isinstancer   npndarraytypendimr   r   EXPECTED_FORMAT_VALUESr'   s    r   r!   zAudio._check_valid?   s    $*BJ77IIbj9I9III7$)))+Fd.>.C+F+F)))"$$${44446h6h6hOe6h6h44444r   c                 :    | j         j        d         | j        z  }|S )z{Calculate the duration of the audio in seconds.

        Returns:
           The duration of the audio in seconds.
        r   )r   r&   r   )r"   durations     r   r3   zAudio.durationE   s!     *03d6HHr   Turlstrictc                 ,   	 t          j        |           }|                                 t                              |j        |          S # t           j        $ r}t          d|            |d}~wt          $ r}t          d|  d          |d}~ww xY w)zCreate an Audio instance from a URL.

        Args:
            url: The URL of the audio file.
            strict: Whether to strictly enforce mono audio.

        Returns:
            An instance of the Audio class.
        r5   z#Failed to download audio from URL: Nz*Failed to create Audio instance from URL: z .)	requestsgetraise_for_statusr   
from_bytescontentRequestException
ValueError	Exception)r4   r5   responsees       r   from_urlzAudio.from_urlP   s    	Z|C((H%%'''##H$4V#DDD( 	Q 	Q 	QH3HHIIqP 	Z 	Z 	ZQ#QQQRRXYY	Zs$   AA BA--B:BBaudio_base64c                 &   t                       t          j        d|           r|                     d          d         } 	 t	          j        |           }n"# t          $ r}t          d          |d}~ww xY wt          	                    ||          S )a  Create an Audio instance from a base64 encoded string.

        Args:
            audio_base64: The base64 encoded audio data.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        z^data:audio/\w+;base64,,r*   zHbase64 decoding failed. Please check the input string is a valid base64.Nr7   )
r   rematchsplitbase64	b64decoder?   r>   r   r;   )rC   r5   audio_bytesrA   s       r   from_base64zAudio.from_base64d   s     	#$$$8.== 	6'--c2215L	p *<88KK 	p 	p 	pghhnoo	p F;;;s    A 
A4A//A4filec                    t                       t          | t                    r|                     d          r
| dd         } t	          |                                           st          d| d          t          | d          5 }|                                }ddd           n# 1 swxY w Y   t          
                    ||          S )zCreate an Audio instance from an audio file.

        Args:
            file: Path to the audio file.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        zfile://   Nzfile=z does not existrbr7   )r   r,   str
startswithr   existsFileNotFoundErroropenreadr   r;   )rM   r5   frK   s       r   	from_filezAudio.from_file{   s     	#$$$dC   	T__Y%?%? 	8DDzz  "" 	?#$=t$=$=$=>>>$ 	#&&((K	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# F;;;s   B((B,/B,rK   c                    t          j        |           5 }t          j        |          5 }|                    d          }|j        }|j        }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t          |          }|j        	                                }|j
        dk    r/|rt          d|j
                  |                    d          }t          |||          S )zCreate an Audio instance from bytes.

        Args:
            audio_bytes: The audio data as bytes.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        float32)dtypeNr*   zaudio_array.ndim=axis)r   r   r   )ioBytesIOsf	SoundFilerV   
samplerater   r   r   r   r0   r>   meanr   )	rK   r5   
audio_filerW   r   r   audio_formatformat_enumr   s	            r   r;   zAudio.from_bytes   sh    Z$$ 	(
j)) (Qff9f55 ! x	( ( ( ( ( ( ( ( ( ( ( ( ( ( (	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( ",//"((**q   7 !7K$4!7!7888)..A.66MRXYYYYs4   A2%AA2A	A2"A	#A22A69A6Fprefixc                    t                       |t          v sJ d|dt                      t          j                    5 }t	          j        || j        | j        |                                           |	                    d           t          j        |                                                              d          }ddd           n# 1 swxY w Y   |rd|                                 d| }|S )	a  Convert the audio data to a base64 encoded string.

        Args:
            format: The format to encode the audio in.
            prefix: Whether to add a data prefix to the base64 encoded string.

        Returns:
            The base64 encoded audio data.
        zformat=r+   )r   r   zutf-8Nzdata:audio/z;base64,)r   r1   r^   r_   r`   writer   r   upperseekrI   	b64encoderV   decoder   )r"   r   rg   rd   
base64_strs        r   	to_base64zAudio.to_base64   s9    	#$$$////1^F1^1^E[1^1^///Z\\ 	MZHZ!143Efllnn]]]]OOA)*//*;*;<<CCGLLJ	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M  	LKv||~~KKzKKJs    BCCCaudior   c                 .   t          | j        t                    rt                              | j                  S t          | j        t
                    rt                              | j                  S t          dt          | j                             )zCreate an Audio instance from a RawAudio object.

        Args:
            audio: The RawAudio object containing audio data.

        Returns:
            An instance of the Audio class.
        zUnsupported audio data type: )	r,   databytesr   r;   rQ   rL   r>   r/   )rp   s    r   from_raw_audiozAudio.from_raw_audio   s{     ej%(( 	Q##EJ///
C(( 	Q$$UZ000OT%*=M=MOOPPPr   new_sampling_ratec                     | j         |k    rdS t                       t          j        | j        | j         |d          | _        || _         dS )zResample audio data to a new sampling rate.

        Args:
            new_sampling_rate: The new sampling rate to resample the audio to.
        NHQ)quality)r   r   soxrresampler   )r"   ru   s     r   rz   zAudio.resample   sU     !222F=)94;MO`jnooo.r   )r   N)T)F)rp   r   r   r   )__name__
__module____qualname__r-   r.   intrQ   r#   r(   r!   propertyfloatr3   staticmethodboolrB   rL   rX   rs   r;   ro   rt   rz   r   r   r   r   r   *   s       BJ s C TX    
# 
 
 
 
i i i i %    X Z Zc Z4 Z7 Z Z Z \Z& < <# <t <w < < < \<, < < <T <W < < < \<. Z Z Zt Zw Z Z Z \Z:  T c    , Q Q Q \Q /# /$ / / / / / /r   r   freqr   c                    d}d}dt          j        d          z  }d| z  dz  }t          | t           j                  r^t          |t           j                  sJ t	          |                      | |k    }|t          j        | |         |z            |z  z   ||<   n#| |k    r|t          j        | |z            |z  z   }|S )zConvert frequency from hertz to mels using the "slaney" mel-scale.

    Args:
        freq: The frequency, or multiple frequencies, in hertz (Hz).

    Returns:
        The frequencies on the mel scale.
         @@      .@      ;@皙@      @      i@)r-   logr,   r.   r/   )r   min_log_hertzmin_log_mellogstepmels
log_regions         r   hertz_to_melr      s     MKRVC[[ G:D$
## D$
++77T$ZZ77+]*
&Z0@=0P)Q)QT[)[[Z			RVD=$899GCCKr   r   c                     d}d}t          j        d          dz  }d| z  dz  }| |k    }|t          j        || |         |z
  z            z  ||<   |S )zConvert frequency from mels to hertz using the "slaney" mel-scale.

    Args:
        mels: The frequency, or multiple frequencies, in mels.

    Returns:
        The frequencies in hertz.
    r   r   r   r   r   r   )r-   r   exp)r   r   r   r   r   r   s         r   mel_to_hertzr      se     MKfSkkD G4<#D$J$rvgj9IK9W.X'Y'YYDKr   	fft_freqsfilter_freqsc                 ^   t          j        |          }t          j        |d          t          j        | d          z
  }|ddddf          |dd         z  }|ddddf         |dd         z  }t          j        t          j        d          t          j        ||                    }|S )a@  Creates a triangular filter bank.

    Adapted from *torchaudio* and *librosa*.

    Args:
        fft_freqs: Discrete frequencies of the FFT bins in Hz.
        filter_freqs: Center frequencies of the triangular filters to create, in Hz.

    Returns:
        array of shape `(num_frequency_bins, num_mel_filters)`
    r   r*   N   )r-   diffexpand_dimsmaximumzerosminimum)r   r   filter_diffslopesdown_slopes	up_slopesfilter_banks          r   _create_triangular_filter_bankr     s     ',''K^L!,,r~i/K/KKF!!!SbS&>/K$44Kqqq!""uABB/I j!bji6X6XYYKr   num_frequency_binsnum_mel_binsmin_frequencymax_frequencyr   c                 6   | dk     rt          d|  d          ||k    rt          d| d|           t          |          }t          |          }t          j        |||dz             }t	          |          }t          j        d|dz  |           }	t          |	|          }
d|d|dz            |d|         z
  z  }|
t          j        |d          z  }
|
                    d	          d
k                                    rt          d| d|  d          |
S )aV  Create a Mel filter bank matrix for converting frequency bins to the Mel scale.

    This function generates a filter bank matrix that can be used to transform a
    spectrum represented in frequency bins to the Mel scale. The Mel scale is a
    perceptual scale of pitches judged by listeners to be equal in distance from one another.

    Args:
        num_frequency_bins: The number of frequency bins in the input spectrum.
        num_mel_bins: The number of desired Mel bins in the output.
        min_frequency: The minimum frequency (in Hz) to consider.
        max_frequency: The maximum frequency (in Hz) to consider.
        sampling_rate: The sampling rate of the audio signal.

    Returns:
        A filter bank matrix of shape (num_mel_bins, num_frequency_bins)
        that can be used to project frequency bin energies onto Mel bins.
    r   zRequire num_frequency_bins: z >= 2zRequire min_frequency: z <= max_frequency: r   g       @Nr\   g        zNAt least one mel filter has all zero values. The value for `num_mel_filters` (z?) may be set too high. Or, the value for `num_frequency_bins` (z) may be set too low.)	r>   r   r-   linspacer   r   r   maxany)r   r   r   r   r   mel_minmel_max	mel_freqsr   r   mel_filtersenorms               r   mel_filter_bankr   &  sf   2 AQ8JQQQRRR}$$d=ddUbddeee =))G=))GGWlQ.>??I	**L A}13EFFI0LIIK <L1$4 45]l]8SSTE2>%+++KQ3&++-- 
:0<: : #	: : :
 
 	
 r   ))rI   r^   loggingrF   enumr   	functoolsr   pathlibr   typingr   numpyr-   r8   mistral_common.importsr   r   r	   r
   &mistral_common.protocol.instruct.chunkr   	getLoggerr{   logger	soundfiler`   available_formatsr   ry   __members__valuesr1   r   r   r.   r   r   r   r~   r   r   r   r   <module>r      s    				  				                                                @??????		8	$	$ 	8 -,.. $}&e&eSd&e&e&effKK$}vv&677K KKKTT;3J3Q3Q3S3STTT z/ z/ z/ z/ z/ z/ z/ z/zurz) ebj.@    0rz bj    &bj 
 WYWa    ( 555 5 	5
 5 Z5 5 5 5 5 5r   