
    Pi#                         d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZmZ ddlmZ  G d	 d
ej                  ZdS )z.Abstract base class for phonemization backends    N)Logger)OptionalListAnyDictTupleUnionPattern)
get_logger)Punctuation)	Separatordefault_separator)chunksc                      e Zd ZdZ	 	 	 ddedeeeef                  dedee	         fdZ
ed	             Zed
             Zed             Zeej        d                         Zeej        d                         Zeej        d                         Zeej        deeef         fd                        Zedefd            Z	 	 	 ddee         dee         dededee         f
dZedeee                  fd            Zej        dee         dedededee         f
d            Zdee         de eeee         f         ef         fdZ!dee         dedefdZ"dS ) BaseBackendaI  Abstract base class of all the phonemization backends

    Provides a common interface to all backends. The central method is
    `phonemize()`

    Parameters
    ----------
    language: str
        The language code of the input text, must be supported by
        the backend. If ``backend`` is 'segments', the language can be a file with
        a grapheme to phoneme mapping.

    preserve_punctuation: bool
        When True, will keep the punctuation in the
        phonemized output. Not supported by the 'espeak-mbrola' backend. Default
        to False and remove all the punctuation.

    punctuation_marks: str
        The punctuation marks to consider when dealing with punctuation, either for removal or preservation.
        Can be defined as a string or regular expression. Default to Punctuation.default_marks().

    logger: logging.Logger
        the logging instance where to send
        messages. If not specified, use the default system logger.

    Raises
    ------
    RuntimeError
        if the backend is not available of if the `language` cannot be initialized.

    NFlanguagepunctuation_markspreserve_punctuationloggerc           	         |t          j                    }|t                      }|                                 s4t	          d                    |                                                     || _        | j                            d|                                 d	                    d | 
                                D                                  |                     |          | _        || _        t          |          | _        d S )Nz{} not installed on your systemzinitializing backend %s-%s.c              3   4   K   | ]}t          |          V  d S )N)str).0vs     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/phonemizer/backend/base.py	<genexpr>z'BaseBackend.__init__.<locals>.<genexpr>S   s(      !A!AQ#a&&!A!A!A!A!A!A    )r   default_marksr   is_availableRuntimeErrorformatname_loggerinfojoinversion_init_language	_language_preserve_punctuation_punctuator)selfr   r   r   r   s        r   __init__zBaseBackend.__init__@   s    
 $ + 9 ; ;>\\F   "" 	G188EEG G G (IIKK!A!A$,,..!A!A!AAA	C 	C 	C
 ,,X66 &:"&'899r   c                     |                      |          s(t          d| d|                                  d          |S )zpLanguage initialization

        This method may be overloaded in child classes (see Segments backend)

        z
language "z" is not supported by the z backend)is_supported_languager!   r#   clsr   s     r   r(   zBaseBackend._init_language\   s^     ((22 	)(X ( (88::( ( () ) ) r   c                     | j         S )z0A logging.Logger instance where to send messages)r$   r,   s    r   r   zBaseBackend.loggeri   s     |r   c                     | j         S )z9The language code configured to be used for phonemization)r)   r3   s    r   r   zBaseBackend.languagen   s     ~r   c                      dS )zThe name of the backendN r6   r   r   r#   zBaseBackend.names         r   c                     dS )z9Returns True if the backend is installed, False otherwiseNr6   r1   s    r   r    zBaseBackend.is_availablex   r7   r   c                     dS )z;Return the backend version as a tuple (major, minor, patch)Nr6   r9   s    r   r'   zBaseBackend.version}   r7   r   returnc                      dS )z@Return a dict of language codes -> name supported by the backendNr6   r6   r   r   supported_languageszBaseBackend.supported_languages   r7   r   c                 .    ||                                  v S )z6Returns True if `language` is supported by the backend)r=   r0   s     r   r/   z!BaseBackend.is_supported_language   s     3224444r      text	separatorstripnjobsc           	          t          |t                    rt          d          t                               |          \  }}|dk    r                     |d          }n j                            d                                 |            t          j
        |           fdt          t          ||           D                       }                     |          }                     ||          S )a  Returns the `text` phonemized for the given language

        Parameters
        ----------
        text: list of str
            The text to be phonemized. Each string in the list
            is considered as a separated line. Each line is considered as a text
            utterance. Any empty utterance will be ignored.

        separator: Separator
            string separators between phonemes, syllables
            and words, default to separator.default_separator. Syllable separator
            is considered only for the festival backend. Word separator is
            ignored by the 'espeak-mbrola' backend.

        strip: bool
            If True, don't output the last word and phone separators
            of a token, default to False.

        njobs : int
            The number of parallel jobs to launch. The input text is
            split in ``njobs`` parts, phonemized on parallel instances of the
            backend and the outputs are finally collapsed.

        Returns
        -------
        phonemized text: list of str
            The input ``text`` phonemized for the given ``language`` and ``backend``.

        Raises
        ------
        RuntimeError
            if something went wrong during the phonemization

        z;input text to phonemize() is str but it must be list of strNr?   r   zrunning %s on %s jobs)n_jobsc              3   z   K   | ]5} t          j        j                  |d          |d                   V  6dS )r   r?   N)joblibdelayed_phonemize_aux)r   chunkr,   rA   rB   s     r   r   z(BaseBackend.phonemize.<locals>.<genexpr>   sc       78 78  4t233!HeAh	5: :78 78 78 78 78 78r   )
isinstancer   r!   r   _phonemize_preprocessrI   r   r%   r#   rG   Parallelzipr   _flatten_phonemize_postprocess)r,   r@   rA   rB   rC   r   
phonemizeds   ` ``   r   	phonemizezBaseBackend.phonemize   s2   N dC   	OMO O O )I"&"<"<T"B"BA::,,T1iGGJJ K4diikk5III 7666 78 78 78 78 78 78 !&u"5"56	78 78 78 8 8J z22J**:7H)UZ[[[r   rQ   c                 8    t          t          j        |            S )zFlatten a list of lists into a single one

        From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. This method is used to
        format the output as obtained using multiple jobs.

        )list	itertoolschain)rQ   s    r   rO   zBaseBackend._flatten   s     IOZ0111r   offsetc                     dS )a  The "concrete" phonemization method

        Must be implemented in child classes. `separator` and `strip`
        parameters are as given to the phonemize() method. `text` is as
        returned by _phonemize_preprocess(). `offset` is line number of the
        first line in `text` with respect to the original text (this is only
        usefull with running on chunks in multiple jobs. When using a single
        jobs the offset is 0).

        Nr6   )r,   r@   rW   rA   rB   s        r   rI   zBaseBackend._phonemize_aux   r7   r   c                 |    | j         r| j                            |          S | j                            |          g fS )zPreprocess the text before phonemization

        Removes the punctuation (keep trace of punctuation marks for further
        restoration if required by the `preserve_punctuation` option).

        )r*   r+   preserveremove)r,   r@   s     r   rL   z!BaseBackend._phonemize_preprocess   sB     % 	3#,,T222&&t,,b00r   c                 N    | j         r| j                            ||||          S |S )z\Postprocess the raw phonemized output

        Restores the punctuation as needed.

        )r*   r+   restore)r,   rQ   r   rA   rB   s        r   rP   z"BaseBackend._phonemize_postprocess   s5     % 	]#++J8I9V[\\\r   )NFN)NFr?   )#__name__
__module____qualname____doc__r   r   r	   r
   boolr   r-   classmethodr(   propertyr   r   staticmethodabcabstractmethodr#   r    r'   r   r=   r/   r   r   intrR   r   rO   rI   r   rL   rP   r6   r   r   r   r      s        B EI.3,0: : :$,U3<-@$A:'+: "&): : : :8 
 
 [
   X   X & &  \& H H  [H J J  [J Oc3h O O O  \O 5S 5 5 5 [5
 48 % C\ C\d3i C\%i0C\C\ C\ &*#YC\ C\ C\ C\J 2T$s)_ 2 2 2 \2 	
49 
c 
i 
X\ 
aefiaj 
 
 
 

1$s) 
1eCcN>SUY>Y8Z 
1 
1 
1 
1c *3 '+     r   r   )ra   rf   rU   reloggingr   typingr   r   r   r   r   r	   r
   rG   phonemizer.loggerr   phonemizer.punctuationr   phonemizer.separatorr   r   phonemizer.utilsr   ABCr   r6   r   r   <module>rq      s
   5 4 



     				       C C C C C C C C C C C C C C C C C C  ( ( ( ( ( ( . . . . . . = = = = = = = = # # # # # #` ` ` ` `#' ` ` ` ` `r   