
    Pi=                         d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
 ddlmZ  G d d          Zd Zd Zd	 Zd
 Zed             Zedk    r e             dS dS )zBCommand-line phonemizer tool, have a 'phonemizer --help' to get in    N)	phonemize	separatorversionloggerpunctuation)BACKENDSc                   4    e Zd ZdZd Zd Zed             ZdS )CatchExceptionsa  Decorator wrapping a function in a try/except block

    When an exception occurs, display a user friendly message on
    standard output before exiting with error code 1.

    The detected exceptions are ValueError, OSError, RuntimeError,
    AssertionError and KeyboardInterrupt.

    Parameters
    ----------
    function :
        The function to wrap in a try/except block

    c                     || _         d S )N)function)selfr   s     c/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/phonemizer/main.py__init__zCatchExceptions.__init__*   s         c                 $   	 |                                   dS # t          t          t          t          t
          f$ r3}|                     d                    |                     Y d}~dS d}~wt          $ r |                     d           Y dS w xY w)z9Executes the wrapped function and catch common exceptionszfatal error: {}Nzkeyboard interruption, exiting)	r   IOError
ValueErrorOSErrorRuntimeErrorAssertionErrorexitformatKeyboardInterrupt)r   errs     r   __call__zCatchExceptions.__call__-   s    	8MMOOOOOWn. 	5 	5 	5II'..s33444444444  	8 	8 	8II6777777	8s    #B(A))"BBc                     t           j                            |                                 dz              t          j        d           dS )z0Write `msg` on stderr and exit with error code 1
   N)sysstderrwritestripr   )msgs    r   r   zCatchExceptions.exit9   s8     	
t+,,,r   N)__name__
__module____qualname____doc__r   r   staticmethodr    r   r   r
   r
      sW         ! ! !
8 
8 
8   \  r   r
   c            	      P   t          j        t           j        dd          } |                     dddd           |                                 }|                    d	d
dd           |                    dddd           |                     ddt
          ddd           |                     d          }|                    dt          j        ddd           |                    ddt          j	        dd           |                    dd d!dd"d#$           |                    d%dd&           |                     d'          }|                    d(d)d"d*g d+d,-           |                    d.d/dd0           |                     d1          }|                    d2d3d4d5d67           |                     d8          }|                    d9d:d"t          j        j        d;7           |                    d<d=d"t          j        j        d>7           |                    d?d@d"t          j        j        dA7           |                    dBddC           |                     dD          }	 t          dE                                         }n# t"          $ r d*}Y nw xY w|                    dFd*t$          dGdH| dIJ           |                    dKdd d!dLdMN           |                    dOddP           |                    dQdRg dSdTU           |                    dVdWg dXdYU           |                     dZ          }	 t          d[                                         }n# t"          $ r d*}Y nw xY w|                    d\d*t$          d]d^| d_J           |                     d`dab          }|                    dcddd           |                    det$          d"t(          j                                        df           |                    dgddh           |                                 S )iz,Argument parser for the phonemization scripta  Multilingual text to phonemes converter

The 'phonemize' program allows simple phonemization of words and texts
in many language using four backends: espeak, espeak-mbrola, festival
and segments.

- espeak is a text-to-speech software supporting multiple languages
  and IPA (International Phonetic Alphabet) output. See
  http://espeak.sourceforge.net or
  https://github.com/espeak-ng/espeak-ng

- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
  installed as well as additional mbrola voices. It does not support word or
  syllable tokenization. See
  https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md

- festival is also a text-to-speech software. Currently only American
  English is supported and festival uses a custom phoneset
  (http://www.festvox.org/bsv/c4711.html), but festival is the only
  backend supporting tokenization at the syllable
  level. See http://www.cstr.ed.ac.uk/projects/festival

- segments is a Unicode tokenizer that build a phonemization from a
  grapheme to phoneme mapping provided as a file by the user. See
  https://github.com/cldf/segments.

See the '--list-languages' option below for details on the languages
supported by each backend.

un  
Examples:

* Phonemize a US English text with espeak

   $ echo 'hello world' | phonemize -l en-us -b espeak
   həloʊ wɜːld

* Phonemize a US English text with festival

   $ echo 'hello world' | phonemize -l en-us -b festival
   hhaxlow werld

* Phonemize a Japanese text with segments

  $ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
  konnitʃiwa t͡sekai

* Add a separator between phones

  $ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
  hh-ax-l-ow w-er-l-d

* Phonemize some French text file using espeak

  $ phonemize -l fr-fr -b espeak text.txt -o phones.txt
        )formatter_classdescriptionepilogz-Vz	--version
store_truez"show version information and exit.)actionhelpz-vz	--verbosezEwrite all log messages to stderr (displays only warnings by default).z-qz--quietz.do not display any log message, even warnings.z-jz--njobsz<int>r   z0number of parallel jobs, default is %(default)s.)typemetavardefaultr0   zinput/outputinput?z<file>z?input text file to phonemize, if not specified read from stdin.)r3   nargsr2   r0   z-oz--outputz<output text file to write, if not specified write to stdout.)r3   r2   r0   z--prepend-textFTz<str>a:  prepend each line of the phonemized output text with its
        matching input text. If a string is specified as option value, use it
        as field separator, else use one of "|", "||", "|||", "||||" by
        selecting the first one that is not configured as a token separator
        (see -p/-s/-w options).)r3   constr6   r2   r0   z--preserve-empty-lineszUpreserve the empty lines in the phonemized output, default is
        to remove them.backendsz-bz	--backendN)espeakespeak-mbrolafestivalsegmentsztthe phonemization backend, must be 'espeak', 'espeak-mbrola',
        'festival' or 'segments'. Default is 'espeak'.)r2   r3   choicesr0   z-Lz--list-languageszllist available languages (and exit) for the specified backend,
        or for all backends if none selected.languagez-lz
--languagez
<str|file>zen-usz~the language code of the input text, use '--list-languages'
        for a list of supported languages. Default is %(default)s.)r2   r3   r0   ztoken separatorsz-pz--phone-separatorz*phone separator, default is "%(default)s".z-wz--word-separatorzVword separator, not valid for espeak-mbrola backend,
        default is "%(default)s".z-sz--syllable-separatorzsyllable separator, only valid for festival backend,
        this option has no effect if another backend is used.
        Default is "%(default)s".z--stripz0removes the end separators in phonemized tokens.zspecific to espeak backendr9   z--espeak-libraryz	<library>zthe path to the espeak shared library to use (*.so on Linux,
        *.dylib on Mac and *.dll on Windows, useful to overload the default
        espeak version installed on the system). Default to
        zc. This path can also be specified
        using the PHONEMIZER_ESPEAK_LIBRARY environment variable.)r3   r1   r2   r0   z--tiez<chr>u   when the option is set, use a tie character within multi-letter
        phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
        only compatible with espeak>1.48 and incompatible with the
        -p/--phone-separator option)r6   r3   r7   r2   r0   z--with-stressu   when the option is set, the stresses on phonemes are present
        (stresses characters are ˈ'ˌ). By default stresses are removed.z--language-switch
keep-flags)r?   zremove-flagszremove-utterancea)  espeak can pronounce some words in another language (typically
        English) when phonemizing a text. This option setups the policy to use
        when such a language switch occurs. Three values are available:
        'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
        'keep-flags' policy keeps the language switching flags, for example
        (en) or (jp), in the output. The 'remove-flags' policy removes them and
        the 'remove-utterance' policy removes the whole line of text including
        a language switch.)r3   r=   r0   z--words-mismatchignore)r@   warnremovea  espeak can join two consecutive words or drop some words,
        yielding a word count mismatch between orthographic and phonemized
        text. This option setups the policy to use when such a words count
        mismatch occurs. Three values are available: 'ignore' (the default)
        which do nothing, 'warn' which issue a warning for each mismatched
        line, and 'remove' which remove the mismatched lines from the
        output.zspecific to festival backendr;   z--festival-executablez<executable>zthe path to the festival executable to use (useful to
        overload the default festival installed on the system). Default to
        zh. This path can also be specified using the
        PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.zpunctuation processingz'not available for espeak-mbrola backend)r,   z--preserve-punctuationz[preserve the punctuation marks in the phonemized output,
        default is to remove them.z--punctuation-markszythe marks to consider during punctuation processing (either
        for removal or preservation). Default is %(default)s.z--punctuation-marks-is-regexzfinterpret the '--punctuation-marks' parameter as a regex.
        Default is to interpret as a string.)argparseArgumentParserRawDescriptionHelpFormatteradd_argumentadd_mutually_exclusive_groupintadd_argument_groupr   stdinstdoutr   default_separatorphonewordsyllabler   libraryr   str
executabler   Punctuationdefault_marks
parse_args)parsergroupespeak_libraryfestival_executables       r   rU   rU   @   s'   $ <<A: : :Fz k1  3 3 3
 //11E	k/  0 0 0
 
i=  ? ? ?
 i'1?  A A A %%n55E		hN  P P P
 
j
HK  M M M
 
Tg#  $ $ $ 
      %%j11E	kCCC:	  ; ; ; 
 1  2 2 2 %%j11E	lgF  G G G %%&899E	!!<!B9  ; ; ;
 
 !<!A%  & & & 
$!<!E%  & & & 
?  A A A
 %%&BCCE!(+3355    
3E 
E E E  F F F 
5g'  ( ( ( 
M  N N N
 
BBB	     
">">">  	 	 	 %%&DEEE#&z2==?? # # #"# 
3@	@ @ @  A A A %% = & ? ?E 
 &  ' ' '
 
''5577A	  B B B 
&0  1 1 1 s$   )J	 	JJ=M M,+M,c                    | st          j                    n| gD ]q}t          d| dd                    d t	          t           |                                                                                   D                       z              rdS )z@Returns the available languages for the given `backend` as a strzsupported languages for z are:
r   c              3   ,   K   | ]\  }}d | d| V  dS )	z	->	Nr)   ).0kvs      r   	<genexpr>z!list_languages.<locals>.<genexpr>7  sO       B BDAq'1''A'' B B B B B Br   N)r   keysprintjoinsortedsupported_languagesitems)args_backendbackends     r   list_languagesri   2  s    *6J8=???\N C C7w777II B BF!5577==??5A 5A B B B B BB	C 	C 	C 	CC Cr   c                 B    d}| rd}n|rd}t          j        |          S )zReturns a configured loggernormalverbosequiet)	verbosity)r   
get_logger)rl   rm   rn   s      r   ro   ro   ;  s7    I 			 	y1111r   c                 T    t          | t                    rt          | |d          S | S )z,If `stream` is a filename, open it as a fileutf8)encoding)
isinstancerQ   open)streammodes     r   setup_streamrw   E  s.    &# 3FD62222Mr   c                     t                      } | j        r%t          d                             | j                   | j        r%t          d                             | j                   | j        r"t          t          j                               dS | j        r#t          t          | j	                             dS | j	        pd| _	        t          | j        | j                  }t          | j        d          }|                    d|j                   t          | j        d          }|                    d|j                   | j	        dk    r2|                    d	           t%          j        | j        dd
          }n&t%          j        | j        | j        | j        
          }|                    d|           | j        r1|                    | j                  |                    d           nd| j        r	 |                    d| j                   t7          j        | j                  | _        nR# t6          j        $ r@ |                                 |                                 t?          d| j                   w xY wtA          |!                                | j"        | j	        || j#        | j        | j$        | j%        | j        | j&        | j'        | j(        | j)        | j*        |          }|rOrM|+                    tX          j-        .                    fd|D                       tX          j-        z              dS |rA|+                    tX          j-        .                    |          tX          j-        z              dS dS )z,Phonemize a text from command-line argumentsr9   r;   Nrzreading from %swzwriting to %sr:   z4using espeak-mbrola backend: ignoring word separator)rM   rO   rN   zseparator is %sz/prepend input text to output, separator is "%s"Fzpunctuation marks is regex %sz!can't compile regex pattern from )r>   rh   r   r"   prepend_textpreserve_empty_linespreserve_punctuationpunctuation_markswith_stresstielanguage_switchwords_mismatchnjobsr   c              3   D   K   | ]}|d           d d|d          V  dS )r    r   Nr)   )r]   lineinput_output_separators     r   r`   zmain.<locals>.<genexpr>  sV       ! ! 7??3??d1g??! ! ! ! ! !r   )/rU   rX   r   set_libraryrY   set_executabler   rb   ri   rh   ro   rl   rm   rw   r4   debugnameoutputr   	Separatorphone_separatorsyllable_separatorword_separatorr{   r   punctuation_marks_is_regexr~   recompileerrorcloser   r   	readlinesr>   r"   r|   r}   r   r   r   r   r   r!   oslineseprc   )argslogstreamin	streamoutsepoutr   s         @r   mainr   M  s    <<D  <&&t':;;; F++D,DEEE | go     nT\**+++ <+8DL T\4:
.
.C DJ,,HII///T[#..IIIoy~... |&&		HIII!&  
 !&,$& & & II%%% '!$!;!;D<M!N!N		="	$ 	$ 	$ 	$ "'& [	[II5t7MNNN%'Z0F%G%GD""x 	[ 	[ 	[NNOOYAWYYZZZ		[ j&!6!60$H,*j  C"  ;% ;JOO ! ! ! !! ! ! ! ! j	 	 	 	 	
 
 ;
,,rz9:::::; ;s   >9H8 8AJ__main__)r'   rC   r   r   r   
phonemizerr   r   r   r   r   phonemizer.backendr   r
   rU   ri   ro   rw   r   r$   r)   r   r   <module>r      s    I H  				 



 				 I I I I I I I I I I I I I I ' ' ' ' ' '" " " " " " " "Jo o odC C C2 2 2   Z; Z; Z;z zDFFFFF r   