
    Pi.                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
  e j        de j                            e j                            e          d                    ZefdZd Zd	 Zd
 Zd Z e             	  e            Z e
            Zn,# e$ r$Z ede             e
            Zi ZY dZ[ndZ[ww xY wdedefdZefdedefdZefdedefdZdS )    N)	phonemize)EspeakWrapper)VietnameseTTSNormalizerPHONEME_DICT_PATHzphoneme_dict.jsonc                     	 t          | dd          5 }t          j        |          cddd           S # 1 swxY w Y   dS # t          $ r t          d|  d          w xY w)z'Load phoneme dictionary from JSON file.rzutf-8)encodingNz Phoneme dictionary not found at zA. Please create it or set PHONEME_DICT_PATH environment variable.)openjsonloadFileNotFoundError)pathfs     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vieneu_utils/phonemize_text.pyload_phoneme_dictr      s    
$g... 	 !9Q<<	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  
 
 
Nt N N N
 
 	

s&   A 5A 9A 9A A c                      t          j                    } | dk    rt                       dS | dk    rt                       dS | dk    rt	                       dS t          d|             dS )z8Configure eSpeak library path based on operating system.WindowsLinuxDarwinzWarning: Unsupported OS: N)platformsystem_setup_windows_espeak_setup_linux_espeak_setup_macos_espeakprint)r   s    r   setup_espeak_libraryr      s    _F	7			8		2&22333    c                      d} t           j                            |           rt          j        |            dS t          d           dS )zSetup eSpeak for Windows.z+C:\Program Files\eSpeak NG\libespeak-ng.dllu   ⚠️ eSpeak-NG is not installed. The system will use the built-in dictionary, but it is recommended to install eSpeak-NG for maximum performance and accuracy.N)osr   existsr   set_libraryr   )default_paths    r   r   r   )   s\    AL	w~~l## r!,/////  q  	r  	r  	r  	r  	rr   c                      g d} | D ]I}t          j         |          }|r1t          j        t          |t                    d                     dS Jt          d           dS )zSetup eSpeak for Linux.)z*/usr/lib/x86_64-linux-gnu/libespeak-ng.so*z'/usr/lib/x86_64-linux-gnu/libespeak.so*z/usr/lib/libespeak-ng.so*z/usr/lib64/libespeak-ng.so*z/usr/local/lib/libespeak-ng.so*)keyr   Nu   ⚠️ eSpeak-NG is not installed on Linux. The system will use the built-in dictionary, but it is recommended to install eSpeak-NG (sudo apt install espeak-ng) for maximum performance.)globr   r!   sortedlenr   )search_patternspatternmatchess      r   r   r   1   s      O #  )G$$ 	%fW#&>&>&>q&ABBBFF	 
  F  G  G  G  G  Gr   c                      t           j                            d          } | dddg}|D ]:}|r6t           j                            |          rt          j        |            dS ;t          d           dS )zSetup eSpeak for macOS.PHONEMIZER_ESPEAK_LIBRARYz$/opt/homebrew/lib/libespeak-ng.dylibz!/usr/local/lib/libespeak-ng.dylibz!/opt/local/lib/libespeak-ng.dylibNu   ⚠️ eSpeak-NG is not installed on macOS. The system will use the built-in dictionary, but it is recommended to install eSpeak-NG (brew install espeak-ng) for maximum performance.)r   environgetr   r    r   r!   r   )
espeak_libpaths_to_checkr   s      r   r   r   C   s     ;<<J 	.++	N    	BGNN4(( 	%d+++FF	  B  C  C  C  C  Cr   zInitialization error: textreturnc                 `    t                               |           } t          | ddddd          S )zv
    Convert text to phonemes (simple version without dict, without EN tag).
    Kept for backward compatibility.
    viespeakTremove-flagslanguagebackendpreserve_punctuationwith_stresslanguage_switch)
normalizer	normalizer   )r1   s    r   phonemize_textr?   a   s@    
 %%D!&   r   c                    t                               |           } t          j        d| t          j                  }g }g }g }g }g }g }t          |          D ]\  }	}
t          j        d|
t          j                  rtt          j        dd|
t          j                                                  }|	                    |           |	                    |	           |	                    d           |
                                }g }t          |          D ]\  }}t          j        d|          }|r|
                                nd|df\  }}}|s|	                    |           Q||v r"|	                    | ||          |            w|	                    |           |	                    |	           |	                    |	t          |          f           |	                    d           |	                    |           |r	 t          |dd	d
d
d          }t          |t                    r|g}t          t          ||                    D ]\  }\  }	}|                                ||	<    nL# t           $ r?}t#          d|            |D ] }	||                    |	                   ||	<   !Y d}~nd}~ww xY w|r$	 t          |dd	d
d
d          }t          |t                    r|g}t          |          D ]\  }\  }	}||                                         }||         }|                                                    d          r"t          |          dk    rd|dd         z   n|}|||<   ||	         |||	         |<   nZ# t           $ rM}t#          d|            t          |          D ]!\  }\  }	}||	         ||         ||	         |<   "Y d}~nd}~ww xY wg }|D ]a}
t          |
t*                    r3|	                    d                    d |
D                                  J|
|	                    |
           bd                    |          }t          j        dd|          }|S )zJ
    Phonemize single text with dictionary lookup and EN tag support.
    (<en>.*?</en>)flags<en>.*</en></?en> N^(\W*)(.*?)(\W*)$en-usr5   Tr6   r7   z'Warning: Could not phonemize EN texts: r4   r   r      ɹ   z'Warning: Could not phonemize VI texts:  c              3   8   K   | ]}|t          |          V  d S Nstr.0ws     r   	<genexpr>z&phonemize_with_dict.<locals>.<genexpr>   s(      'N'N1A'N'Nr   \s+([.,!?;:])\1)r=   r>   resplit
IGNORECASE	enumeratematchsubstripappendgroupsr'   r   
isinstancerO   zip	Exceptionr   indexlower
startswithlistjoin)r1   phoneme_dictpartsen_texts
en_indicesvi_texts
vi_indicesvi_word_mapsprocessed_partspart_idxpart
en_contentwordsprocessed_wordsword_idxwordrZ   precoresufen_phonemesidxphonemeevi_phonemesoriginal_wordfinal_partsresults                               r   phonemize_with_dictr   q   sv    %%D H&BMBBBEHJHJLO#E** 4 4$8ND"-88 	4	2t2=IIIOOQQJOOJ'''h'''""4(((( JJLLE O"+E"2"2 1 1$!5t<<38!Lr4nT3 1#**40000\))#**c+L<3E+Ls+L+LMMMMOOD)))%%h/// ''33G3G(HIII#**40000""?3333 Q	Q#  %)  .  K +s++ ,*m,5c*k6R6R,S,S < <((h,3MMOO))< 	Q 	Q 	Q?A??@@@& Q Q,4Z5E5Eh5O5O,P))Q Q Q Q Q	Q
  H	H# %)  .  K +s++ ,*m-6|-D-D 
B 
B))h%c*0022 ( &&((33C88 R47LL14D4DdWQRR[00'G.5]+"8,8:AOH-h7
B  	H 	H 	H?A??@@@-6|-D-D H H))h"8,8:B3-OH-h7H H H H H	H K % %dD!! 	%sxx'N'N'N'N'NNNOOOOt$$$XXk""FV$eV44FMs3   A*I6 6
J? 5J::J?CN 
O&AO!!O&textsc           
      n   d | D             }g }g }g }g }g }t          |          D ]\  }}	t          j        d|	t          j                  }
g }t          |
          D ]\  }}t          j        d|t          j                  rvt          j        dd|t          j                                                  }|                    |           |                    ||f           |                    d           |                                }g }|D ]}t          j        d|          }|r|                                nd|df\  }}}|s|                    |           N||v r"|                    | ||          |            t|                    |           |                    ||t          |          f           |                    d           |                    |           |                    |           |r	 t          |d	d
ddd          }t          |t                    r|g}t          ||          D ]%\  \  }}}|                                ||         |<   &n)# t          $ r}t          d|            Y d}~nd}~ww xY w|r	 t          |dd
ddd          }t          |t                    r|g}t          |          D ]\  }\  }}}||                                         }||         }|                                                    d          r"t          |          dk    rd|dd         z   n|}|||<   |||         |         |<   n)# t          $ r}t          d|            Y d}~nd}~ww xY wg }|D ]}g }|D ]a}t          |t$                    r3|                    d                    d |D                                  J||                    |           bd                    |          }t          j        dd|          }|                    |           |S )z
    Phonemize multiple texts with optimal batching.
    
    Args:
        texts: List of text strings to phonemize
        phoneme_dict: Phoneme dictionary for lookup
    
    Returns:
        List of phonemized texts
    c                 B    g | ]}t                               |          S  )r=   r>   )rQ   r1   s     r   
<listcomp>z#phonemize_batch.<locals>.<listcomp>   s&    EEEt
,,T22EEEr   rA   rB   rD   rE   rF   NrG   rH   r5   Tr6   r7   z(Warning: Batch EN phonemization failed: r4   r   r   rI   rJ   z(Warning: Batch VI phonemization failed: rK   c              3   8   K   | ]}|t          |          V  d S rM   rN   rP   s     r   rS   z"phonemize_batch.<locals>.<genexpr>F  s(      +R+RqAMCFFMMMM+R+Rr   rT   rU   )rY   rV   rW   rX   rZ   r[   r\   r]   r^   r'   r   r_   rO   r`   ra   r   rc   rd   re   rf   ) r   rg   normalized_textsall_en_textsall_en_mapsall_vi_textsall_vi_mapsresultstext_idxr1   rh   rn   ro   rp   rq   rr   rs   ru   rZ   rv   rw   rx   ry   r{   r|   r}   rz   rt   r~   final_resultsr   r   s                                    r   phonemize_batchr      s'    FEuEEELKLKG#$455 ( ($*DFFF'.. 	8 	8NHdxbm<< 8VIr4r}MMMSSUU
##J///""Hh#7888&&t,,,,

"$! 5 5DH%94@@E7<%PU\\^^^2tR.NCs 5'..t4444--'..#/P|D7I/P3/P/PQQQQ$++D111#**HhO@T@T+UVVV'..t4444&&7777'''' B	B#  %)  .  K +s++ ,*m14[+1N1N > >-$8g.5mmoo!(++> 	B 	B 	B@Q@@AAAAAAAA	B  B	B# %)  .  K +s++ ,*m7@7M7M @ @33h(%c*0022 ,S 1 &&((33C88 R47LL14D4DdWQRR[00'G.5]+8?!(+H55@  	B 	B 	B@Q@@AAAAAAAA	B M" 
% 
%# 	) 	)D$%% )""388+R+RD+R+R+R#R#RSSSS!""4(((+&&(%88V$$$$s2   A#I+ +
J5JJCM 
N)N  N) r   r   r   r%   rV   
phonemizerr    phonemizer.backend.espeak.espeakr   vieneu_utils.normalize_textr   getenvr   rf   dirname__file__r   r   r   r   r   r   rg   r=   ra   r|   r   rO   r?   r   re   r   r   r   r   <module>r      s   				    				             : : : : : : ? ? ? ? ? ? BIGLL**,?@@  
 - 	
 	
 	
 	
  r r rG G G$C C C&     $$&&L((**JJ   	E
&1
&
&'''((**JLLLLLL	       1= l lc l l l l l^ /; n n4 nt n n n n n ns   B CB>>C