
    Pi                         d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZ  G d d	e          ZdS )
z#Segments backend for the phonemizer    N)Logger)OptionalDictListUnionPattern)BaseBackend)	Separator)get_package_resourceversion_as_tuplec                   @    e Zd ZdZ	 	 	 ddedeeeef                  dedee	         f fdZ
d	 Zed
             Zed             Zed             Zed             Zededefd            Zededej        fd            Zdee         dedededee         f
dZ xZS )SegmentsBackendzSegments backends for the phonemizer

    The phonemize method will raise a ValueError when parsing an
    unknown morpheme.

    NFlanguagepunctuation_markspreserve_punctuationloggerc                 `    d | _         t                                          ||||           d S )N)r   r   r   )
_tokenizersuper__init__)selfr   r   r   r   	__class__s        o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/phonemizer/backend/segments.pyr   zSegmentsBackend.__init__$   sC    
 9=/!5	 	 	 	 	 	 	    c                     |                      |          }t          j        |          | _        t	          j        |          j        S )N)profile)_load_g2p_profilesegments	Tokenizerr   pathlibPathstem)r   r   r   s      r   _init_languagezSegmentsBackend._init_language0   s?    ((22",W=== |H%%**r   c                      dS )Nr    r%   r   r   namezSegmentsBackend.name8   s    zr   c                 4    t          t          j                  S )N)r   r   __version__clss    r   versionzSegmentsBackend.version<   s     4555r   c                     dS )NTr%   r)   s    r   is_availablezSegmentsBackend.is_available@   s    tr   c                  \    t          d          } d |                                 D             S )a  Returns a dict of language: file supported by the segments backend

        The supported languages have a grapheme to phoneme conversion file
        bundled with phonemizer. Users can also use their own file as
        parameter of the phonemize() function.

        r   c                 4    i | ]}|j         d k    |j        |S )z.g2p)suffixr"   ).0g2ps     r   
<dictcomp>z7SegmentsBackend.supported_languages.<locals>.<dictcomp>Q   s5     H H H#*2F2F #2F2F2Fr   )r   iterdir)	directorys    r   supported_languagesz#SegmentsBackend.supported_languagesD   sD     )44	H H$,,..H H H 	Hr   returnc                     t          j        |                                          r)	 |                     |           dS # t          $ r Y dS w xY w||                                 v S )NTF)r    r!   is_filer   RuntimeErrorr6   )r*   r   s     r   is_supported_languagez%SegmentsBackend.is_supported_languageT   sw    <!!))++ 	%%h///t   uu3224444s   ? 
AAc           
         t          j        |                                          s=	 |                                 |         }n!# t          $ r t          d|           dw xY wi }t          |dd          5 }t          |          D ]\  }}|                                	                                }t          |          dk    s4t          d                    |dz   t          |          |                    |d         ||d	         <   	 ddd           n# 1 swxY w Y   t          j        d
 |                                D              S )z,Returns a segments profile from a `language`z$grapheme to phoneme file not found: Nrutf8)encoding   zBgrapheme to phoneme file, line {} must have 2 rows but have {}: {}   r   c                     g | ]
\  }}||d S ))Graphememappingr%   )r1   kvs      r   
<listcomp>z5SegmentsBackend._load_g2p_profile.<locals>.<listcomp>x   s$    DDD11++DDDr   )r    r!   r9   r6   KeyErrorr:   open	enumeratestripsplitlenformatr   Profileitems)r*   r   r2   flangnumlineeltss          r   r   z!SegmentsBackend._load_g2p_profile^   s    |H%%--// 	--2244X> - - -""" "# #(,-- !(C&111 	'U&u-- ' '	Tzz||))++4yyA~~&**0&q#d))X*N*NP P P  $AwDG'	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' DD		DDDF 	Fs   A A!8BDD Dtextoffset	separatorrK   c                       fd|D             }|sd |D             }d |D             }d |D             }fd|D             }fd|D             }t          |          S )Nc              3   H   K   | ]}                     |d d          V  dS )rD   strict)columnerrorsN)r   )r1   rS   r   s     r   	<genexpr>z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>}   sI         OOD8ODD     r   c              3       K   | ]	}|d z   V  
dS ) # Nr%   r1   ps     r   r]   z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>   s&      88!e)888888r   c              3   B   K   | ]}|                     d d          V  dS )r_   z  # Nreplacer`   s     r   r]   z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>   s0      GGq!))E622GGGGGGr   c              3   B   K   | ]}|                     d d          V  dS )r_   #Nrc   r`   s     r   r]   z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>   s0      @@aiis++@@@@@@r   c              3   N   K   | ]}|                     d j                  V   dS ) N)rd   phoner1   ra   rW   s     r   r]   z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>   s3      JJ!aiiY_55JJJJJJr   c              3   N   K   | ]}|                     d j                  V   dS )rf   N)rd   wordrj   s     r   r]   z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>   s3      IIaiiY^44IIIIIIr   )list)r   rU   rV   rW   rK   
phonemizeds   `  `  r   _phonemize_auxzSegmentsBackend._phonemize_aux{   s         
  	H88Z888JGGJGGGJ A@Z@@@
JJJJzJJJ
IIIIjIII
 Jr   )NFN)__name__
__module____qualname____doc__strr   r   r   boolr   r   r#   staticmethodr&   classmethodr+   r-   r6   r;   r   rO   r   r   intr
   ro   __classcell__)r   s   @r   r   r      s         EI.3,0
 
 
$,U3<-@$A
'+
 "&)
 
 
 
 
 
+ + +   \ 6 6 [6   [ H H \H 5S 5T 5 5 5 [5 F F1A F F F [F8 49  c  i  X\  aefiaj                r   r   )rs   r    loggingr   typingr   r   r   r   r   r   phonemizer.backend.baser	   phonemizer.separatorr
   phonemizer.utilsr   r   r   r%   r   r   <module>r      s    * )        7 7 7 7 7 7 7 7 7 7 7 7 7 7  / / / / / / * * * * * * C C C C C C C Cs  s  s  s  s k s  s  s  s  s r   