
    Pi                         U d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZ ddlmZ e
d         Ze	ed<   d	ed
eddfdZ G d dej                  Z G d de          Z G d de          Z G d de          ZdS )z5Manages words count mismatches for the espeak backend    N)Logger)ListTuple)	TypeAliasLiteralUnion)	Separator)warnignoreWordMismatchmodeloggerreturnBaseWordsMismatchc           
          t           t          t          d}	  ||          |          S # t          $ r< t	          d|  dd                    |                                                     dw xY w)aO  Returns a word count mismatch processor according to `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    Raises a RuntimeError if the `mode` is unknown.

    )r   r
   removezmode z invalid, must be in z, N)IgnoreWarnRemoveKeyErrorRuntimeErrorjoinkeys)r   r   
processorss      |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/phonemizer/backend/espeak/words_mismatch.pyget_words_mismatch_processorr      s      J
z$'''   MDMMtyy9J9J/K/KMM
 
	s
   ( AA.c            	       H   e Zd ZdZ ej        d          ZdefdZe	efde
e         deeej        f         de
e         fd            Zde
eeeef                  fd	Zd
edefdZde
e         fdZde
e         defdZej        de
e         de
e         fd            ZdS )r   z4The base class of all word count mismatch processorsz\s+r   c                 0    || _         g | _        g | _        d S N)_logger
_count_txt
_count_phn)selfr   s     r   __init__zBaseWordsMismatch.__init__:   s        textwordsepr   c                 |    t          t          j                  st          j                  fd|D             S )z;Return the number of words contained in each line of `text`c           
          g | ]@}t          d  t          j        |                                          D                       AS )c                     g | ]}||S  r+   ).0ws     r   
<listcomp>z=BaseWordsMismatch._count_words.<locals>.<listcomp>.<listcomp>I   s    AAAqqAAAAr%   )lenresplitstrip)r,   liner'   s     r   r.   z2BaseWordsMismatch._count_words.<locals>.<listcomp>H   sT        AABHWdjjll;;AAABB  r%   )
isinstancer0   Patternescape)clsr&   r'   s     `r   _count_wordszBaseWordsMismatch._count_words?   sS     '2:.. 	)i((G      	r%   c                 *   t          | j                  t          | j                  k    r9t          dt          | j                   dt          | j                             d t	          t          | j        | j                            D             S )zReturns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. Raises a
        RuntimeError if input and output do not have the same number of lines.

        zBnumber of lines in input and output must be equal, we have: input=z	, output=c                 0    g | ]\  }\  }}||k    |||fS r+   r+   )r,   ntps       r   r.   z7BaseWordsMismatch._mismatched_lines.<locals>.<listcomp>Y   s7       #!VaAvv 1Ivvr%   )r/   r!   r"   r   	enumeratezip)r#   s    r   _mismatched_linesz#BaseWordsMismatch._mismatched_linesL   s     t3t#7#7771"%do"6"61 1do..1 12 2 2
 c$/4?;;<<   	r%   	nmismatchnlinesc                 p    |r3| j                             dt          ||z  d          dz  ||           dS dS )z$Logs a high level undetailed warningz1words count mismatch on %s%% of the lines (%s/%s)   d   N)r    warninground)r#   rA   rB   s      r   _resumezBaseWordsMismatch._resume^   s^     	GL  Ci&(!,,s2IvG G G G G	G 	Gr%   c                 :    |                      |          | _        dS )z-Stores the number of words in each input lineN)r8   r!   r#   r&   s     r   
count_textzBaseWordsMismatch.count_texte   s    ++D11r%   	separatorc                 F    |                      ||j                  | _        dS )z.Stores the number of words in each output lineN)r8   wordr"   )r#   r&   rL   s      r   count_phonemizedz"BaseWordsMismatch.count_phonemizedi   s    ++D).AAr%   c                     dS )zDetects and process word count misatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        Nr+   rJ   s     r   processzBaseWordsMismatch.processm   s      r%   N)__name__
__module____qualname____doc__r0   compile
_RE_SPACESr   r$   classmethodr   strr   r5   intr8   r   r@   rH   rK   r	   rO   abcabstractmethodrQ   r+   r%   r   r   r   6   sx       >>F##Jv    
  /9
 
s)
 3
?+
 >B#Y
 
 
 [
4c3m(<#=    $G Gc G G G G2tCy 2 2 2 2BT#Y B9 B B B B 	DI $s)      r%   c                   :    e Zd ZdZdee         dee         fdZdS )r   zIgnores word count mismatchesr&   r   c                     |                      t          |                                           t          |                     |S r   )rH   r/   r@   rJ   s     r   rQ   zIgnore.processz   s5    S//1122CII>>>r%   NrR   rS   rT   rU   r   rY   rQ   r+   r%   r   r   r   w   sD        ''DI $s)      r%   r   c                   :    e Zd ZdZdee         dee         fdZdS )r   z Warns on every mismatch detectedr&   r   c                     |                                  }|D ]&\  }}}| j                            d|dz   ||           '|                     t	          |          t	          |                     |S )Nz>words count mismatch on line %s (expected %s words but get %s)   )r@   r    rF   rH   r/   )r#   r&   mismatchnumntxtnphns         r   rQ   zWarn.process   s~    ))++' 	% 	%OCtL  1at% % % %
 	S]]CII...r%   Nr_   r+   r%   r   r   r      sD        **	DI 	$s) 	 	 	 	 	 	r%   r   c                   :    e Zd ZdZdee         dee         fdZdS )r   z6Removes any utterance containing a word count mismatchr&   r   c                     d |                                  D             }|                     t          |          t          |                     | j                            d           |D ]}d||<   |S )Nc                     g | ]
}|d          S )r   r+   )r,   r3   s     r   r.   z"Remove.process.<locals>.<listcomp>   s    AAADGAAAr%   zremoving the mismatched lines )r@   rH   r/   r    rF   )r#   r&   rc   indexs       r   rQ   zRemove.process   sw    AA(>(>(@(@AAAS]]CII...<=== 	 	EDKKr%   Nr_   r+   r%   r   r   r      sD        @@DI $s)      r%   r   )rU   r[   r0   loggingr   typingr   r   typing_extensionsr   r   r   phonemizer.separatorr	   r   __annotations__r   ABCr   r   r   r   r+   r%   r   <module>rr      sv   < ; ; 



 				               7 7 7 7 7 7 7 7 7 7 * * * * * * ""23i 3 3 3| V H[    0> > > > > > > >B              
 
 
 
 
 
 
 
 
 
r%   