
    Pi<"                         d Z ddlZddlZddlmZmZmZmZ ddlm	Z	 ddl
mZ dZ ej        dg d          Z G d	 d
          ZdS )z(Implementation of punctuation processing    N)ListUnionTuplePattern)str2list)	Separatoru!   ;:,.!?¡¿—…"«»“”(){}[]_mark_index)indexmarkpositionc                      e Zd ZdZefdeeef         fdZe	d             Z
ed             Zej        deeef         fd            Zdeeee         f         d	eeee         f         fd
Zdeee         ef         d	eeee                  ee         f         fdZdeded	eee         ee         f         fdZedeeee         f         dee         deded	ee         f
d            ZdS )PunctuationaZ  Preserve or remove the punctuation during phonemization

    Backends behave differently with punctuation: festival and espeak ignore it
    and remove it silently whereas segments will raise an error. The
    Punctuation class solves that issue by "hiding" the punctuation to the
    phonemization backend and restoring it afterwards.

    Parameters
    ----------
    marks (str or re.Pattern) : The punctuation marks to consider for processing
        (either removal or preservation). If a string, each mark must be made of
        a single character. Default to Punctuation.default_marks().

    marksc                 0    d | _         d | _        || _        d S N)_marks	_marks_rer   )selfr   s     j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/phonemizer/punctuation.py__init__zPunctuation.__init__/   s    '+


    c                      t           S )z1Returns the default punctuation marks as a string)_DEFAULT_MARKS r   r   default_markszPunctuation.default_marks4   s
     r   c                 <    | j         r| j         S t          d          )z!The punctuation marks as a stringzCpunctuation initialized from regex, cannot access marks as a string)r   
ValueError)r   s    r   r   zPunctuation.marks9   s%     ; 	;^___r   valuec                    t          |t                    r-t          j        d|j        z   dz             | _        d | _        d S t          |t                    r]d                    t          |                    | _        t          j        dt          j
        | j                   d          | _        d S t          d          )Nz((z)|\s)+ z(\s*[z]+\s*)+z;punctuation marks must be defined as a string or re.Pattern)
isinstancer   recompilepatternr   r   strjoinsetescaper   )r   r   s     r   r   zPunctuation.marks@   s    eW%% 	\Z(=	(IJJDNDKKKs## 	\''#e**--DK  Z(P4;1G1G(P(P(PQQDNNNZ[[[r   textreturnc                      dt           dt           f fdt          |t                     r |          S fd|D             S )zReturns the `text` with all punctuation marks replaced by spaces

        The input `text` can be a string or a list and is returned with the
        same type and punctuation removed.

        r)   r*   c                 ^    t          j        j        d|                                           S )N )r"   subr   strip)r)   r   s    r   auxzPunctuation.remove.<locals>.auxW   s%    6$.#t44::<<<r   c                 &    g | ]} |          S r   r   ).0liner0   s     r   
<listcomp>z&Punctuation.remove.<locals>.<listcomp>\   s!    +++dD		+++r   )r%   r!   )r   r)   r0   s   ` @r   removezPunctuation.removeO   sl    	=c 	=c 	= 	= 	= 	= 	= 	= dC   	3t99++++d++++r   c                     t          |          }g }g }t          |          D ](\  }}|                     ||          \  }}||z  }||z  })d |D             |fS )a  Removes punctuation from `text`, allowing for furter restoration

        This method returns the text as a list of punctuated chunks, along with
        a list of punctuation marks for furter restoration:

            'hello, my world!' -> ['hello', 'my world'], [',', '!']

        c                     g | ]}||S r   r   )r2   r3   s     r   r4   z(Punctuation.preserve.<locals>.<listcomp>o   s    88848888r   )r   	enumerate_preserve_line)r   r)   preserved_textpreserved_marksnumr3   r   s          r   preservezPunctuation.preserve^   sz     #4.."4 	% 	%IC--dC88KD%d"Nu$OO88888/IIr   r3   r<   c                    t          t          j        | j        |                    }|s|gg fS t	          |          dk    r2|d                                         |k    rg t          ||d          gfS g }|D ]}d}||d         k    r*|                    |                                          rd}n5||d         k    r)|                    |                                          rd}|	                    t          ||                                |                     g }|D ]]}|
                    |j                  }	|	d         |j                            |	dd                   }}
|	                    |
           |}^||gz   |fS )	z+Auxiliary method for Punctuation.preserve()   r   AIBEN)listr"   finditerr   lengroup
_MarkIndex
startswithendswithappendsplitr   r&   )r   r3   r<   matchesr   matchr   preserved_liner   rM   prefixsuffixs               r   r9   zPunctuation._preserve_lineq   s   r{4>48899 	62: w<<1!1!1!3!3t!;!;
3c22333  	C 	CE H
""tu{{}}'E'E"'"+%%$--*F*F%LLCAABBBB  	 	DJJty))E"1Xty~~eABBi'@'@FF!!&)))DD &--r   sepr/   c           
         t          |          }g }d}|s|rm|sK|D ]D}|s+|j        r$|                    |j                  s
||j        z   }|                    |           Eg }n|sO|                    t	          j        d|j        d                    d |D                                            g }n|d         }|j        |k    r|d         }	|dd         }t	          j        d|j        |	j                  }	|j        rF|d                             |j                  r&|d         dt          |j                            |d<   |j
        dk    r|	|d         z   |d<   n|j
        dk    rT|                    |d         |	z   |s|	                    |j                  rdn|j        z              |dd         }|dz   }n|j
        d	k    rA|                    |	|s|	                    |j                  rdn|j        z              |dz   }npt          |          dk    r|d         |	z   |d<   nN|d         }
|dd         }|
|	z   |d         z   |d<   n*|                    |d                    |dd         }|dz   }|j|m|S )
az  Restore punctuation in a text.

        This is the reverse operation of Punctuation.preserve(). It takes a
        list of punctuated chunks and a list of punctuation marks, as well as
        the separator and strip parameters used by phonemize. It returns the
        punctuated text as a list:

            ['hello', 'my world'], [',', '!'] -> ['hello, my world!']

        r   r-   r    c              3   $   K   | ]}|j         V  d S r   )r   )r2   ms     r   	<genexpr>z&Punctuation.restore.<locals>.<genexpr>   s%      D[D[PQQVD[D[D[D[D[D[r   r?   NrB   rD   r@   )r   wordrK   rL   r"   r.   r&   r
   r   rG   r   )clsr)   r   rS   r/   punctuated_textposr3   current_markr   
first_words              r   restorezPunctuation.restore   s    ~~ 4	"e 4	" 2"  1 1D  /SX /dmmCH6M6M /#ch#**40000 +"  &&rvc38RWWD[D[UZD[D[D[=[=['\'\]]]  %Qx%,, !8D!!""IE6#sx;;D x ;DG$4$4SX$>$> ;"&q'/CMM>/":Q#,33"&a.Q%.#55'..tAw~uAuPTP]P]^a^fPgPgAumpmu/vwww#ABBx!Ag%.#55'..tU7kdmmTWT\F]F]7krrcfck/lmmm!Ag t99>> '+1gnDGG)-aJ#'8D&04&7$q'&ADGG $**473338D'Ci  4	"e 4	"n r   N)__name__
__module____qualname____doc__r   r   r%   r   r   staticmethodr   propertyr   setterr   r5   r   rI   r=   intr9   classmethodr   boolr^   r   r   r   r   r      s         5C  eCL1    
   \ ` ` X` \\5g. \ \ \ \\,5d3i0 ,U3S	>5J , , , ,JU49c>2 JuT$s)_dS]N^=^7_ J J J J&.3 .S .U49d:FV;V5W . . . .B I5d3i0 IJ'II I !%S	I I I [I I Ir   r   )rb   collectionsr"   typingr   r   r   r   phonemizer.utilsr   phonemizer.separatorr   r   
namedtuplerI   r   r   r   r   <module>rn      s    / .     				 . . . . . . . . . . . . % % % % % % * * * * * * 5#[#0002 2
} } } } } } } } } }r   