
    Pi                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ  G d d          ZdS )    N)
TableGroupColumn)Tree)grapheme_patternc                       e Zd ZdZdZdZddddded	dd
geddgiZeddefd            Z	defdZ
dej        eddf         fdZeddd            Zeddedd fd            Zeddd            Zd ZdS )ProfilezG
    An Orthography Profile as specified by Moran and Cysouw 2018.
    GraphemeNULLtables	Tutf-8)	delimiterheaderencodingstring)namedatatyperequired)columns
primaryKey)dialecttableSchemaNreturnc                 x    t          j         | j                  }t          |pd          |d         d         d<   |S )N r   r   url)copyMDstr)clsfnamemds      d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/segments/profile.pydefault_metadatazProfile.default_metadata,   s6    Ysv!$U[b!1!18Q	    specsc                 Z    t          j                     _        t                       _        |                    dd           _        |                    dd           _        | _        t          j
        t                    }t          |          D ]\  }} j        |vrt          d           j        r  fd|                                D             }|                     j                  }|st          d           j                            |                                           _        | j        vr| j        |<   |                    d                    |dz   |                     t)          t+           j                                                             _        dS )	a  

        Parameters
        ----------
        specs : list of dict
            A list of grapheme specifications.
        kw :
            The following keyword arguments are recognized:
            - fname: Path of the profile or profile metadata.
            - form: Unicode normalization to apply to the data in the profile before use.
            - remaining keyword arguments are assigned as dict to `Profile.metadata`.
        r!   Nformzinvalid grapheme specificationc                     i | ]<\  }}t          j        j        |          |d nt          j        j        |          =S N)unicodedata	normalizer(   ).0kvselfs      r#   
<dictcomp>z$Profile.__init__.<locals>.<dictcomp>K   sY     . . . 1  )$)Q77 !	{/DTYPQ/R/R. . .r%   zGrapheme must not be emptyz+line {0}:duplicate grapheme in profile: {1}   )collectionsOrderedDict	graphemessetcolumn_labelspopr!   r(   metadatalogging	getLogger__name__	enumerateGRAPHEME_COL
ValueErroritemsunionkeyswarningformatr   listtree)r0   r&   kwlogispecgraphemes   `      r#   __init__zProfile.__init__2   s    %022 UUVVGT**
FF64((	)) '' 	[ 	[GAt ,, !ABBBy .. . . . !%

. . .
 xx 122H ? !=>>>!%!3!9!9$))++!F!FD t~--+/x((AHHQPXYY[ [ [ [dn11334455			r%   c              #     K   | j                                         D ]g\  }}| j        |i}|                    d | j        D                        |                    d |                                D                        |V  hd S )Nc                     i | ]}|d S r*    )r-   r.   s     r#   r1   z%Profile.iteritems.<locals>.<dictcomp>a   s    <<<A4<<<r%   c                     i | ]\  }}||	S rO   rO   )r-   r.   r/   s      r#   r1   z%Profile.iteritems.<locals>.<dictcomp>b   s    666A1666r%   )r5   r@   r>   updater7   )r0   rK   rJ   ress       r#   	iteritemszProfile.iteritems^   s      "n2244 	 	NHd$h/CJJ<<);<<<===JJ66666777IIII		 	r%   c                 P    	 t          j        |          }d}nC# t          j        j        $ r, t          j                             |                    }|}Y nw xY wt          |j                  dk    rt          d          |j
        }|                    t          j        |          |           t          j                    5  t          j        d              fd|j        d                             |          D             i |}ddd           n# 1 swxY w Y   |S )	zk
        Read an orthography profile from a metadata file or a default tab-separated profile file.
        N   z2profile description must contain exactly one table)r!   r(   ignorec                 P    g | ]"}fd |                                 D             #S )c                 L    i | ] \  }}||j         k    r|j        k    rd n|!S r*   )r>   r
   )r-   r.   r/   r    s      r#   r1   z0Profile.from_file.<locals>.<listcomp>.<dictcomp>w   sJ     * * *q! qC$444chddQ * * *r%   )r@   )r-   dr    s     r#   
<listcomp>z%Profile.from_file.<locals>.<listcomp>w   sX     B B B* * * * wwyy* * * B B Br%   r   r!   )r   	from_filejsondecoderJSONDecodeError	fromvaluer$   lenr   r?   common_propsrQ   pathlibPathwarningscatch_warningssimplefilter	iterdicts)r    r!   r(   tgopfnamer9   rR   s   `      r#   r\   zProfile.from_filee   s   
	%e,,BGG|+ 	 	 	%c&:&:5&A&ABBBGGG	 ry>>QQRRR?gl511===$&& 	 	!(+++#B B B B9Q<111@@B B B 	 C	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
s"    =AAADD"Dmappingtextc                      t          j        t          j        |                    } fd|                                D             }  | S )z
        Create a Profile instance from the Unicode graphemes found in `text`.

        Parameters
        ----------
        text
        mapping

        Returns
        -------
        A Profile instance.

        c                 Z    g | ]'\  }}t          j        j        |fd |f|fg          (S )	frequency)r3   r4   r>   )r-   rK   ro   r    rk   s      r#   rZ   z%Profile.from_text.<locals>.<listcomp>   sb     @ @ @
 $)	 #!8,i((#%% & &@ @ @r%   )r3   Counterr   findallmost_common)r    rl   rk   r5   r&   s   ` `  r#   	from_textzProfile.from_text}   ss      '(8(@(F(FGG	@ @ @ @ @
 (1'<'<'>'>@ @ @ sE{r%   c                    t          j        |                              d          5 }|                                }|                     d                    |          |          cd d d            S # 1 swxY w Y   d S )Nr   )r    rk   )rc   rd   open	readlinesrs   join)r    r!   rk   fpliness        r#   from_textfilezProfile.from_textfile   s    \%  %%w%77 	C2LLNNE==%'=BB	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	Cs   >A44A8;A8c                    t          j        |                                           }| j        D ]Q}|| j        k    rD|j        d         j        j                            t          j        || j
        d                     R|j        d                             |                                 d                              d                                          S )z]
        A Profile is represented as tab-separated lines of grapheme specifications.
        r   )r   nullNr[   utf8)r   r`   r$   r7   r>   r   r   r   appendr   r
   writerS   decodestrip)r0   ri   cols      r#   __str__zProfile.__str__   s     !$"7"7"9"9::% 	H 	HCd'''	!(077$c49%E%EFFH H H y|!!$.."2"2$!??FFvNNTTVVVr%   r*   )r   r   rv   )r<   
__module____qualname____doc__r>   r
   r   classmethoddictr$   rL   typing	GeneratorrS   r\   r   rs   r|   r   rO   r%   r#   r   r      s         LD "&" '  %1(0(,   #/	  	  

B,  T    [
*6t *6 *6 *6 *6X6+D$,<=         [.  S 	    [. C C C C [C

W 
W 
W 
W 
Wr%   r   )r   r   r:   rc   re   r3   r+   json.decoderr]   csvwr   r   segments.treer   segments.utilr   r   rO   r%   r#   <module>r      s                     # # # # # # # #       * * * * * *UW UW UW UW UW UW UW UW UW UWr%   