
    Pi              	       
   d dl mZ esdev rddlmZ nd dlZ	 d dlZn# e	$ r d dlZY nw xY wd Z
d Zd Zd	 Z G d
 de          Z G d de          Z ej        e            G d de          Z ej        e            G d de          Z ej        e            G d de          Z ej        e           d Zd Z G d de          Z ej        e            G d de          Z ej        e           d Zd dl Z d dl!Z!d dl Z d dl"Z"d dl#Z$d dl%m&Z& d dl%m'Z' d Z(d Z)ej*        Z+ej*        Z, e-edej.                    e-edej.                   ej/        e_0        ej1        e_2        d D ]Z3 e)ee3            e(e            e(e            e(e           eZ4eZ5dd!l6m7Z7  ee"j8        9                     e:e$j;        <                    d"                    d#                      G d$ d%e          Z=dS )&    )version_info.   )_sentencepieceNc                     	 d| j                                         z   }n# t          j        $ r d}Y nw xY wd| j        j        d| j        j        d|dS )Nz	proxy of  <r   z; z >)this__repr____builtin__	Exception	__class__
__module____name__)selfstrthiss     j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/sentencepiece/__init__.py
_swig_reprr      ss    	 2 2 4 44    !^6668O8O8OQXQXQXZZs    33c                       fd}|S )Nc                 4   |dk    r | ||           d S |dk    r| j                             |           d S t          | |          r?t          t	          t          |           |          t                    r | ||           d S t          d| z            )Nr
   thisownz(You cannot add instance attributes to %s)r
   ownhasattr
isinstancegetattrtypepropertyAttributeError)r   namevaluesets      r   set_instance_attrzE_swig_setattr_nondynamic_instance_variable.<locals>.set_instance_attr   s    6>>CdE"""""YIMM%     T4   	TZT

D0I0I8%T%T 	TCdE""""" !Kd!RSSS     )r!   r"   s   ` r   *_swig_setattr_nondynamic_instance_variabler%      s)    T T T T T r#   c                       fd}|S )Nc                     t          | |          r2t          t          | |          t                    s | ||           d S t	          d| z            )Nz%You cannot add class attributes to %s)r   r   r   r   r   )clsr   r    r!   s      r   set_class_attrz?_swig_setattr_nondynamic_class_variable.<locals>.set_class_attr)   s`    3 	Pjd1C1CX&N&N 	PCT5!!!!! !H3!NOOOr#   r$   )r!   r)   s   ` r   '_swig_setattr_nondynamic_class_variabler*   (   s)    P P P P P
 r#   c                       fd}|S )zlClass decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclassc                 `     | j         | j        | j                                                  S N)r   	__bases____dict__copy)r(   	metaclasss    r   wrapperz$_swig_add_metaclass.<locals>.wrapper3   s)    ys}cl6G6G6I6IJJJr#   r$   )r1   r2   s   ` r   _swig_add_metaclassr3   1   s&    K K K K KNr#   c                   2    e Zd ZdZ eej                  ZdS )_SwigNonDynamicMetazKMeta class to enforce nondynamic attributes (no new attributes) for a classN)r   r   __qualname____doc__r*   r   __setattr__r$   r#   r   r5   r5   8   s'        UU99$:JKKKKKr#   r5   c                       e Zd Z ed d d          ZeZd Zej	        Z
d Zd Zd Zd	 Zd
 Zd Zd Z ee          Z ee          Z ee          Z ee          Z ee          Z ee          Z ee          Zd Zd Zd ZeZdS )1ImmutableSentencePieceText_ImmutableSentencePiecec                 4    | j                                         S r-   r
   r   xs    r   <lambda>z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>>        r#   c                 6    | j                             |          S r-   r<   r>   vs     r   r?   z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>>       AFJJqMM r#   The membership flagdocc                 R    t          j        | t          j                               d S r-   )r   :ImmutableSentencePieceText_ImmutableSentencePiece_swiginit5new_ImmutableSentencePieceText_ImmutableSentencePiecer   s    r   __init__z:ImmutableSentencePieceText_ImmutableSentencePiece.__init__A   sA    QRVXf  Y]  Y_  Y_  	`  	`  	`  	`  	`r#   c                 *    t          j        |           S r-   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__piecerK   s    r   _piecez8ImmutableSentencePieceText_ImmutableSentencePiece._pieceE       VW[\\\r#   c                 *    t          j        |           S r-   )r   :ImmutableSentencePieceText_ImmutableSentencePiece__surfacerK   s    r   _surfacez:ImmutableSentencePieceText_ImmutableSentencePiece._surfaceH   s    XY]^^^r#   c                 *    t          j        |           S r-   )r   5ImmutableSentencePieceText_ImmutableSentencePiece__idrK   s    r   _idz5ImmutableSentencePieceText_ImmutableSentencePiece._idK   s    STXYYYr#   c                 *    t          j        |           S r-   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__beginrK   s    r   _beginz8ImmutableSentencePieceText_ImmutableSentencePiece._beginN   rP   r#   c                 *    t          j        |           S r-   )r   6ImmutableSentencePieceText_ImmutableSentencePiece__endrK   s    r   _endz6ImmutableSentencePieceText_ImmutableSentencePiece._endQ   s    TUYZZZr#   c                 *    t          j        |           S r-   )r   CImmutableSentencePieceText_ImmutableSentencePiece__surface_as_bytesrK   s    r   _surface_as_byteszCImmutableSentencePieceText_ImmutableSentencePiece._surface_as_bytesT   s    abfgggr#   c                 *    t          j        |           S r-   )r   AImmutableSentencePieceText_ImmutableSentencePiece__piece_as_bytesrK   s    r   _piece_as_byteszAImmutableSentencePieceText_ImmutableSentencePiece._piece_as_bytesW   s    _`deeer#   c                 f    d                     | j        | j        | j        | j        | j                  S )Nz3piece: "{}"
id: {}
surface: "{}"
begin: {}
end: {}
)formatpieceidsurfacebeginendrK   s    r   __str__z9ImmutableSentencePieceText_ImmutableSentencePiece.__str__b   s2     "6$*dgt|"&*dh8 8	8r#   c                     | j         |j         k    o?| j        |j        k    o/| j        |j        k    o| j        |j        k    o| j        |j        k    S r-   )re   rf   rg   rh   ri   r   others     r   __eq__z8ImmutableSentencePieceText_ImmutableSentencePiece.__eq__j   sv    Z5;&  Y47eh+>  Y4<SXS`C`  Yeieosxs~e~  Y  DH  DL  PU  PY  DY  Yr#   c                 :    t          t          |                     S r-   )hashstrrK   s    r   __hash__z:ImmutableSentencePieceText_ImmutableSentencePiece.__hash__m   s    #d))__r#   N)r   r   r6   r   r   r   r   rL   r   8delete_ImmutableSentencePieceText_ImmutableSentencePiece__swig_destroy__rO   rS   rV   rY   r\   r_   rb   re   piece_as_bytesrg   surface_as_bytesrf   rh   ri   rj   rn   rr   r$   r#   r   r:   r:   =   sk       h--/I/IOdeeeGH` ` `%^] ] ]_ _ _Z Z Z] ] ][ [ [h h hf f f HVEXo..Nhx  Gx 122	#BHVE
(4..C8 8 8Y Y Y   HHHr#   r:   c                       e Zd Z ed d d          ZeZd Zej	        Z
d Zd Zd Zd	 Zd
 Zd Z ee          Z ee          Z ee          Z G d d          Zed             Zd Zd Zd ZeZdS )ImmutableSentencePieceTextc                 4    | j                                         S r-   r<   r=   s    r   r?   z#ImmutableSentencePieceText.<lambda>v   r@   r#   c                 6    | j                             |          S r-   r<   rB   s     r   r?   z#ImmutableSentencePieceText.<lambda>v   rD   r#   rE   rF   c                 R    t          j        | t          j                               d S r-   )r   #ImmutableSentencePieceText_swiginitnew_ImmutableSentencePieceTextrK   s    r   rL   z#ImmutableSentencePieceText.__init__y   s$    :4AnApApqqqqqr#   c                 *    t          j        |           S r-   )r   'ImmutableSentencePieceText__pieces_sizerK   s    r   _pieces_sizez'ImmutableSentencePieceText._pieces_size}   s    EdKKKr#   c                 ,    t          j        | |          S r-   )r   "ImmutableSentencePieceText__piecesr   indexs     r   _piecesz"ImmutableSentencePieceText._pieces   s    @uMMMr#   c                 *    t          j        |           S r-   )r    ImmutableSentencePieceText__textrK   s    r   _textz ImmutableSentencePieceText._text   s    >tDDDr#   c                 *    t          j        |           S r-   )r   !ImmutableSentencePieceText__scorerK   s    r   _scorez!ImmutableSentencePieceText._score   s    ?EEEr#   c                 *    t          j        |           S r-   )r   ,ImmutableSentencePieceText_SerializeAsStringrK   s    r   SerializeAsStringz,ImmutableSentencePieceText.SerializeAsString       J4PPPr#   c                 *    t          j        |           S r-   )r   )ImmutableSentencePieceText__text_as_bytesrK   s    r   _text_as_bytesz)ImmutableSentencePieceText._text_as_bytes   s    GMMMr#   c                   *    e Zd Zd Zd Zd Zd ZeZdS )9ImmutableSentencePieceText.ImmutableSentencePieceIteratorc                 P    || _         | j                                         | _        d S r-   )protor   lenr   r   s     r   rL   zBImmutableSentencePieceText.ImmutableSentencePieceIterator.__init__   "    
:**,,r#   c                     | j         S r-   r   rK   s    r   __len__zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__len__   	    xr#   c                 2    t          |t                    r8 fdt           j                  D             |j        |j        |j                 S |dk     r
| j        z   }|dk     s| j        k    rt          d           j        	                    |          S )Nc                 D    g | ]}j                             |          S r$   )r   r   .0ir   s     r   
<listcomp>zYImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__.<locals>.<listcomp>   )    AAAA$*$$Q''AAAr#   r   zpiece index is out of range)
r   sliceranger   startstopstep
IndexErrorr   r   r   s   ` r   __getitem__zEImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__   s    eU## 	eAAAAtxAAA%+ejY^YcBcd
d199$("%199))899
9z!!%(((r#   c                 @    d                     d | D                       S )N
c                 R    g | ]$}d                      t          |                    %S zpieces {{
{}}}rd   rq   r   r>   s     r   r   zUImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__.<locals>.<listcomp>   -    IIIq+223q66::IIIr#   joinrK   s    r   rj   zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__   #    yyIIDIIIJJJr#   Nr   r   r6   rL   r   r   rj   r   r$   r#   r   ImmutableSentencePieceIteratorr      T        - - -  ) ) )K K K hhhr#   r   c                 6    t                               |           S r-   )rx   r   rK   s    r   piecesz!ImmutableSentencePieceText.pieces   s    'FFtLLLr#   c                 V    |                                  |                                 k    S r-   r   rl   s     r   rn   z!ImmutableSentencePieceText.__eq__   %    ##%%)@)@)B)BBBr#   c                 D    t          |                                           S r-   rp   r   rK   s    r   rr   z#ImmutableSentencePieceText.__hash__       $((**+++r#   c                     d                     | j        | j        d                    d | j        D                                 S )Nztext: "{}"
score: {}
{}r   c                 R    g | ]$}d                      t          |                    %S r   r   r   s     r   r   z6ImmutableSentencePieceText.__str__.<locals>.<listcomp>   s-    %\%\%\1&7&>&>s1vv&F&F%\%\%\r#   )rd   textscorer   r   rK   s    r   rj   z"ImmutableSentencePieceText.__str__   sH    F49dj99%\%\PTP[%\%\%\]]_ __r#   N)r   r   r6   r   r   r   r   rL   r   !delete_ImmutableSentencePieceTextrt   r   r   r   r   r   r   r   text_as_bytesr   r   r   rn   rr   rj   r$   r#   r   rx   rx   u   sf       h--/I/IOdeeeGHr r r%GL L LN N NE E EF F FQ Q QN N N 8E??DH^,,MHVE       , M M XMC C C, , ,_ _ _ HHHr#   rx   c                       e Zd Z ed d d          ZeZd Zej	        Z
d Zd Zd Z G d	 d
          Zed             Zd Zd Zd ZeZdS )ImmutableNBestSentencePieceTextc                 4    | j                                         S r-   r<   r=   s    r   r?   z(ImmutableNBestSentencePieceText.<lambda>   r@   r#   c                 6    | j                             |          S r-   r<   rB   s     r   r?   z(ImmutableNBestSentencePieceText.<lambda>   rD   r#   rE   rF   c                 R    t          j        | t          j                               d S r-   )r   (ImmutableNBestSentencePieceText_swiginit#new_ImmutableNBestSentencePieceTextrK   s    r   rL   z(ImmutableNBestSentencePieceText.__init__   s$    ?nFxFzFz{{{{{r#   c                 *    t          j        |           S r-   )r   ,ImmutableNBestSentencePieceText__nbests_sizerK   s    r   _nbests_sizez,ImmutableNBestSentencePieceText._nbests_size   r   r#   c                 ,    t          j        | |          S r-   )r   'ImmutableNBestSentencePieceText__nbestsr   s     r   _nbestsz'ImmutableNBestSentencePieceText._nbests   s    EdERRRr#   c                 *    t          j        |           S r-   )r   1ImmutableNBestSentencePieceText_SerializeAsStringrK   s    r   r   z1ImmutableNBestSentencePieceText.SerializeAsString   s    OPTUUUr#   c                   *    e Zd Zd Zd Zd Zd ZeZdS )BImmutableNBestSentencePieceText.ImmutableSentencePieceTextIteratorc                 P    || _         | j                                         | _        d S r-   )r   r   r   r   s     r   rL   zKImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__init__   r   r#   c                     | j         S r-   r   rK   s    r   r   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__len__   r   r#   c                 2    t          |t                    r8 fdt           j                  D             |j        |j        |j                 S |dk     r
| j        z   }|dk     s| j        k    rt          d           j        	                    |          S )Nc                 D    g | ]}j                             |          S r$   )r   r   r   s     r   r   zbImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__.<locals>.<listcomp>   r   r#   r   znbests index is out of range)
r   r   r   r   r   r   r   r   r   r   r   s   ` r   r   zNImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__   s    eU## 	eAAAAtxAAA%+ejY^YcBcd
d199$("%199))9::
:z!!%(((r#   c                 @    d                     d | D                       S )Nr   c                 R    g | ]$}d                      t          |                    %S znbests {{
{}}}r   r   s     r   r   z^ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__.<locals>.<listcomp>   r   r#   r   rK   s    r   rj   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__   r   r#   Nr   r$   r#   r   "ImmutableSentencePieceTextIteratorr      r   r#   r   c                 6    t                               |           S r-   )r   r   rK   s    r   nbestsz&ImmutableNBestSentencePieceText.nbests   s    ,OOPTUUUr#   c                 V    |                                  |                                 k    S r-   r   rl   s     r   rn   z&ImmutableNBestSentencePieceText.__eq__   r   r#   c                 D    t          |                                           S r-   r   rK   s    r   rr   z(ImmutableNBestSentencePieceText.__hash__   r   r#   c                 J    d                     d | j        D                       S )Nr   c                 R    g | ]$}d                      t          |                    %S r   r   r   s     r   r   z;ImmutableNBestSentencePieceText.__str__.<locals>.<listcomp>   s-    NNNQ)00Q88NNNr#   )r   r   rK   s    r   rj   z'ImmutableNBestSentencePieceText.__str__   s%    YYNN$+NNNOOOr#   N)r   r   r6   r   r   r   r   rL   r   &delete_ImmutableNBestSentencePieceTextrt   r   r   r   r   r   rn   rr   rj   r$   r#   r   r   r      s       h--/I/IOdeeeGH| | |%LQ Q QS S SV V V       , V V XVC C C, , ,P P P HHHr#   r   c                      e Zd Z ed d d          ZeZd Zej	        Z
d Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$d  Z%d! Z&d" Z'd# Z(d$ Z)d% Z*d& Z+d' Z,d( Z-d) Z.d* Z/d+ Z0d, Z1d- Z2d. Z3d/ Z4d0 Z5d1 Z6d2 Z7d3 Z8d4 Z9d5 Z:d6 Z;d7 Z<d8 Z=d9 Z>d: Z?d; Z@d< ZAd= ZBd>d>eCd?d?d?d?d?d@dAd@fdBZD	 	 	 	 	 	 	 	 	 didCZEdD ZFdE ZGdF ZHdG ZIdjdHZJdjdIZKdjdJZLdjdKZM	 	 	 	 	 	 dkdLZNdldMZOdldNZPdldOZQdldPZR	 	 	 	 	 	 	 	 	 didQZSdjdRZTdjdSZUdjdTZVdjdUZWeXd>fdVZYeXfdWZZeXfdXZ[dmdZZ\dmd[Z]dnd]Z^dnd^Z_dld_Zdld`Z`da Zadb Zbdc Zcdd Zdde Zedf Zfdg ZgdjdhZhd>S )oSentencePieceProcessorc                 4    | j                                         S r-   r<   r=   s    r   r?   zSentencePieceProcessor.<lambda>   r@   r#   c                 6    | j                             |          S r-   r<   rB   s     r   r?   zSentencePieceProcessor.<lambda>   rD   r#   rE   rF   c                 R    t          j        | t          j                               d S r-   )r   SentencePieceProcessor_swiginitnew_SentencePieceProcessorrK   s    r   rL   zSentencePieceProcessor.__init__   s$    6t^=f=h=hiiiiir#   c                 ,    t          j        | |          S r-   )r   .SentencePieceProcessor_LoadFromSerializedProtor   
serializeds     r   LoadFromSerializedProtoz.SentencePieceProcessor.LoadFromSerializedProto   s    LTS]^^^r#   c                 ,    t          j        | |          S r-   )r   ,SentencePieceProcessor_SetEncodeExtraOptionsr   extra_options     r   SetEncodeExtraOptionsz,SentencePieceProcessor.SetEncodeExtraOptions      J4Q]^^^r#   c                 ,    t          j        | |          S r-   )r   ,SentencePieceProcessor_SetDecodeExtraOptionsr   s     r   SetDecodeExtraOptionsz,SentencePieceProcessor.SetDecodeExtraOptions  r   r#   c                 ,    t          j        | |          S r-   )r   $SentencePieceProcessor_SetVocabulary)r   valid_vocabs     r   SetVocabularyz$SentencePieceProcessor.SetVocabulary  s    B4UUUr#   c                 *    t          j        |           S r-   )r   &SentencePieceProcessor_ResetVocabularyrK   s    r   ResetVocabularyz&SentencePieceProcessor.ResetVocabulary  s    DTJJJr#   c                 .    t          j        | ||          S r-   )r   %SentencePieceProcessor_LoadVocabulary)r   filename	thresholds      r   LoadVocabularyz%SentencePieceProcessor.LoadVocabulary  s    CD(T]^^^r#   c                 &    t          j        | g|R  S r-   )r   'SentencePieceProcessor_CalculateEntropyr   argss     r   CalculateEntropyz'SentencePieceProcessor.CalculateEntropy  s    EdRTRRRRr#   c                 *    t          j        |           S r-   )r   #SentencePieceProcessor_GetPieceSizerK   s    r   GetPieceSizez#SentencePieceProcessor.GetPieceSize  s    A$GGGr#   c                 ,    t          j        | |          S r-   )r    SentencePieceProcessor_PieceToIdr   re   s     r   	PieceToIdz SentencePieceProcessor.PieceToId  s    >tUKKKr#   c                 ,    t          j        | |          S r-   )r    SentencePieceProcessor_IdToPiecer   rf   s     r   	IdToPiecez SentencePieceProcessor.IdToPiece      >tRHHHr#   c                 ,    t          j        | |          S r-   )r   SentencePieceProcessor_GetScorer  s     r   GetScorezSentencePieceProcessor.GetScore      =dBGGGr#   c                 ,    t          j        | |          S r-   )r    SentencePieceProcessor_IsUnknownr  s     r   	IsUnknownz SentencePieceProcessor.IsUnknown   r  r#   c                 ,    t          j        | |          S r-   )r    SentencePieceProcessor_IsControlr  s     r   	IsControlz SentencePieceProcessor.IsControl#  r  r#   c                 ,    t          j        | |          S r-   )r   SentencePieceProcessor_IsUnusedr  s     r   IsUnusedzSentencePieceProcessor.IsUnused&  r  r#   c                 ,    t          j        | |          S r-   )r   SentencePieceProcessor_IsByter  s     r   IsBytezSentencePieceProcessor.IsByte)  s    ;D"EEEr#   c                 *    t          j        |           S r-   )r   SentencePieceProcessor_unk_idrK   s    r   unk_idzSentencePieceProcessor.unk_id,      ;DAAAr#   c                 *    t          j        |           S r-   )r   SentencePieceProcessor_bos_idrK   s    r   bos_idzSentencePieceProcessor.bos_id/  r,  r#   c                 *    t          j        |           S r-   )r   SentencePieceProcessor_eos_idrK   s    r   eos_idzSentencePieceProcessor.eos_id2  r,  r#   c                 *    t          j        |           S r-   )r   SentencePieceProcessor_pad_idrK   s    r   pad_idzSentencePieceProcessor.pad_id5  r,  r#   c                 *    t          j        |           S r-   )r   -SentencePieceProcessor_serialized_model_protorK   s    r   serialized_model_protoz-SentencePieceProcessor.serialized_model_proto8  s    KDQQQr#   c                 ,    t          j        | |          S r-   )r   #SentencePieceProcessor_LoadFromFiler   args     r   LoadFromFilez#SentencePieceProcessor.LoadFromFile;  s    A$LLLr#   c	                 :    t          j        | ||||||||	  	        S r-   )r   #SentencePieceProcessor__EncodeAsIds	r   r   enable_sampling
nbest_sizealphaadd_bosadd_eosreverseemit_unk_pieces	            r   _EncodeAsIdsz#SentencePieceProcessor._EncodeAsIds>  s>    A$o_ikpry  |C  EL  N\  ]  ]  	]r#   c	                 :    t          j        | ||||||||	  	        S r-   )r   &SentencePieceProcessor__EncodeAsPiecesr@  s	            r   _EncodeAsPiecesz&SentencePieceProcessor._EncodeAsPiecesA  s?    DT4Q`blnsu|  F  HO  Q_  `  `  	`r#   c	                 :    t          j        | ||||||||	  	        S r-   )r   /SentencePieceProcessor__EncodeAsSerializedProtor@  s	            r   _EncodeAsSerializedProtoz/SentencePieceProcessor._EncodeAsSerializedProtoD  sD    MdTXZikuw|  F  HO  QX  Zh  i  i  	ir#   c	                 :    t          j        | ||||||||	  	        S r-   )r   .SentencePieceProcessor__EncodeAsImmutableProtor@  s	            r   _EncodeAsImmutableProtoz.SentencePieceProcessor._EncodeAsImmutableProtoG  sD    LTSWYhjtv{  ~E  GN  PW  Yg  h  h  	hr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   (SentencePieceProcessor__EncodeAsIdsBatch
r   insnum_threadsrA  rB  rC  rD  rE  rF  rG  s
             r   _EncodeAsIdsBatchz(SentencePieceProcessor._EncodeAsIdsBatchJ  sJ    FtSR]_npz  }B  DK  MT  V]  _m  n  n  	nr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   +SentencePieceProcessor__EncodeAsPiecesBatchrT  s
             r   _EncodeAsPiecesBatchz+SentencePieceProcessor._EncodeAsPiecesBatchM  sK    I$PSU`bqs}  @E  GN  PW  Y`  bp  q  q  	qr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   4SentencePieceProcessor__EncodeAsSerializedProtoBatchrT  s
             r   _EncodeAsSerializedProtoBatchz4SentencePieceProcessor._EncodeAsSerializedProtoBatchP  sP    RSWY\^ikz  }G  IN  PW  Y`  bi  ky  z  z  	zr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   3SentencePieceProcessor__EncodeAsImmutableProtoBatchrT  s
             r   _EncodeAsImmutableProtoBatchz3SentencePieceProcessor._EncodeAsImmutableProtoBatchS  sP    QRVX[]hjy  |F  HM  OV  X_  ah  jx  y  y  	yr#   c                 ,    t          j        | |          S r-   )r   !SentencePieceProcessor__DecodeIdsr   idss     r   
_DecodeIdsz!SentencePieceProcessor._DecodeIdsV  s    ?cJJJr#   c                 ,    t          j        | |          S r-   )r   (SentencePieceProcessor__DecodeIdsAsBytesrc  s     r   _DecodeIdsAsBytesz(SentencePieceProcessor._DecodeIdsAsBytesY  s    FtSQQQr#   c                 ,    t          j        | |          S r-   )r   $SentencePieceProcessor__DecodePiecesr   r   s     r   _DecodePiecesz$SentencePieceProcessor._DecodePieces\  s    B4PPPr#   c                 ,    t          j        | |          S r-   )r   2SentencePieceProcessor__DecodeIdsAsSerializedProtorc  s     r   _DecodeIdsAsSerializedProtoz2SentencePieceProcessor._DecodeIdsAsSerializedProto_  s    PQUWZ[[[r#   c                 ,    t          j        | |          S r-   )r   5SentencePieceProcessor__DecodePiecesAsSerializedProtork  s     r   _DecodePiecesAsSerializedProtoz5SentencePieceProcessor._DecodePiecesAsSerializedProtob  s    STXZ`aaar#   c                 ,    t          j        | |          S r-   )r   1SentencePieceProcessor__DecodeIdsAsImmutableProtorc  s     r   _DecodeIdsAsImmutableProtoz1SentencePieceProcessor._DecodeIdsAsImmutableProtoe  s    OPTVYZZZr#   c                 ,    t          j        | |          S r-   )r   4SentencePieceProcessor__DecodePiecesAsImmutableProtork  s     r   _DecodePiecesAsImmutableProtoz4SentencePieceProcessor._DecodePiecesAsImmutableProtoh  s    RSWY_```r#   c                 .    t          j        | ||          S r-   )r   &SentencePieceProcessor__DecodeIdsBatchr   rU  rV  s      r   _DecodeIdsBatchz&SentencePieceProcessor._DecodeIdsBatchk  s    DT3P[\\\r#   c                 .    t          j        | ||          S r-   )r   -SentencePieceProcessor__DecodeIdsAsBytesBatchr{  s      r   _DecodeIdsAsBytesBatchz-SentencePieceProcessor._DecodeIdsAsBytesBatchn  s    KDRUWbcccr#   c                 .    t          j        | ||          S r-   )r   7SentencePieceProcessor__DecodeIdsAsSerializedProtoBatchr{  s      r    _DecodeIdsAsSerializedProtoBatchz7SentencePieceProcessor._DecodeIdsAsSerializedProtoBatchq  s    UVZ\_almmmr#   c                 .    t          j        | ||          S r-   )r   6SentencePieceProcessor__DecodeIdsAsImmutableProtoBatchr{  s      r   _DecodeIdsAsImmutableProtoBatchz6SentencePieceProcessor._DecodeIdsAsImmutableProtoBatcht  s    TUY[^`klllr#   c                 .    t          j        | ||          S r-   )r   )SentencePieceProcessor__DecodePiecesBatchr{  s      r   _DecodePiecesBatchz)SentencePieceProcessor._DecodePiecesBatchw  s    GcS^___r#   c                 .    t          j        | ||          S r-   )r   :SentencePieceProcessor__DecodePiecesAsSerializedProtoBatchr{  s      r   #_DecodePiecesAsSerializedProtoBatchz:SentencePieceProcessor._DecodePiecesAsSerializedProtoBatchz  s    XY]_bdopppr#   c                 .    t          j        | ||          S r-   )r   9SentencePieceProcessor__DecodePiecesAsImmutableProtoBatchr{  s      r   "_DecodePiecesAsImmutableProtoBatchz9SentencePieceProcessor._DecodePiecesAsImmutableProtoBatch}  s    WX\^acnooor#   c           	      6    t          j        | ||||||          S r-   )r   (SentencePieceProcessor__NBestEncodeAsIdsr   r   rB  rD  rE  rF  rG  s          r   _NBestEncodeAsIdsz(SentencePieceProcessor._NBestEncodeAsIds  s1    FtTS]_fhoqx  {I  J  J  	Jr#   c           	      6    t          j        | ||||||          S r-   )r   +SentencePieceProcessor__NBestEncodeAsPiecesr  s          r   _NBestEncodeAsPiecesz+SentencePieceProcessor._NBestEncodeAsPieces  s2    I$PTV`bikrt{  ~L  M  M  	Mr#   c           	      6    t          j        | ||||||          S r-   )r   4SentencePieceProcessor__NBestEncodeAsSerializedProtor  s          r   _NBestEncodeAsSerializedProtoz4SentencePieceProcessor._NBestEncodeAsSerializedProto  s7    RSWY]_ikrt{  ~E  GU  V  V  	Vr#   c           	      6    t          j        | ||||||          S r-   )r   3SentencePieceProcessor__NBestEncodeAsImmutableProtor  s          r   _NBestEncodeAsImmutableProtoz3SentencePieceProcessor._NBestEncodeAsImmutableProto  s7    QRVX\^hjqsz  }D  FT  U  U  	Ur#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   1SentencePieceProcessor__SampleEncodeAndScoreAsIds
r   r   num_samplesrC  worinclude_bestrD  rE  rF  rG  s
             r   _SampleEncodeAndScoreAsIdsz1SentencePieceProcessor._SampleEncodeAndScoreAsIds  sL    OPTVZ\ginps  vB  DK  MT  V]  _m  n  n  	nr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   4SentencePieceProcessor__SampleEncodeAndScoreAsPiecesr  s
             r   _SampleEncodeAndScoreAsPiecesz4SentencePieceProcessor._SampleEncodeAndScoreAsPieces  sL    RSWY]_jlqsv  yE  GN  PW  Y`  bp  q  q  	qr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   =SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProtor  s
             r   &_SampleEncodeAndScoreAsSerializedProtoz=SentencePieceProcessor._SampleEncodeAndScoreAsSerializedProto  sL    [\`bfhsuz|  BN  PW  Y`  bi  ky  z  z  	zr#   c
                 <    t          j        | |||||||||	
  
        S r-   )r   <SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProtor  s
             r   %_SampleEncodeAndScoreAsImmutableProtoz<SentencePieceProcessor._SampleEncodeAndScoreAsImmutableProto  sL    Z[_aegrty{~  AM  OV  X_  ah  jx  y  y  	yr#   c                 ,    t          j        | |          S r-   )r   !SentencePieceProcessor__Normalizer   r   s     r   
_Normalizez!SentencePieceProcessor._Normalize  s    ?dKKKr#   c                 ,    t          j        | |          S r-   )r   ,SentencePieceProcessor__NormalizeWithOffsetsr  s     r   _NormalizeWithOffsetsz,SentencePieceProcessor._NormalizeWithOffsets  s    J4QUVVVr#   c                 .    t          j        | ||          S r-   )r   (SentencePieceProcessor__CalculateEntropy)r   r   rC  s      r   _CalculateEntropyz(SentencePieceProcessor._CalculateEntropy  s    FtTSXYYYr#   c                 0    t          j        | |||          S r-   )r   -SentencePieceProcessor__CalculateEntropyBatch)r   rU  rC  rV  s       r   _CalculateEntropyBatchz-SentencePieceProcessor._CalculateEntropyBatch  s    KDRUW\^ijjjr#   c                 ,    t          j        | |          S r-   )r   .SentencePieceProcessor__OverrideNormalizerSpecr
  s     r   _OverrideNormalizerSpecz.SentencePieceProcessor._OverrideNormalizerSpec  s    LTSWXXXr#   NFg?c                     t          |            || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _	        |s|r| 
                    ||           dS dS )a  Initialzie sentencepieceProcessor.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
          reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                      from the all hypothesis (lattice) using
                      forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and dropout probability of
               merge operations for BPE-dropout.
        num_threads: number of threads in batch processing (Default = -1, auto-detected)
      )
model_filemodel_protoN)$_sentencepiece_processor_init_native	_out_type_add_bos_add_eos_reverse_emit_unk_piece_enable_sampling_nbest_size_alpha_num_threadsLoad)r   r  r  out_typerD  rE  rF  rG  rA  rB  rC  rV  s               r   InitzSentencePieceProcessor.Init  s    D +4000dndmdmdm+d-d#ddk%d	 B{ B		Z[	AAAAAB Br#   c                    || j         }|| j        }|| j        }|| j        }|| j        }|| j        }|| j        }|	| j        }	|
| j        }
|dk    r||dk    s|dk    s|	t          d          |
t          |
          t          urt          d          t          |          t          u r|t          u r|                     ||
|||	||||	  	        S |t          u r|                     ||
|||	||||	  	        S |dk    s|dk    r|                     ||
|||	||||	  	        S |d	k    r|                     ||
|||	||||	  	        S |t          u r|                     ||||	||||          S |t          u r|                     ||||	||||          S |dk    s|dk    r|                     ||||	||||          S |d	k    r|                     ||||	||||          S t          d
                    |                    )a~  Encode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
                 reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                    from the all hypothesis (lattice) using
                    forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and merge probability for
               BPE-dropout (probablity 'p' in BPE-dropout paper).
        num_threads: the number of threads used in the batch processing (Default = -1).
      NTr   r   a  When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. when "nbest_size = -1" , this method samples from all candidates on the lattice instead of nbest segmentations.num_threads must be intserialized_protor   immutable_protozunknown out_type={})r  r  r  r  r  r  r  r  r  RuntimeErrorr   intlistrW  rq   rZ  r]  r`  rH  rK  rN  rQ  rd   )r   inputr  rD  rE  rF  rG  rA  rB  rC  rV  s              r   EncodezSentencePieceProcessor.Encode  s   @ 
	>	-	-	-		-		 /		%
			'	D	 	 j&8J!OO&0Aoo.
 
 	
 
	[ 1 1 < <4555	e		s??''{OZ(-w.Z Z Zs??**5+PZ+0'7G^] ] ])))X-@-@33E;Yc497GWVdf f f(((225+Xb38'7GUce e e 
S  !&'>S S 	S	S##E?J$)7GWnV V 	V	'	'	'8w+>+>,,UOZ-2GWg~_ _ 	_	&	&	&++E?J,17GWn^ ^ 	^ .55h??@@@r#   c                 ,     | j         d|t          d|S Nr  r  r$   r  rq   r   r  kwargss      r   EncodeAsPiecesz%SentencePieceProcessor.EncodeAsPieces5  !    T[=us==f===r#   c                 ,     | j         d|t          d|S r  r  r  r  s      r   EncodeAsIdsz"SentencePieceProcessor.EncodeAsIds9  r  r#   c                 "     | j         d|dd|S )Nr  r  r$   r  r  s      r   EncodeAsSerializedProtoz.SentencePieceProcessor.EncodeAsSerializedProto=  s"    T[Lu/ALLVLLLr#   c                 "     | j         d|dd|S )Nr  r  r$   r  r  s      r   EncodeAsImmutableProtoz-SentencePieceProcessor.EncodeAsImmutableProtoA  s"    T[Ku/@KKFKKKr#   c           	      2     | j         d|||t          dd|S NTr  rB  rC  r  rA  r$   r  r   r  rB  rC  r  s        r   SampleEncodeAsPiecesz+SentencePieceProcessor.SampleEncodeAsPiecesE  ;    T[ Gu5"%tG G?EG G Gr#   c           	      2     | j         d|||t          dd|S r  r  r  s        r   SampleEncodeAsIdsz(SentencePieceProcessor.SampleEncodeAsIdsJ  r  r#   c           	      (     | j         d|||ddd|S )Nr  Tr  r$   r  r  s        r   SampleEncodeAsSerializedProtoz4SentencePieceProcessor.SampleEncodeAsSerializedProtoO  s;    T[ Vu5"4dV VNTV V Vr#   c           	      (     | j         d|||ddd|S )Nr  Tr  r$   r  r  s        r   SampleEncodeAsImmutableProtoz3SentencePieceProcessor.SampleEncodeAsImmutableProtoT  s;    T[ Uu5"3TU UMSU U Ur#   c                      j          j         j         j         j         j        dk    rd fdt          |          t          u rfd|D             S  |          S )a  NBestEncode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: nbest size
      Nr   r   c                 :   t           u r                    |           S t          u r                    |           S dk    sdk    r                    |           S dk    r                    |           S t          d          )Nr  r   r  zunknown out_type)r  r  rq   r  r  r  r  )r   rD  rE  rG  rB  r  rF  r   s    r   _encodez3SentencePieceProcessor.NBestEncode.<locals>._encode}  s    s??''j(/'>S S Ss??**4+2GWnV V V)))X-@-@33D*4;Wg~_ _ _(((2243:GWn^ ^ ^ -...r#   c                 &    g | ]} |          S r$   r$   r   nr  s     r   r   z6SentencePieceProcessor.NBestEncode.<locals>.<listcomp>  !    ***q

***r#   )r  r  r  r  r  r  r   r  )	r   r  r  rD  rE  rF  rG  rB  r  s	   ` ``````@r   NBestEncodez"SentencePieceProcessor.NBestEncodeY  s    ( 
	>	-	-	-		-		%
	q
/ / / / / / / / / / /  
e		****E****WU^^r#   c                 .     | j         d||t          d|S Nr  rB  r  r$   )r  rq   r   r  rB  r  s       r   NBestEncodeAsPiecesz*SentencePieceProcessor.NBestEncodeAsPieces  2    T 6Ej'*6 6.46 6 6r#   c                 .     | j         d||t          d|S r  )r  r  r  s       r   NBestEncodeAsIdsz'SentencePieceProcessor.NBestEncodeAsIds  r  r#   c                 $     | j         d||dd|S )Nr  r  r$   r  r  s       r   NBestEncodeAsSerializedProtoz3SentencePieceProcessor.NBestEncodeAsSerializedProto  s8    T EEj'9E E=CE E Er#   c                 $     | j         d||dd|S )Nr  r  r$   r  r  s       r   NBestEncodeAsImmutableProtoz2SentencePieceProcessor.NBestEncodeAsImmutableProto  s8    T DEj'8D D<BD D Dr#   c           
      \   	
  j          j         j         j         j        dd	d	
d
dk    rt          d          
r	st          d          
 	f
dt          |          t          u rfd	|D             S  |          S )
a   SampleEncodeAndScore text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        num_samples: How many samples to return (Default = 1)
        alpha: inverse temperature for sampling
        wor: whether to sample without replacement (Default = false)
        include_best: whether to include the best tokenization, requires wor=True (Default = false)
      Nr   g      ?Fr   znum_examples must be positivez8When include_best is True, We must specify "wor = True".c                 R  
 t           u r	                    | 
	  	        S t          u r	                    | 
	  	        S dk    sdk    r	                    | 
	  	        S dk    r	                    | 
	  	        S t          d          )Nr  r   r  zunknown output type)r  r  rq   r  r  r  r  )r   rD  rE  rC  rG  r  r  r  rF  r   r  s    r   r  z<SentencePieceProcessor.SampleEncodeAndScore.<locals>._encode  s   s??00{E3P\18'7N\ \ \s??33D+ucS_4;Wg~_ _ _ )))X-@-@<<T;PUWZ\h=DgwXfh h h (((;;D+uVY[g<CWgWeg g g 0111r#   c                 &    g | ]} |          S r$   r$   r  s     r   r   z?SentencePieceProcessor.SampleEncodeAndScore.<locals>.<listcomp>  r  r#   )r  r  r  r  r  r  r   r  )r   r  r  rD  rE  rF  rG  r  rC  r  r  r  s   ` `````````@r   SampleEncodeAndScorez+SentencePieceProcessor.SampleEncodeAndScore  s3   4 
	>	-	-	-		-									:;;;	 Wc WUVVV2 2 2 2 2 2 2 2 2 2 2 2 2 2& 
e		****E****WU^^r#   c                 0     | j         d|||t          d|S Nr  r  rC  r  r$   )r
  rq   r   r  r  rC  r  s        r   SampleEncodeAndScoreAsPiecesz3SentencePieceProcessor.SampleEncodeAndScoreAsPieces  5    &T& ?USX03? ?7=? ? ?r#   c                 0     | j         d|||t          d|S r  )r
  r  r  s        r   SampleEncodeAndScoreAsIdsz0SentencePieceProcessor.SampleEncodeAndScoreAsIds  r  r#   c                 &     | j         d|||dd|S )Nr  r  r$   r
  r  s        r   %SampleEncodeAndScoreAsSerializedProtoz<SentencePieceProcessor.SampleEncodeAndScoreAsSerializedProto  s;    &T& NUSX0BN NFLN N Nr#   c                 &     | j         d|||dd|S )Nr  r  r$   r  r  s        r   $SampleEncodeAndScoreAsImmutableProtoz;SentencePieceProcessor.SampleEncodeAndScoreAsImmutableProto  s;    &T& MUSX0AM MEKM M Mr#   c                    || j         }|t          |          t          urt          d          |sdS |t          u rt          |          t          u r|                     |g          S t          |          t          u r|                     |g          S t          |          t          u rt          |          dk    st          |d                   t          u r|                     |          S t          |d                   t          u r|                     |          S t          |d                   t          u rt          |d                   dk    s"t          |d         d                   t          u r| 	                    ||          S t          |d         d                   t          u r| 
                    ||          S |t          u rt          |          t          u r|                     |g          S t          |          t          u r|                     |g          S t          |          t          u rt          |          dk    st          |d                   t          u r|                     |          S t          |d                   t          u r|                     |          S t          |d                   t          u rt          |d                   dk    s"t          |d         d                   t          u r|                     ||          S t          |d         d                   t          u r| 
                    ||          S |dk    rt          |          t          u r|                     |g          S t          |          t          u r|                     |g          S t          |          t          u rt          |          dk    st          |d                   t          u r|                     |          S t          |d                   t          u r|                     |          S t          |d                   t          u rt          |d                   dk    s"t          |d         d                   t          u r|                     ||          S t          |d         d                   t          u r|                     ||          S |dk    rt          |          t          u r|                     |g          S t          |          t          u r|                     |g          S t          |          t          u rt          |          dk    st          |d                   t          u r|                     |          S t          |d                   t          u r|                     |          S t          |d                   t          u rt          |d                   dk    s"t          |d         d                   t          u r|                     ||          S t          |d         d                   t          u r|                     ||          S t          d          )zDecode processed id or token sequences.

      Args:
        out_type: output type. str, bytes or 'serialized_proto' or 'immutable_proto' (Default = str)
        num_threads: the number of threads used in the batch processing (Default = -1).
      Nr  r   r   r  r  zunknown output or input type)r  r   r  r  rq   re  rl  r  r   r|  r  bytesrh  r  ro  rr  r  r  ru  rx  r  r  )r   r  r  rV  s       r   DecodezSentencePieceProcessor.Decode  s    
	'		[ 1 1 < <4555 r	S;;#%))
);;###UG,,
,;;$ZZ1__U1X# 5 5??5)))%(^^s""%%e,,,%(^^t##58}}!!T%(1+%6%6#%=%=((<<<E!HQK  C''++E;???	U		;;#''00
0;;###UG,,
,;;$ZZ1__U1X# 5 5))%000%(^^s""%%e,,,%(^^t##58}}!!T%(1+%6%6#%=%=//{CCCE!HQK  C''++E;???	'	'	';;#115'::
:;;#44eW==
=;;$ZZ1__U1X# 5 533E:::%(^^s""66u===%(^^t##58}}!!T%(1+%6%6#%=%=99%MMME!HQK  C''<<UKPPP 
&	&	&;;#00%99
9;;#33UG<<
<;;$ZZ1__U1X# 5 5225999%(^^s""55e<<<%(^^t##58}}!!T%(1+%6%6#%=%=88LLLE!HQK  C'';;E;OOO 7888r#   c                 "     | j         d||d|S r  r  r   r  r  r  s       r   DecodePiecesz#SentencePieceProcessor.DecodePiecesg  !    T[BuxBB6BBBr#   c                 "     | j         d||d|S r  r  r  s       r   	DecodeIdsz SentencePieceProcessor.DecodeIdsk  r  r#   r  c                 "     | j         d||d|S r  r  r  s       r   DecodePiecesAsSerializedProtoz4SentencePieceProcessor.DecodePiecesAsSerializedProtoo  r  r#   c                 "     | j         d||d|S r  r  r  s       r   DecodeIdsAsSerializedProtoz1SentencePieceProcessor.DecodeIdsAsSerializedProtos  r  r#   r  c                 "     | j         d||d|S r  r  r  s       r   DecodePiecesAsImmutableProtoz3SentencePieceProcessor.DecodePiecesAsImmutableProtow  r  r#   c                 "     | j         d||d|S r  r  r  s       r   DecodeIdsAsImmutableProtoz0SentencePieceProcessor.DecodeIdsAsImmutableProto{  r  r#   c                     t          |          t          u rG|| j        }|t          |          t          urt	          d          |                     |||          S |                     ||          S )zCalculate sentence entropyNr  )r   r  r  r  r  r  r  )r   r  rC  rV  s       r   r  z'SentencePieceProcessor.CalculateEntropy  sv    	e		)+${"3"33">">677
7**5%EEE##E5111r#   c                 r      fdt          |          t          u rfd|D             S  |          S )Nc                 \    r                     |           S                     |           S r-   r  r  r   r   with_offsetss    r   
_normalizez4SentencePieceProcessor.Normalize.<locals>._normalize  1     	2++D11
1t$$$r#   c                 &    g | ]} |          S r$   r$   r   r>   r0  s     r   r   z4SentencePieceProcessor.Normalize.<locals>.<listcomp>  !    ---!

1---r#   r   r  r   r  r/  r0  s   ` `@r   	Normalizez SentencePieceProcessor.Normalize  `    % % % % % %
 
e		----u----Zr#   c                     i }|                                 D ]\  }}t          |          ||<   |                     |          S r-   )itemsrq   r  )r   r  
new_kwargskeyr    s        r   OverrideNormalizerSpecz-SentencePieceProcessor.OverrideNormalizerSpec  sG    j % %*#ue**
3))*555r#   c                 *    |                                  S r-   r  rK   s    r   
piece_sizez!SentencePieceProcessor.piece_size         r#   c                 *    |                                  S r-   r?  rK   s    r   
vocab_sizez!SentencePieceProcessor.vocab_size  rA  r#   c                 *    |                                  S r-   r8  rK   s    r   __getstate__z#SentencePieceProcessor.__getstate__      ((***r#   c                 X    |                                   |                     |           d S r-   rL   r   r   r8  s     r   __setstate__z#SentencePieceProcessor.__setstate__  *    
mmooo
""#9:::::r#   c                 *    |                                  S r-   r?  rK   s    r   r   zSentencePieceProcessor.__len__  rA  r#   c                 ,    |                      |          S r-   )r  r  s     r   r   z"SentencePieceProcessor.__getitem__  s    ^^E"""r#   c                     |r|rt          d          |r|                     |          S |                     |          S )a  Overwride SentencePieceProcessor.Load to support both model_file and model_proto.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto. Either `model_file`
          or `model_proto` must be set.
      z-model_file and model_proto must be exclusive.)r  r   r=  )r   r  r  s      r   r  zSentencePieceProcessor.Load  sT     
 L LJKKK	 9++K888z***r#   )	NNNNNNNNNNN)NNNNNNr-   )r  )r  )ir   r   r6   r   r   r   r   rL   r   delete_SentencePieceProcessorrt   r   r   r   r   r  r  r  r  r  r  r  r  r"  r%  r(  r+  r/  r2  r5  r8  r=  rH  rK  rN  rQ  rW  rZ  r]  r`  re  rh  rl  ro  rr  ru  rx  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  rq   r  r  r!  r#  r%  r'  r)  r7  r=  r@  rC  rF  rK  r   r   r  r$   r#   r   r   r      s       h--/I/IOdeeeGHj j j%C_ _ __ _ __ _ _V V VK K K_ _ _S S SH H HL L LI I IH H HI I II I IH H HF F FB B BB B BB B BB B BR R RM M M] ] ]` ` `i i ih h hn n nq q qz z zy y yK K KR R RQ Q Q\ \ \b b b[ [ [a a a] ] ]d d dn n nm m m` ` `q q qp p pJ J JM M MV V VU U Un n nq q qz z zy y yL L LW W WZ Z Zk k kY Y Y !"-B -B -B -Bd "#[ [ [ [|> > >> > >M M ML L LG G G G
G G G G
V V V V
U U U U "   #'#7 7 7 7t6 6 6 6
6 6 6 6
E E E E
D D D D '+%)%)%),0)-#'!%*.J J J JZ? ? ? ?
? ? ? ?
N N N N
M M M M
 &)d \ \ \ \~ ,/ C C C C ), C C C CC C C CC C C CC C C CC C C C	2 	2 	2 	2   6 6 6! ! !! ! !+ + +; ; ;
! ! !# # #+ + + + + +r#   r   c                 *    t          j        |           S r-   )r   SetRandomGeneratorSeed)seeds    r   rS  rS    s    0666r#   c                 *    t          j        |           S r-   )r   SetMinLogLevel)rC   s    r   rV  rV    s    (+++r#   c                       e Zd Z ed d d          Zd ZeZed             Z	ed             Z
ed             Zed	             Zed
             Zedd            Zedd            ZdS )SentencePieceTrainerc                 4    | j                                         S r-   r<   r=   s    r   r?   zSentencePieceTrainer.<lambda>  r@   r#   c                 6    | j                             |          S r-   r<   rB   s     r   r?   zSentencePieceTrainer.<lambda>  rD   r#   rE   rF   c                      t          d          )NzNo constructor defined)r   )r   r  r  s      r   rL   zSentencePieceTrainer.__init__  s    5666r#   c                 *    t          j        |           S r-   )r   %SentencePieceTrainer__TrainFromString)r<  s    r   _TrainFromStringz%SentencePieceTrainer._TrainFromString  s    CCHHHr#   c                 *    t          j        |           S r-   )r   "SentencePieceTrainer__TrainFromMapr  s    r   _TrainFromMapz"SentencePieceTrainer._TrainFromMap  s    @FFFr#   c                 ,    t          j        | |          S r-   )r   #SentencePieceTrainer__TrainFromMap2r  iters     r   _TrainFromMap2z#SentencePieceTrainer._TrainFromMap2      A$MMMr#   c                 *    t          j        |           S r-   )r   #SentencePieceTrainer__TrainFromMap3ra  s    r   _TrainFromMap3z#SentencePieceTrainer._TrainFromMap3  s    A$GGGr#   c                 ,    t          j        | |          S r-   )r   #SentencePieceTrainer__TrainFromMap4re  s     r   _TrainFromMap4z#SentencePieceTrainer._TrainFromMap4  rh  r#   Nc                    | 0t          |           t          u rt                              |           S d }d}d}i }|                                D ]!\  }}|dv r|}|dv r|} ||          ||<   "|rN|rt                              ||          }nt                              |          }|                    |           n7|rt                              ||          S t          	                    |          S dS )zDTrain Sentencepiece model. Accept both kwargs and legacy string arg.Nc                 D   t          |           t          u r|t          j        d         dk    rt	                      }nt                      }t          j        |d          }|                    d | D                        |	                                S t          |           S )zEncode value to CSV..r      r   )lineterminatorc                 ,    g | ]}t          |          S r$   )rq   )r   rC   s     r   r   z@SentencePieceTrainer._Train.<locals>._encode.<locals>.<listcomp>  s    111a3q66111r#   )r   r  sysr   StringIOBytesIOcsvwriterwriterowgetvaluerq   )r    frx  s      r   r  z,SentencePieceTrainer._Train.<locals>._encode  s    ;;$a A%%

AA		A:a333&
//115111
2
2
2
U
r#   )sentence_iteratorsentence_reader)model_writer)
r   rq   rX  r^  r:  rn  rk  writerg  rb  )	r<  r  r  r|  r~  r;  r<  r    r  s	            r   _TrainzSentencePieceTrainer._Train  s9    
T#YY#--#44S999   lj + +*#u:::#

$$$,,#GENN*S//	 @ 	H,;;J;LN N++ -;;JGG+;'''' 	@%44ZARSS
S%33J??
?Tr#   c                 ~    t          |          5  t          j        dd| i| d d d            d S # 1 swxY w Y   d S )N)ostreamr<  r$   )
_LogStreamrX  r  )r<  	logstreamr  s      r   TrainzSentencePieceTrainer.Train  s    i((( 7 7#666v6667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   266r-   rP  )r   r   r6   r   r   rL   r   r   staticmethodr^  rb  rg  rk  rn  r  r  r$   r#   r   rX  rX    s       h--/I/IOdeeeG7 7 7HI I \I G G \G N N \N H H \H N N \N * * * \*X 7 7 7 \7 7 7r#   rX  c                       e Zd Z ed d d          ZeZd Zej	        Z
d Zd Zd Zd	 Zd
 Zd Zd Zd Z	 	 	 	 	 	 	 ddZddZd Zd ZdS )SentencePieceNormalizerc                 4    | j                                         S r-   r<   r=   s    r   r?   z SentencePieceNormalizer.<lambda>  r@   r#   c                 6    | j                             |          S r-   r<   rB   s     r   r?   z SentencePieceNormalizer.<lambda>  rD   r#   rE   rF   c                 R    t          j        | t          j                               d S r-   )r    SentencePieceNormalizer_swiginitnew_SentencePieceNormalizerrK   s    r   rL   z SentencePieceNormalizer.__init__   s$    7n>h>j>jkkkkkr#   c                 ,    t          j        | |          S r-   )r   /SentencePieceNormalizer_LoadFromSerializedProtor   s     r   r   z/SentencePieceNormalizer.LoadFromSerializedProto$  s    MdT^___r#   c                 ,    t          j        | |          S r-   )r   'SentencePieceNormalizer_LoadFromRuleTSV)r   r  s     r   LoadFromRuleTSVz'SentencePieceNormalizer.LoadFromRuleTSV'  s    EdHUUUr#   c                 ,    t          j        | |          S r-   )r   (SentencePieceNormalizer_LoadFromRuleName)r   r   s     r   LoadFromRuleNamez(SentencePieceNormalizer.LoadFromRuleName*  s    FtTRRRr#   c                 *    t          j        |           S r-   )r   .SentencePieceNormalizer_serialized_model_protorK   s    r   r8  z.SentencePieceNormalizer.serialized_model_proto-  s    LTRRRr#   c                 ,    t          j        | |          S r-   )r   $SentencePieceNormalizer_LoadFromFiler;  s     r   r=  z$SentencePieceNormalizer.LoadFromFile0  s    B4MMMr#   c                 ,    t          j        | |          S r-   )r   "SentencePieceNormalizer__Normalizer  s     r   r  z"SentencePieceNormalizer._Normalize3  s    @tLLLr#   c                 ,    t          j        | |          S r-   )r   -SentencePieceNormalizer__NormalizeWithOffsetsr  s     r   r  z-SentencePieceNormalizer._NormalizeWithOffsets6  s    KDRVWWWr#   c                 .    t          j        | ||          S r-   )r   &SentencePieceNormalizer__SetProtoField)r   r   r    s      r   _SetProtoFieldz&SentencePieceNormalizer._SetProtoField9  s    DT4QVWWWr#   NFc                    t          |            |r|                     |          }nW|r|                     |          }n?|r|                     |          }n'|r|                     |          }nt          d          |rD|                     d|           |                     d|           |                     d|           dS dS )a  Initialzie sentencePieceNormalizer.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        rule_tsv: The normalization rule file in TSV format.
        rule_name: Pre-defined normalization name.
        add_dummy_prefix: add dummy prefix.
        escape_whitespaces: escape whitespaces.
        remove_extra_whitespaces: remove extra whitespaces.
      zno model is specifiedadd_dummy_prefixescape_whitespacesremove_extra_whitespacesN)%_sentencepiece_normalizer_init_nativer=  r   r  r  r  r  )	r   r  r  rule_tsv	rule_namer  r  r  statuss	            r   r  zSentencePieceNormalizer.Init<  s    ( ,D111	 	4"":.. 4--k:: 4%%h// 4&&y112333	 R.0@AAA02DEEE68PQQQQQR Rr#   c                 r      fdt          |          t          u rfd|D             S  |          S )Nc                 \    r                     |           S                     |           S r-   r-  r.  s    r   r0  z5SentencePieceNormalizer.Normalize.<locals>._normalizec  r1  r#   c                 &    g | ]} |          S r$   r$   r3  s     r   r   z5SentencePieceNormalizer.Normalize.<locals>.<listcomp>i  r4  r#   r5  r6  s   ` `@r   r7  z!SentencePieceNormalizer.Normalizeb  r8  r#   c                 *    |                                  S r-   rE  rK   s    r   rF  z$SentencePieceNormalizer.__getstate__m  rG  r#   c                 X    |                                   |                     |           d S r-   rI  rJ  s     r   rK  z$SentencePieceNormalizer.__setstate__q  rL  r#   )NNNNFFFr-   )r   r   r6   r   r   r   r   rL   r   delete_SentencePieceNormalizerrt   r   r  r  r8  r=  r  r  r  r  r7  rF  rK  r$   r#   r   r  r    sE       h--/I/IOdeeeGHl l l%D` ` `V V VS S SS S SN N NM M MX X XX X X # %&+$R $R $R $RL   + + +; ; ; ; ;r#   r  c                 *    t          j        |           S r-   )r   
SetDataDir)data_dirs    r   r  r  y  s    $X...r#   )ru  )rv  c                 J   i }| j                                         D ][\  }}t          j        d|          rAt          j        dd|                                                              dd          }|||<   \|                                D ]\  }}t          | ||           dS )z1Added snake_cased method from CammelCased method.z^[A-Z]+z(?<!^)(?=[A-Z])_n_bestnbestN)r/   r:  rematchsublowerreplacesetattr)	classname	snake_mapkrC   snakes        r   _add_snake_caser    s     ) &&((  da	x
A f' ' : : ioo  daIq! r#   c                 b    t          | |d          fdfd}t          | ||           dS )z4Enables batch request for the method classname.name.Nc                     t          |          t          u r-|dk     s||                                 k    rt          d           | |          S )Nr   zpiece id is out of range.)r   r  r@  r   )rC   r  funcs     r   _funcz_batchnize.<locals>._func  sK    Aww#~~1q55A$7$7233341::r#   c                 h     t          |          t          u r fd|D             S   |          S )Nc                 (    g | ]} |          S r$   r$   )r   r  r  r   s     r   r   z5_batchnize.<locals>._batched_func.<locals>.<listcomp>  s#    ***eeD!nn***r#   r5  )r   r<  r  s   ` r   _batched_funcz!_batchnize.<locals>._batched_func  sD    CyyD*****c****U4r#   )r   r  )r  r   r  r  r  s      @@r   
_batchnizer    sa    	D$	'	'$    
     
)T=)))))r#   rL   )r  r  r  r  r"  r%  r(  )__version__sentencepiecepackage_datac                   "    e Zd ZddZd Zd ZdS )r  Nc                 l    || _         | j         %t          j                                        | _        d S d S r-   )r  rt  stderrfilenoorig_stream_fileno)r   r  s     r   rL   z_LogStream.__init__  s5    DL| #
 1 1 3 3d  r#   c                     | j         Qt          j        | j                  | _        t          j        | j                                         | j                   d S d S r-   )r  osdupr  orig_stream_dupdup2r  rK   s    r   	__enter__z_LogStream.__enter__  sP    |VD$;<<dgdl!!##T%<=====  r#   c                     | j         lt          j        | j                   t          j        | j        | j                   t          j        | j                   | j                                          d S d S r-   )r  r  closer  r  r  )r   r   r    	tracebacks       r   __exit__z_LogStream.__exit__  sj    |ht&'''gd"D$;<<<ht#$$$
l	  r#   r-   )r   r   r6   rL   r  r  r$   r#   r   r  r    sF        4 4 4 4
> > >
    r#   r  )>rt  r   _swig_python_version_info__package__r   r   r   builtinsr   ImportErrorr   r%   r*   r3   r   r5   objectr:   >ImmutableSentencePieceText_ImmutableSentencePiece_swigregisterrx   'ImmutableSentencePieceText_swigregisterr   ,ImmutableNBestSentencePieceText_swigregisterr   #SentencePieceProcessor_swigregisterrS  rV  rX  !SentencePieceTrainer_swigregisterr  $SentencePieceNormalizer_swigregisterr  r  rw  r  importlib.resources	importlibioru  rv  r  r  rL   r  r  r  r  r  Tokenizer  
Detokenizemset_random_generator_seedset_min_log_level_versionr  pathr   rq   	resourcesfilesr  r$   r#   r   <module>r     s   : 9 9 9 9 9 #//       """""   [ [ [
 
 
    L L L L L$ L L L
3 3 3 3 3 3 3 3n N MN  A  A  AD D D D D D D DP 7 67Q R R R4 4 4 4 4f 4 4 4p < ;<[ \ \ \J+ J+ J+ J+ J+V J+ J+ J+\ 3 23I J J J7 7 7, , ,K7 K7 K7 K7 K76 K7 K7 K7^ 1 01E F F FW; W; W; W; W;f W; W; W;v 4 34K L L L/ / / 
			 



 



 				                
 
 
* * *" (>'F $(?(H % 
,B,G H H H -D-I J J J"8"?  $:$A  !
 ( (A *#Q'''' & ' ' ' $ % % % ' ( ( (2 "  ! ! ! ! ! ! 

27<<I/55oFFGGXX Y Y Y         s    	**