
    Pi$                        d dl mZ d dlZd dlmZmZmZ d dlZd dlmZ  G d dej	                  Z
 G d de
          Z G d	 d
e
          ZdS )    )annotationsN)ListOptionalAny)r   c                  \    e Zd Zej        	 ddd	            Zej        	 	 ddd            Zd
S )BaseLlamaTokenizerTtextbytesadd_bosboolspecialreturn	List[int]c                    t           )zTokenize the text into tokens.

        Args:
            text: The utf-8 encoded string to tokenize.
            add_bos: Whether to add a beginning of sequence token.
            special: Whether to tokenize special tokens.
        NotImplementedErrorselfr	   r   r   s       m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/llama_tokenizer.pytokenizezBaseLlamaTokenizer.tokenize   s
     "!    NFtokensprev_tokensOptional[List[int]]c                    t           )a  Detokenize the tokens into text.

        Args:
            tokens: The list of tokens to detokenize.
            prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
            special: Whether to detokenize special tokens.
        r   r   r   r   r   s       r   
detokenizezBaseLlamaTokenizer.detokenize   s
     "!r   TTr	   r
   r   r   r   r   r   r   NFr   r   r   r   r   r   r   r
   )__name__
__module____qualname__abcabstractmethodr   r    r   r   r   r      so        AE
" 
" 
" 
" 
" 	 ,0	" " " " " " "r   r   c                  \    e Zd ZddZ	 dddZ	 	 dddZ	 dddZddZed d            Z	dS )!LlamaTokenizerllamallama_cpp.Llamac                    |j         | _         d S N)_model)r   r*   s     r   __init__zLlamaTokenizer.__init__.   s    lr   Tr	   r
   r   r   r   r   r   c                <    | j                             |||          S )Nr   r   )r.   r   r   s       r   r   zLlamaTokenizer.tokenize1   s!     {##D'7#KKKr   NFr   r   r   c                :    | j                             ||          S )N)r   )r.   r   r   s       r   r   zLlamaTokenizer.detokenize6   s     {%%fg%>>>r   strc                \    |                      |                    dd          ||          S )Nutf-8ignoreerrorsr1   )r   encoder   s       r   r9   zLlamaTokenizer.encode>   s6     }}KKK117G  
 
 	
r   c                V    |                      |                              dd          S )Nr5   r6   r7   )r   decode)r   r   s     r   r;   zLlamaTokenizer.decodeE   s'    v&&--gh-GGGr   path'LlamaTokenizer'c                @     | t          j        |d                    S )NT)
model_path
vocab_only)	llama_cppLlama)clsr<   s     r   from_ggml_filezLlamaTokenizer.from_ggml_fileH   s!    s9?dtDDDEEEr   )r*   r+   r   r   r    r!   )r	   r3   r   r   r   r   r   r   )r   r   r   r3   )r<   r3   r   r=   )
r"   r#   r$   r/   r   r   r9   r;   classmethodrD   r'   r   r   r)   r)   -   s        # # # # BFL L L L L ,0	? ? ? ? ? @D
 
 
 
 
H H H H F F F [F F Fr   r)   c                  H    e Zd ZddZ	 dddZ	 	 dddZedd            ZdS )LlamaHFTokenizerhf_tokenizerr   c                    || _         d S r-   )rH   )r   rH   s     r   r/   zLlamaHFTokenizer.__init__N   s    (r   Tr	   r
   r   r   r   r   r   c                d    | j                             |                    dd          |          S )Nr5   r6   r7   )add_special_tokens)rH   r9   r;   r   s       r   r   zLlamaHFTokenizer.tokenizeQ   s9      ''KKK11g ( 
 
 	
r   NFr   r   r   c                f   | }||| j                             ||z   |                              dd          }| j                             ||                              dd          }|t          |          d          S | j                             ||                              dd          S )N)skip_special_tokensr5   r6   r7   )rH   r;   r9   len)r   r   r   r   rM   r	   	prev_texts          r   r   zLlamaHFTokenizer.detokenizeX   s     #*k"$++f$:M ,  fWXf..  )001D 1  fWXf..  I(())$++,? ,  fWXf../r   pretrained_model_name_or_pathr3   'LlamaHFTokenizer'c                    	 ddl m} n# t          $ r t          d          w xY w|                    |          } | |          S )Nr   )AutoTokenizerzsThe `transformers` library is required to use the `HFTokenizer`.You can install it with `pip install transformers`.)rP   )transformersrS   ImportErrorfrom_pretrained)rC   rP   rS   rH   s       r   rV   z LlamaHFTokenizer.from_pretrainedl   s    	2222222 	 	 	F  	
 %44*G 5 
 
 s<   s   	 #)rH   r   r   r   r    r!   )rP   r3   r   rQ   )r"   r#   r$   r/   r   r   rE   rV   r'   r   r   rG   rG   M   s        ) ) ) ) BF
 
 
 
 
 ,0	/ / / / /( ! ! ! [! ! !r   rG   )
__future__r   r%   typingr   r   r   rA   llama_cpp.llama_typesABCr   r)   rG   r'   r   r   <module>r[      s   " " " " " " 



              & & & & & &" " " " " " " ">F F F F F' F F F@+! +! +! +! +!) +! +! +! +! +!r   