
    
`i                         d dl m Z  d dlmZmZ d dlZddlmZ 	 	 	 ddej        dee         dee         d	eee	                  d
ef
dZ
dS )    )copy)ListOptionalN   )LLTokenizerhf_tokenizern_vocab	eos_tokenslicesreturnc                 ,   t          | t          j                  rlt          | j                  }|                                 |                                 |                                }|| j        }t          ||||          S t          d          )aN  
    Create a new tokenizer from a fast Hugging Face tokenizer.
    This is an expensive operation (~1s), so the result should be cached.
    It currently only supports fast tokenizers, which are then handled
    by the Rust tokenizers library.

    Args:
        hf_tokenizer: transformers.PreTrainedTokenizerFast - the tokenizer to wrap
        n_vocab: int - override the size of the vocabulary
        eos_token: int - override the EOS token
        slices: List[str] - configuration for slicer optimization; pass [] to disable,
            or None to use the default configuration
    N)r	   r
   r   z"Only fast tokenizers are supported)
isinstancetransformersPreTrainedTokenizerFastr   backend_tokenizer
no_paddingno_truncationto_streos_token_idr   
ValueError)r   r	   r
   r   r   ss         a/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llguidance/hf.pyfrom_tokenizerr   	   s    ( , DEE ? !*
 
 	$$&&&'')))$$&& $1I1g6RRRR=>>>    )NNN)r   typingr   r   r   _libr   r   intstrr    r   r   <module>r       s          ! ! ! ! ! ! ! !          
 "#"&	&? &?6&?c]&? }&? T#Y	&?
 &? &? &? &? &? &?r   