
    Pizs                       d dl mZ d dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZ d dlmZmZ d dlmZ d dlZd dlmZ ddlT ddlmZ dd	lmZ d dlmZ  G d
 d          Z G d d          Z G d d          Z G d d          Zd Z e G d d                      Z!e G d d                      Z" G d d          Z# G d d          Z$dS )    )annotationsN)DictListTupleOptionalSequenceCallableUnion)	dataclassfield)	ExitStack   )*)LlamaGrammar)suppress_stdout_stderrc                     e Zd ZdZddd8d
Zd Zd Zd9dZd9dZd9dZ	d9dZ
d:dZd;dZd9dZd9dZd<dZd=dZd>dZd?dZd9dZd9d Zd9d!Zd9d"Zd9d#Zd9d$Zd9d%Zd9d&Zd9d'Zd@d(Zd@d)ZdAd.ZdBdCd0ZdBdDd3Z dEd5Z!e"d6             Z#d7S )F
LlamaModelzIntermediate Python wrapper for a llama.cpp llama_model.
    NOTE: For stability it's recommended you use the Llama class instead.Tverbose
path_modelstrparamsllama_cpp.llama_model_paramsr   boolc               R    | _         | _        | _        t                       _        d }t
          j                            |          st          d|           t          |          5  t          j         j                             d           j                  }d d d            n# 1 swxY w Y   |t          d|           t          j        |          }|t          d|           | _        | _        d  _         fd} j                            |           d S )NzModel path does not exist: )disableutf-8z Failed to load model from file: z Failed to get vocab from model: c                 Z     j         d S t          j         j                    d  _         d S N)model	llama_cppllama_model_freeselfs   h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/_internals.py
free_modelz'LlamaModel.__init__.<locals>.free_modelE   .    z!&tz222DJJJ    )r   r   r   r   _exit_stackospathexists
ValueErrorr   r!   llama_model_load_from_fileencodellama_model_get_vocabr    vocabsamplercallback)r$   r   r   r   r    r1   r&   s   `      r%   __init__zLlamaModel.__init__#   s    %$;;w~~j)) 	IG:GGHHH#G444 	 	8&&w// E	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 =L
LLMMM/66=L
LLMMM

	 	 	 	 	 	!!*-----s   -3B,,B03B0c                    | j         Mt          | j                  D ]\  }}t          j        | j         |            | j                                         | j                                         d S r   )r2   reversedcustom_samplersr!   llama_sampler_chain_removeclearr)   close)r$   i_s      r%   r:   zLlamaModel.closeM   sr    <# !566 F F14T\1EEEE &&(((     r(   c                .    |                                   d S r   r:   r#   s    r%   __del__zLlamaModel.__del__U       

r(   returnintc                4    t          j        | j                  S r   )r!   llama_vocab_typer1   r#   s    r%   
vocab_typezLlamaModel.vocab_typeX       )$*555r(   c                4    t          j        | j                  S r   )r!   llama_vocab_n_tokensr1   r#   s    r%   n_vocabzLlamaModel.n_vocab[       -dj999r(   c                4    t          j        | j                  S r   )r!   llama_model_n_ctx_trainr    r#   s    r%   n_ctx_trainzLlamaModel.n_ctx_train^       0<<<r(   c                4    t          j        | j                  S r   )r!   llama_model_n_embdr    r#   s    r%   n_embdzLlamaModel.n_embda   s    +DJ777r(   floatc                4    t          j        | j                  S r   )r!   !llama_model_rope_freq_scale_trainr    r#   s    r%   rope_freq_scale_trainz LlamaModel.rope_freq_scale_traind   s    :4:FFFr(   c                    t          j        d          }t          j        | j        |d           |j                            d          S )N   r   )ctypescreate_string_bufferr!   llama_model_descr    valuedecode)r$   bufs     r%   desczLlamaModel.descg   s?    )$//"4:sD999y(((r(   c                4    t          j        | j                  S r   )r!   llama_model_sizer    r#   s    r%   sizezLlamaModel.sizel   rF   r(   c                4    t          j        | j                  S r   )r!   llama_model_n_paramsr    r#   s    r%   n_paramszLlamaModel.n_paramso   rJ   r(   namectypes.c_void_pc                     t          d          )Nz*get_tensor is not implemented in llama.cppNotImplementedError)r$   re   s     r%   
get_tensorzLlamaModel.get_tensorr   s    !"NOOOr(   tokenc                \    t          j        | j        |                              d          S Nr   )r!   llama_vocab_get_textr1   r\   r$   rk   s     r%   token_get_textzLlamaModel.token_get_textw   s%    -dj%@@GGPPPr(   c                6    t          j        | j        |          S r   )r!   llama_vocab_get_scorer1   ro   s     r%   token_get_scorezLlamaModel.token_get_scorez   s    .tz5AAAr(   c                6    t          j        | j        |          S r   )r!   llama_vocab_get_attrr1   ro   s     r%   token_get_attrzLlamaModel.token_get_attr}   s    -dj%@@@r(   c                4    t          j        | j                  S r   )r!   llama_vocab_bosr1   r#   s    r%   	token_boszLlamaModel.token_bos       (444r(   c                4    t          j        | j                  S r   )r!   llama_vocab_eosr1   r#   s    r%   	token_eoszLlamaModel.token_eos   rz   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_clsr1   r#   s    r%   	token_clszLlamaModel.token_cls   rz   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_sepr1   r#   s    r%   	token_sepzLlamaModel.token_sep   rz   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_nlr1   r#   s    r%   token_nlzLlamaModel.token_nl   s    '
333r(   c                4    t          j        | j                  S r   )r!   llama_vocab_fim_prer1   r#   s    r%   token_prefixzLlamaModel.token_prefix       ,TZ888r(   c                4    t          j        | j                  S r   )r!   llama_vocab_fim_midr1   r#   s    r%   token_middlezLlamaModel.token_middle   r   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_fim_sufr1   r#   s    r%   token_suffixzLlamaModel.token_suffix   r   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_eotr1   r#   s    r%   	token_eotzLlamaModel.token_eot   rz   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_get_add_bosr1   r#   s    r%   add_bos_tokenzLlamaModel.add_bos_token   rN   r(   c                4    t          j        | j                  S r   )r!   llama_vocab_get_add_eosr1   r#   s    r%   add_eos_tokenzLlamaModel.add_eos_token   rN   r(   textbytesadd_bosspecialc           	        |                                  }t          j        |z              }t          j        | j        |t          |          ||||          }|dk     rlt          |          }t          j        |z              }t          j        | j        |t          |          ||||          }|dk     rt          d| d|           t          |d |                   S )Nr   zFailed to tokenize: text="z" n_tokens=)	rM   r!   llama_tokenllama_tokenizer1   lenabsRuntimeErrorlist)r$   r   r   r   n_ctxtokensn_tokenss          r%   tokenizezLlamaModel.tokenize   s      ""'%/22+Jc$ii
 
 a<<8}}H+h699F /
D#d))VXw H !||"LLL(LL   F9H9%&&&r(   Fc                    t          j        d          }t          j        | j        ||dd|           t          |          S )N    r   )rX   rY   r!   llama_token_to_piecer1   r   )r$   rk   r   r]   s       r%   token_to_piecezLlamaModel.token_to_piece   s;    )"--&tz5#r1gNNNSzzr(   r   	List[int]c           	     z   d}d}t          j        |z              }|D ]T}t          j        | j        t          j        |          ||d|          }||k    sJ |t          |d |                   z  }Ut          |          dk    r6|d         |                                 k    r|dd         dk    r
|dd          n|S )Nr(   r   r   r       )	rX   c_charr!   r   r1   r   r   r   ry   )r$   r   r   outputra   bufferrk   ns           r%   
detokenizezLlamaModel.detokenize   s    -$&)) 	( 	(E.
I1%88&$7 A 9999eF2A2J'''FF
 6{{Q6!90@0@#@#@VAaC[TXEXEX 122JJ	
r(   Dict[str, str]c                j   i }d}t          j        |          }d|z  |_        t          t	          j        | j                            D ]}t	          j        | j        |||          }||k    r5|dz   }t          j        |          }t	          j        | j        |||          }|j                            d          }t	          j	        | j        |||          }||k    r5|dz   }t          j        |          }t	          j	        | j        |||          }|j                            d          }|||<   |S )NrW       r   r   )
rX   rY   r[   ranger!   llama_model_meta_countr    llama_model_meta_key_by_indexr\   !llama_model_meta_val_str_by_index)r$   metadatabuffer_sizer   r;   nbyteskeyr[   s           r%   r   zLlamaModel.metadata   sJ   #%,[99{*y7
CCDD 	" 	"A<
Av{ F ##$qj4[AA"@J6;  ,%%g..C@
Av{ F ##$qj4[AA"DJ6;  L''00E!HSMMr(   c                 (    t          j                    S )z#Get the default llama_model_params.)r!   llama_model_default_params r(   r%   default_paramszLlamaModel.default_params   s     3555r(   N)r   r   r   r   r   r   rA   rB   )rA   rR   )rA   r   )re   r   rA   rf   )rk   rB   rA   r   )rk   rB   rA   rR   )rk   rB   rA   rB   )rA   r   )r   r   r   r   r   r   )F)rk   rB   r   r   rA   r   )r   r   r   r   rA   r   )rA   r   )$__name__
__module____qualname____doc__r4   r:   r?   rE   rI   rM   rQ   rU   r^   ra   rd   rj   rp   rs   rv   ry   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r(   r%   r   r      s       M M (. (. (. (. (. (.T! ! !  6 6 6 6: : : := = = =8 8 8 8G G G G) ) ) )
6 6 6 6: : : :P P P P
Q Q Q QB B B BA A A A
5 5 5 55 5 5 55 5 5 55 5 5 54 4 4 49 9 9 99 9 9 99 9 9 95 5 5 5= = = == = = =
' ' ' '$    

 
 
 
 
&   > 6 6 \6 6 6r(   r   c                  8   e Zd ZdZdddRd
Zd Zd ZdSdZdSdZd Z	dTdZ
dUdZdVdZdWdZdSdZdXdZdXd ZdYd#Zd$ ZdZd&Zd' ZdZd(ZdVd)Zd[d+Zd\d5Zd]d6Zd^d9Zd_d;Zd_d<Zd_d=Zd`d?ZdadBZdbdHZ dcdIZ!dddJZ"dddKZ#dedMZ$dN Z%dO Z&e'dP             Z(dQS )fLlamaContextzIntermediate Python wrapper for a llama.cpp llama_context.
    NOTE: For stability it's recommended you use the Llama class instead.Tr   r    r   r   llama_cpp.llama_context_paramsr   r   c               X    | _         | _        | _        t                       _        t          j         j         j          j                  }|t          d          | _        t          j	         j                   _
        d  _         fd} j                            |           d S )NzFailed to create llama_contextc                 Z     j         d S t          j         j                    d  _         d S r   )ctxr!   
llama_freer#   s   r%   free_ctxz'LlamaContext.__init__.<locals>.free_ctx  s.    x ***DHHHr(   )r    r   r   r   r)   r!   llama_init_from_modelr-   r   llama_get_memorymemoryr2   r3   )r$   r    r   r   r   r   s   `     r%   r4   zLlamaContext.__init__   s     
$;;-dj.>LL;=>>>0::	 	 	 	 	 	!!(+++++r(   c                8    | j                                          d S r   r)   r:   r#   s    r%   r:   zLlamaContext.close           r(   c                .    |                                   d S r   r>   r#   s    r%   r?   zLlamaContext.__del__  r@   r(   rA   rB   c                4    t          j        | j                  S r   )r!   llama_n_ctxr   r#   s    r%   r   zLlamaContext.n_ctx  s    $TX...r(   c                4    t          j        | j                  S r   )r!   llama_pooling_typer   r#   s    r%   pooling_typezLlamaContext.pooling_type  s    +DH555r(   c                \    | j         
J d            t          j        | j         d           d S )NMemory is not initializedT)r   r!   llama_memory_clearr#   s    r%   kv_cache_clearzLlamaContext.kv_cache_clear!  s4    {&&(C&&&$T[$77777r(   seq_idp0p1c                t    | j         
J d            |dk    r|nd}t          j        | j         |||           d S )Nr   r   )r   r!   llama_memory_seq_rm)r$   r   r   r   s       r%   kv_cache_seq_rmzLlamaContext.kv_cache_seq_rm%  sI    {&&(C&&&!Q;;A%dk62rBBBBBr(   
seq_id_src
seq_id_dstc                b    | j         
J d            t          j        | j         ||||           d S Nr   )r   r!   llama_memory_seq_cp)r$   r   r   r   r   s        r%   kv_cache_seq_cpzLlamaContext.kv_cache_seq_cp*  s:    {&&(C&&&%dk:z2rRRRRRr(   c                \    | j         
J d            t          j        | j         |           d S r   )r   r!   llama_memory_seq_keepr$   r   s     r%   kv_cache_seq_keepzLlamaContext.kv_cache_seq_keep.  s4    {&&(C&&&'V<<<<<r(   shiftc                b    | j         
J d            t          j        | j         ||||           d S r   )r   r!   llama_memory_seq_add)r$   r   r   r   r   s        r%   kv_cache_seq_shiftzLlamaContext.kv_cache_seq_shift2  s:    {&&(C&&&&t{FBEJJJJJr(   c                4    t          j        | j                  S r   )r!   llama_state_get_sizer   r#   s    r%   get_state_sizezLlamaContext.get_state_size6      -dh777r(   batch
LlamaBatchc                t    t          j        | j        |j                  }|dk    rt	          d|           d S )Nr   zllama_decode returned )r!   llama_decoder   r   r   r$   r   return_codes      r%   r\   zLlamaContext.decodeA  J    ,HK
 
 !EEEFFF r(   c                t    t          j        | j        |j                  }|dk    rt	          d|           d S )Nr   zllama_encode returned )r!   llama_encoder   r   r   r   s      r%   r/   zLlamaContext.encodeI  r   r(   	n_threadsn_threads_batchc                <    t          j        | j        ||           d S r   )r!   llama_set_n_threadsr   )r$   r  r  s      r%   set_n_threadszLlamaContext.set_n_threadsQ  s    %dh	?KKKKKr(   c                4    t          j        | j                  S r   )r!   llama_get_logitsr   r#   s    r%   
get_logitszLlamaContext.get_logitsT  s    )$(333r(   r;   c                6    t          j        | j        |          S r   )r!   llama_get_logits_ithr   r$   r;   s     r%   get_logits_ithzLlamaContext.get_logits_ithW  s    -dh:::r(   c                4    t          j        | j                  S r   )r!   llama_get_embeddingsr   r#   s    r%   get_embeddingszLlamaContext.get_embeddingsZ  r   r(   c                6    t          j        | j        |          S r   )r!   llama_get_embeddings_ithr   r  s     r%   get_embeddings_ithzLlamaContext.get_embeddings_ith]  s    1$(A>>>r(   c                6    t          j        | j        |          S r   )r!   llama_get_embeddings_seqr   r   s     r%   get_embeddings_seqzLlamaContext.get_embeddings_seq`  s    1$(FCCCr(   seedc                     t          d          )Nz4set_rng_seed is deprecated, use LlamaSampler insteadrh   )r$   r  s     r%   set_rng_seedzLlamaContext.set_rng_seede      !"XYYYr(   
candidates'_LlamaTokenDataArray'last_tokens_data('llama_cpp.Array[llama_cpp.llama_token]'penalty_last_npenalty_repeatrR   penalty_freqpenalty_presentc                     t          d          )NzCsample_repetition_penalties is deprecated, use LlamaSampler insteadrh   )r$   r  r  r   r!  r"  r#  s          r%   sample_repetition_penaltiesz(LlamaContext.sample_repetition_penaltiesh  s     ""ghhhr(   c                     t          d          )Nz6sample_softmax is deprecated, use LlamaSampler insteadrh   r$   r  s     r%   sample_softmaxzLlamaContext.sample_softmaxs      !"Z[[[r(   kmin_keepc                     t          d          )Nz4sample_top_k is deprecated, use LlamaSampler insteadrh   )r$   r  r*  r+  s       r%   sample_top_kzLlamaContext.sample_top_kv  r  r(   pc                     t          d          )Nz4sample_top_p is deprecated, use LlamaSampler insteadrh   r$   r  r.  r+  s       r%   sample_top_pzLlamaContext.sample_top_py  r  r(   c                     t          d          )Nz4sample_min_p is deprecated, use LlamaSampler insteadrh   r0  s       r%   sample_min_pzLlamaContext.sample_min_p|  r  r(   c                     t          d          )Nz6sample_typical is deprecated, use LlamaSampler insteadrh   r0  s       r%   sample_typicalzLlamaContext.sample_typical  s     ""Z[[[r(   tempc                     t          d          )Nz3sample_temp is deprecated, use LlamaSampler insteadrh   )r$   r  r6  s      r%   sample_tempzLlamaContext.sample_temp  s    !"WXXXr(   grammarr   c                     t          d          )Nz6sample_grammar is deprecated, use LlamaSampler insteadrh   )r$   r  r9  s      r%   sample_grammarzLlamaContext.sample_grammar  r)  r(   tauetammu,llama_cpp.CtypesPointerOrRef[ctypes.c_float]c                     t          d          )Nz=sample_token_mirostat is deprecated, use LlamaSampler insteadrh   )r$   r  r<  r=  r>  r?  s         r%   sample_token_mirostatz"LlamaContext.sample_token_mirostat  s     ""abbbr(   c                     t          d          )Nz@sample_token_mirostat_v2 is deprecated, use LlamaSampler insteadrh   )r$   r  r<  r=  r?  s        r%   sample_token_mirostat_v2z%LlamaContext.sample_token_mirostat_v2  s     ""deeer(   c                     t          d          )Nz;sample_token_greedy is deprecated, use LlamaSampler insteadrh   r'  s     r%   sample_token_greedyz LlamaContext.sample_token_greedy  s    !"_```r(   c                     t          d          )Nz4sample_token is deprecated, use LlamaSampler insteadrh   r'  s     r%   sample_tokenzLlamaContext.sample_token  r  r(   rk   c                     t          d          )Nz<grammar_accept_token is deprecated, use LlamaSampler insteadrh   )r$   r9  rk   s      r%   grammar_accept_tokenz!LlamaContext.grammar_accept_token  s    !"`aaar(   c                8    t          j        | j                   d S r   )r!   llama_perf_context_resetr   r#   s    r%   reset_timingszLlamaContext.reset_timings      *4844444r(   c                8    t          j        | j                   d S r   )r!   llama_perf_context_printr   r#   s    r%   print_timingszLlamaContext.print_timings  rN  r(   c                 (    t          j                    S )z%Get the default llama_context_params.)r!   llama_context_default_paramsr   r(   r%   r   zLlamaContext.default_params  s     5777r(   N)r    r   r   r   r   r   r   )r   rB   r   rB   r   rB   )r   rB   r   rB   r   rB   r   rB   )r   rB   )r   rB   r   rB   r   rB   r   rB   )r   r   )r  rB   r  rB   )r;   rB   r  rB   )r  r  r  r  r   rB   r!  rR   r"  rR   r#  rR   )r  r  )r  r  r*  rB   r+  rB   )r  r  r.  rR   r+  rB   )r  r  r6  rR   )r  r  r9  r   )r  r  r<  rR   r=  rR   r>  rB   r?  r@  rA   rB   )
r  r  r<  rR   r=  rR   r?  r@  rA   rB   )r  r  rA   rB   )r9  r   rk   rB   ))r   r   r   r   r4   r:   r?   r   r   r   r   r   r   r   r   r\   r/   r  r
  r  r  r  r  r  r%  r(  r-  r1  r3  r5  r8  r;  rB  rD  rF  rH  rJ  rM  rQ  r   r   r   r(   r%   r   r      s7       M M , , , , , ,:! ! !  / / / /6 6 6 68 8 8C C C C
S S S S= = = =K K K K8 8 8 8G G G GG G G GL L L L4 4 4; ; ; ;8 8 8? ? ? ?D D D D
Z Z Z Z	i 	i 	i 	i\ \ \ \Z Z Z ZZ Z Z ZZ Z Z Z\ \ \ \
Y Y Y Y\ \ \ \c c c cf f f fa a a aZ Z Z Zb b b b5 5 55 5 5 8 8 \8 8 8r(   r   c                  F    e Zd Zdddd	Zd
 Zd ZddZd ZddZddZ	dS )r   Tr   r   rB   embd	n_seq_maxr   r   c               ,    | _         | _        | _        | _        t	                       _        t          j         j          j         j                  }|t          d          | _	        d  _
         fd} j                            |           d S )NzFailed to create llama_batchc                 Z     j         d S t          j         j                    d  _         d S r   )r   r!   llama_batch_freer#   s   r%   
free_batchz'LlamaBatch.__init__.<locals>.free_batch  r'   r(   )	_n_tokensrV  rW  r   r   r)   r!   llama_batch_initr-   r   r2   r3   )r$   r   rV  rW  r   r   r[  s   `      r%   r4   zLlamaBatch.__init__  s     "	"$;;*4>49dnUU=;<<<
	 	 	 	 	 	!!*-----r(   c                8    | j                                          d S r   r   r#   s    r%   r:   zLlamaBatch.close  r   r(   c                .    |                                   d S r   r>   r#   s    r%   r?   zLlamaBatch.__del__  r@   r(   rA   c                    | j         j        S r   r   r   r#   s    r%   r   zLlamaBatch.n_tokens  s    z""r(   c                    d| j         _        d S )Nr   ra  r#   s    r%   resetzLlamaBatch.reset  s    
r(   r   Sequence[int]n_past
logits_allc                8   t          |          }|| j        _        t          |          D ]\}||         | j        j        |<   ||z   | j        j        |<   d| j        j        |         d<   d| j        j        |<   || j        j        |<   ]d| j        j        |dz
  <   d S Nr   r   T	r   r   r   r   rk   posr   n_seq_idlogits)r$   r   re  rf  r   r;   s         r%   	set_batchzLlamaBatch.set_batch  s    u::&
x 	. 	.A"'(DJQ &
DJN1&'DJa #%&DJ"#-DJa  *.
(Q,'''r(   r   c                f   t          |          }| j        j        }| j        xj        |z  c_        t          |          D ]^}||z   }||         | j        j        |<   || j        j        |<   || j        j        |         d<   d| j        j        |<   || j        j        |<   _d| j        j        |dz
  <   d S rh  ri  )r$   r   r   rf  r   	n_tokens0r;   js           r%   add_sequencezLlamaBatch.add_sequence  s    u::J'	
x'x 	. 	.AAA"'(DJQ !DJN1&,DJa #%&DJ"#-DJa  *.
(Q,'''r(   N)r   rB   rV  rB   rW  rB   r   r   r   )r   rd  re  rB   rf  r   )r   rd  r   rB   rf  r   )
r   r   r   r4   r:   r?   r   rc  rm  rq  r   r(   r%   r   r     s        KO. . . . . .2! ! !  # # # #     	/ 	/ 	/ 	// / / / / /r(   r   c                      e Zd ZddZd	dZdS )
LlamaTokenDataArrayrI   rB   c          	        || _         t          j        | j         ft          j        dt          j        fdt          j        fdt          j        fgd                    | _        t          j        | j        j	        
                    t          j                  | j         d          | _        t          j        | j         t          j                  | _        t          j        | j         t          j                  | _        d | _        d S )	Nidlogitr.  T)align)dtypeF)datara   sorted)rI   nprecarrayrx  intcsinglecandidates_datar!   llama_token_data_arrayrX   data_asllama_token_data_pr  arangedefault_candidates_data_idzerosdefault_candidates_data_pr2   )r$   rI   s     r%   r4   zLlamaTokenDataArray.__init__  s    !{\O(7BI"6bi8HIQU   
  
  
 $:%,44Y5QRR
 
 

 +-)DL*P*P*P')+$,bi)P)P)P&r(   rl  npt.NDArray[np.single]c                    | j         | j        j        d d <   || j        j        d d <   | j        | j        j        d d <   d| j        _        | j        | j        _	        d S )NF)
r  r  ru  rv  r  r.  r  rz  rI   ra   )r$   rl  s     r%   copy_logitszLlamaTokenDataArray.copy_logits  s`    %)%D"(."111%$($Bqqq!!&#|r(   N)rI   rB   )rl  r  )r   r   r   r4   r  r   r(   r%   rs  rs    s<           ", , , , , ,r(   rs  c                    t          t          j                            |                     dk    r| S fd| D             S )N        c                    g | ]}|z  S r   r   ).0vnorms     r%   
<listcomp>z'normalize_embedding.<locals>.<listcomp>  s    (((AH(((r(   )rR   r{  linalgr  )	embeddingr  s    @r%   normalize_embeddingr    sG    	**++Ds{{((((i((((r(   c                  <   e Zd ZU dZded<   dZded<   dZded<   dZd	ed
<   dZd	ed<   dZ	d	ed<   dZ
d	ed<   dZd	ed<   dZded<   dZd	ed<   dZd	ed<   dZd	ed<   dZded<   dZd	ed<   dZd	ed<   dZded<   dZd ed!<   dZd ed"<   dZd	ed#<    ee$          Zd%ed&<   d'S )(LlamaSamplingParams@   rB   n_prevr   n_probs(   top_kgffffff?rR   top_pg?min_pg      ?tfs_z	typical_pg?r6  r   r!  r  r"  r#  mirostatg      @mirostat_taug?mirostat_etaTr   penalize_nl r   r9  cfg_negative_prompt	cfg_scaledefault_factoryzdict[int, float]
logit_biasN)r   r   r   r  __annotations__r  r  r  r  r  r  r6  r   r!  r"  r#  r  r  r  r  r9  r  r  r   dictr  r   r(   r%   r  r    sp        FGEOOOOEEEIDNNL!O!!!!HLLKG!!!!!I#(5#>#>#>J>>>>>>r(   r  c                      e Zd ZU  ee          Zded<    eej                  Z	ded<   dZ
ded<    ee          Zd	ed
<    ee          Zded<   d Zd Zd!dZd"dZ	 	 d#d$dZd%d ZdS )&LlamaSamplingContextr  r  r   zctypes.c_floatmirostat_muNzOptional[LlamaGrammar]r9  z	list[int]prevz list[llama_cpp.llama_token_data]curc                f    g | _         g | _        | j        | j                                         d S d S r   )r  r  r9  rc  r#   s    r%   rc  zLlamaSamplingContext.reset>  s:    	<#L      $#r(   c                    t          | j        | j        | j        | j                                        | j                                                  S )N)r   r  r9  r  r  )r  r   r  r9  r  copyr  r#   s    r%   cpzLlamaSamplingContext.cpD  sE    #;(L!!
 
 
 	
r(   rA   Optional[int]c                P    t          | j                  dk    r| j        d         S d S )Nr   )r   r  r#   s    r%   lastzLlamaSamplingContext.lastM  s&    ty>>A9R= 4r(   ctx_mainr   r   rB   r   c                x    |j                             | j        | d                                        d          S rm   )r    r   r  r\   )r$   r  r   s      r%   prev_strzLlamaSamplingContext.prev_strS  s2    ~((A23388??HHHr(   r   idxlogits_array Optional[npt.NDArray[np.single]]c                     t          d          )NzCLlamaSamplingContext.sample is deprecated, use LlamaSampler insteadrh   )r$   r  r  r  s       r%   samplezLlamaSamplingContext.sampleV  s     ""ghhhr(   ru  apply_grammarr   c                :    | j                             |           d S r   )r  append)r$   r  ru  r  s       r%   acceptzLlamaSamplingContext.accept_  s    	r(   )rA   r  )r  r   r   rB   rA   r   )r   N)r  r   r  rB   r  r  )r  r   ru  rB   r  r   )r   r   r   r   r  r   r  rX   c_floatr  r9  r   r  r  rc  r  r  r  r  r  r   r(   r%   r  r  5  s&        "'%8K"L"L"LFLLLL"'%"G"G"GKGGGG&*G****eD111D1111,1E$,G,G,GCGGGG! ! !
 
 
   I I I I 9=	i i i i i     r(   r  c                      e Zd ZddZd	dZdS )
CustomSampler
apply_func2Callable[[llama_cpp.llama_token_data_array], None]c                    | _         d fd}d	d}t          j                    }t          j        |          |_        | _        t          j        d          |_        t          j        d          |_	        t          j
        d          |_        t          j        d          |_        t          j        d          |_        t          j                     _        t%          j        |           j        _        d  j        _        d S )
Nr2   llama_cpp.llama_sampler_pcur_p"llama_cpp.llama_token_data_array_pc                2                         |           d S r   )r  )r2   r  r$   s     r%   apply_wrapperz-CustomSampler.__init__.<locals>.apply_wrapperi  s     OOE"""""r(   c                    d S r   r   )r2   s    r%   free_wrapperz,CustomSampler.__init__.<locals>.free_wrappero  s    Dr(   r   )r2   r  r  r  )r2   r  )r  r!   llama_sampler_illama_sampler_i_applyapply_apply_wrapper_refllama_sampler_i_namere   llama_sampler_i_acceptr  llama_sampler_i_resetrc  llama_sampler_i_cloneclonellama_sampler_i_freefreellama_samplerr2   rX   pointerifacer   )r$   r  r  r  	sampler_is   `    r%   r4   zCustomSampler.__init__d  s     %	# 	# 	# 	# 	# 	#	 	 	 	 -//	#9-HH	"/"7::	$;A>>	#9!<<	#9!<<	"7::	 .00#^I66r(   rA   r  c                4    t          j        | j                  S r   )rX   r  r2   r#   s    r%   get_samplerzCustomSampler.get_sampler  s    ~dl+++r(   Nr  r  )rA   r  )r   r   r   r4   r  r   r(   r%   r  r  c  s<               8, , , , , ,r(   r  c                      e Zd Zd Zd Zd Zd ZdJdZd ZdKd
Z	dLdMdZ
dLdMdZdLdMdZdNdZdOdZdPdZdQdZdRdZdSd ZdTd%ZdUd*ZdVd/ZdWd6ZdXd9ZdYd:ZdZd=Zd[d?Zd\d]dDZd^dFZdG ZdH ZdIS )_LlamaSamplerc                     t          j                    }t          j        |           _        g  _        t                       _         fd} j                            |           d S )Nc                     j         Vt          j                  D ]\  } }t          j        j         |             t          j        j                    d _         d S d S r   )r2   r6   r7   r!   r8   llama_sampler_free)r;   r<   r$   s     r%   free_samplerz+LlamaSampler.__init__.<locals>.free_sampler  si    |'$T%9:: J JDAq8qIIII,T\:::# ('r(   )r!   "llama_sampler_chain_default_paramsllama_sampler_chain_initr2   r7   r   r)   r3   )r$   r   r  s   `  r%   r4   zLlamaSampler.__init__  so    =?? 9&AA@B$;;	$ 	$ 	$ 	$ 	$ 	!!,/////r(   c                8    | j                                          d S r   r   r#   s    r%   r:   zLlamaSampler.close  r   r(   c                .    |                                   d S r   r>   r#   s    r%   r?   zLlamaSampler.__del__  r@   r(   c                `    t          j                    }t          j        | j        |           d S r   )r!   llama_sampler_init_greedyllama_sampler_chain_addr2   r$   r2   s     r%   
add_greedyzLlamaSampler.add_greedy  s+    577)$,@@@@@r(   r  rB   c                b    t          j        |          }t          j        | j        |           d S r   )r!   llama_sampler_init_distr  r2   )r$   r  r2   s      r%   add_distzLlamaSampler.add_dist  -    3D99)$,@@@@@r(   c                `    t          j                    }t          j        | j        |           d S r   )r!   llama_sampler_init_softmaxr  r2   r  s     r%   add_softmaxzLlamaSampler.add_softmax  s+    688)$,@@@@@r(   r*  c                b    t          j        |          }t          j        | j        |           d S r   )r!   llama_sampler_init_top_kr  r2   )r$   r*  r2   s      r%   	add_top_kzLlamaSampler.add_top_k  s-    4Q77)$,@@@@@r(   r   r.  rR   r+  c                d    t          j        ||          }t          j        | j        |           d S r   )r!   llama_sampler_init_top_pr  r2   r$   r.  r+  r2   s       r%   	add_top_pzLlamaSampler.add_top_p  /    4QAA)$,@@@@@r(   c                d    t          j        ||          }t          j        | j        |           d S r   )r!   llama_sampler_init_min_pr  r2   r  s       r%   	add_min_pzLlamaSampler.add_min_p  r   r(   c                d    t          j        ||          }t          j        | j        |           d S r   )r!   llama_sampler_init_typicalr  r2   r  s       r%   add_typicalzLlamaSampler.add_typical  s/    6q(CC)$,@@@@@r(   r6  c                b    t          j        |          }t          j        | j        |           d S r   )r!   llama_sampler_init_tempr  r2   )r$   r6  r2   s      r%   add_tempzLlamaSampler.add_temp  r  r(   tdeltaexponentc                f    t          j        |||          }t          j        | j        |           d S r   )r!   llama_sampler_init_temp_extr  r2   )r$   r
  r  r  r2   s        r%   add_temp_extzLlamaSampler.add_temp_ext  s1    75(KK)$,@@@@@r(   c                h    t          j        ||||          }t          j        | j        |           d S r   )r!   llama_sampler_init_xtcr  r2   )r$   r.  r
  r+  r  r2   s         r%   add_xtczLlamaSampler.add_xtc  s3    21a4HH)$,@@@@@r(   r   c                b    t          j        |          }t          j        | j        |           d S r   )r!   llama_sampler_init_top_n_sigmar  r2   )r$   r   r2   s      r%   add_top_n_sigmazLlamaSampler.add_top_n_sigma  s-    :1==)$,@@@@@r(   rI   r<  r=  r>  c                j    t          j        |||||          }t          j        | j        |           d S r   )r!   llama_sampler_init_mirostatr  r2   )r$   rI   r  r<  r=  r>  r2   s          r%   add_mirostatzLlamaSampler.add_mirostat  s6    7sCQRSS)$,@@@@@r(   c                f    t          j        |||          }t          j        | j        |           d S r   )r!   llama_sampler_init_mirostat_v2r  r2   )r$   r  r<  r=  r2   s        r%   add_mirostat_v2zLlamaSampler.add_mirostat_v2  s1    :4cJJ)$,@@@@@r(   r    r   r9  r   c                    t          j        |j        |j                            d          |j                            d                    }t          j        | j        |           d S rm   )r!   llama_sampler_init_grammarr1   _grammarr/   _rootr  r2   )r$   r    r9  r2   s       r%   add_grammarzLlamaSampler.add_grammar  s\    6K)00997=;O;OPW;X;X
 
 	)$,@@@@@r(   trigger_patterns	List[str]trigger_tokensr   c                   t          j        t          |          z              }t          |          D ]\  }}|                    d          ||<   t          j        t          |          z  | }t          j        |j        |j	                            d          |j
                            d          |t          |          |t          |                    }	t          j        | j        |	           d S rm   )rX   c_char_pr   	enumerater/   r!   r   (llama_sampler_init_grammar_lazy_patternsr1   r  r  r  r2   )
r$   r    r9  r!  r#  pattern_ptrsr;   patterntoken_arrayr2   s
             r%   add_grammar_lazy_patternsz&LlamaSampler.add_grammar_lazy_patterns  s     #.>*?*??BB#$455 	6 	6JAw%nnW55LOO !,s>/B/BB^TDK##G,,M  )) !!
 
 	)$,@@@@@r(   r   r!  r"  r#  c                h    t          j        ||||          }t          j        | j        |           d S r   )r!   llama_sampler_init_penaltiesr  r2   )r$   r   r!  r"  r#  r2   s         r%   add_penaltieszLlamaSampler.add_penalties  s?     8	
 
 	)$,@@@@@r(   rM   dry_multiplierdry_basedry_allowed_lengthdry_penalty_last_nseq_breakersc                4   t          j        t          |          z              }t          |          D ]\  }	}
|
                    d          ||	<   t          j        |j        ||||||t          |                    }t          j        | j	        |           d S rm   )
rX   r%  r   r&  r/   r!   llama_sampler_init_dryr1   r  r2   )r$   r    rM   r/  r0  r1  r2  r3  breaker_ptrsr;   breakerr2   s               r%   add_dryzLlamaSampler.add_dry  s     #l*;*;;>>#L11 	6 	6JAw%nnW55LOO2K	
 	
 	)$,@@@@@r(   r  Dict[int, float]c                N   t          j        t          |          z              }t          |                                          D ]"\  }\  }}|||         _        |||         _        #t          j        |t          |          |          }t          j        | j	        |           d S r   )
r!   llama_logit_biasr   r&  itemsrk   biasllama_sampler_init_logit_biasr  r2   )r$   rI   r  
bias_arrayr;   rk   r=  r2   s           r%   add_logit_biaszLlamaSampler.add_logit_bias  s      03z??BEE
 )**:*:*<*< = = 	& 	&A}t"'JqM!%JqM9
OO
 

 	)$,@@@@@r(   c                l    t          j        |j                  }t          j        | j        |           d S r   )r!   llama_sampler_init_infillr1   r  r2   )r$   r    r2   s      r%   
add_infillzLlamaSampler.add_infill+  s/    5ekBB)$,@@@@@r(   r  r  c                    t          |          }|                                }t          j        | j        |           | j                            t          j        | j                  dz
  |f           d S )Nr   )r  r  r!   r  r2   r7   r  llama_sampler_chain_n)r$   r  custom_samplerr2   s       r%   
add_customzLlamaSampler.add_custom/  sv     'z22 ,,..)$,@@@##,T\::Q>O	
 	
 	
 	
 	
r(   rA   c                4    t          j        | j                  S r   )r!   llama_sampler_get_seedr2   r#   s    r%   get_seedzLlamaSampler.get_seed:  s    /===r(   r  r   r   r  c                B    t          j        | j        |j        |          S r   )r!   llama_sampler_sampler2   r   )r$   r   r  s      r%   r  zLlamaSampler.sample=  s    -dlCGSIIIr(   rk   c                :    t          j        | j        |           d S r   )r!   llama_sampler_acceptr2   ro   s     r%   r  zLlamaSampler.accept@  s    &t|U;;;;;r(   c                8    t          j        | j                   d S r   )r!   llama_sampler_resetr2   r#   s    r%   rc  zLlamaSampler.resetC  s    %dl33333r(   c                $   | j         rt          d          t          j        | j                  }t
                              t
                    |_        g _         t                      _        fd}j        	                    |           S )Nz7Cannot clone LlamaSampler that contains custom samplersc                 Z     j         "t          j         j                    d  _         d S d S r   )r2   r!   r  )new_samplers   r%   r  z(LlamaSampler.clone.<locals>.free_samplerR  s6    ".,[-@AAA&*### /.r(   )
r7   ri   r!   llama_sampler_cloner2   r  __new__r   r)   r3   )r$   cloned_samplerr  rS  s      @r%   r  zLlamaSampler.cloneF  s     	a%&_```"6t|DD"**<88,&(#"+++	+ 	+ 	+ 	+ 	+
 	((666r(   NrT  )r*  rB   )r   )r.  rR   r+  rB   )r6  rR   )r
  rR   r  rR   r  rR   )r.  rR   r
  rR   r+  rB   r  rB   )r   rR   )
rI   rB   r  rB   r<  rR   r=  rR   r>  rB   )r  rB   r<  rR   r=  rR   )r    r   r9  r   )r    r   r9  r   r!  r"  r#  r   )r   rB   r!  rR   r"  rR   r#  rR   )r    r   rM   rB   r/  rR   r0  rR   r1  rB   r2  rB   r3  r"  )rI   rB   r  r9  )r    r   r  r   )r  )r   r   r  rB   rA   rB   )rk   rB   )r   r   r   r4   r:   r?   r  r  r  r  r  r  r  r	  r  r  r  r  r  r   r+  r.  r8  r@  rC  rG  rJ  r  r  rc  r  r   r(   r%   r  r    s       0 0 0 ! ! !  A A AA A A AA A AA A A AA A A A AA A A A AA A A A AA A A AA A A AA A A AA A A AA A A AA A A AA A A AA A A A4A A A AA A A A6A A A A$A A A A	
 	
 	
 	
> > > >J J J J J< < < <4 4 4    r(   r  )%
__future__r   r*   rX   typingr   r   r   r   r   r	   r
   dataclassesr   r   
contextlibr   numpyr{  numpy.typingnptllama_typesllama_grammarr   _utilsr   llama_cpp.llama_cppr!   r   r   r   rs  r  r  r  r  r  r   r(   r%   <module>rb     s   " " " " " " 				                   ) ( ( ( ( ( ( (                           ' ' ' ' ' ' * * * * * * ' ' ' ' ' 'R6 R6 R6 R6 R6 R6 R6 R6j}8 }8 }8 }8 }8 }8 }8 }8@</ </ </ </ </ </ </ </~, , , , , , , ,:) ) ) ? ? ? ? ? ? ? ?4 * * * * * * * *Z, , , , , , , ,BT T T T T T T T T Tr(   