
    PiT                   ~I   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlmZmZmZ e
rd dlmZmZmZmZmZmZmZ dZej                            d          ZeC ej        ej                            ej                            e                              dz  n ej        e          Z eee          Z  ee           Z!d Z"d	Z#d
Z$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9d Z:d!Z;d"Z<d#Z=d$Z> ej?        ej@        ejA        ej@        ejA                  ZB ej?        ej@        ejA                  ZCg e jD        _E        ejF        e jD        _G        e D                                ZHd%ZId&ZJd'ZKd(ZLd)ZMeLZNdZOeMZPd
ZQ ed*eR          ZSejA        ZT ed+eR          ZUejA        ZV ed,eR          ZWejA        ZX ed-eR          ZYejA        ZZ ed.eR          Z[ejA        Z\ej]        Z^ej]        Z_ ej`        e_          Zaej]        Zbd Zc	 d	Zd	 d
Ze	 dZf	 d/Zg	 d0Zh	 dZi	 d Zjd	Zkd
ZldZmd/Znd0ZodZpdZqdZrdZsdZtdZudZvdZwdZxdZydZzdZ{dZ|dZ}dZ~dZdZdZdZdZd Zd!Zd"Zd#Zd$Zd1Zd2Zd3Zd4Zd5Zd&Zd Zd
xZZdxZZdxZZd Zd	Zd
ZdZd/Zd0ZdZd Zd	Zd
Zd/ZdZdZd2Zd6Zd7Zd8Zd9Zd Zd	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZd Zd!Zd"Zd#Zd$Zd1Zd2Zd:Zd;Zd<Zd=Zd&Zd Zd	Zd
ZdZeZd&Zd Zd	Zd
ZdZd/Zd&Zd Zd	Zd Zd	Zd
Z G d> d?ej                  Z ej`        eݦ          Z G d@ dAej                  Z ej`        eߦ          Z ej?        ej@        ej        ejA                  Z G dB dCej                  Zd Zd	Zd
ZdZ G dD dEej                  Z G dF dGej                  Z G dH dIej                  Z G dJ dKej                  Z ej?        dej        ej        ejA                  Z	  G dL dMej                  Z G dN dOej                  Z ej`        e          Z G dP dQej                  Z G dR dSej                  ZejA        Z ej`        ejA                  Z e!dTg e          ddV            Z e!dWg e          ddX            Z e!dYg e          ddZ            Z e!d[g e          dd\            Z e!d]g d          d^             Zd Zd	Zd
ZdZd/Zd0Z  e!d_g d          d`             Z e!daej        gd          ddd            Z e!deej        egeV          ddj            Z e!dkej        egeV          ddl            Z e!dm ej`        ej                  ejF        egeV          ddq            Z e!dreVej        gd          ddt            Z e!dueVgd          ddv            Z e!dweVgd          ddx            Z e!dyeVegeX          dd{            Z	 e!d|eVegeX          dd}            Z
 e!d~eXgd          dd            Z e!dg ej                  dd            Z e!dg ejF                  dd            ZD e!dg ejF                  dd            Z e!dg ej@                  dd            Z e!dg ej@                  dd            Z e!dg ej@                  dd            Z e!dg ej@                  dd            Z e!deXgej                  dd            Z e!deXgej                  dd            Z e!deXgej                  dd            Z e!deXgej                  dd            Z e!deVgej]                  dd            Z e!deVgej]                  dd            Z e!deVgej]                  dd            Z e!deVgej]                  dd            Z e!deTgej]                  dd            Z e!deXgeV          dd            Z e!deXgeZ          dd            Z e!deXgej                  dd            Z e!deXge\          dd            Z  e!deVgeT          dd            Z! e!deVgej                  dd            Z" e!deVgej]                  dd            Z# e!deVgej]                  dd            Z$ e!deVgej]                  dd            Z% e!deVgej]                  dd            Z& e!deVgej]                  dd            Z' e!deVgej]                  dd            Z( e!deVgej                  dd            Z) e!deVgej                  dd            Z* e!deVej        gej                  ddń            Z+ e!deTgej                  ddȄ            Z, e!deTgej]                  ddʄ            Z- e!deVej        ej        ejF        gej]                  ddЄ            Z. e!deVgej]                  dd҄            Z/ e!deVej]        ej        ejF        gej]                  ddք            Z0 e!deVej]        ej        ejF        gej]                  dd؄            Z1 e!deVej        ejF        gej]                  ddۄ            Z2 e!deVgej3                  dd݄            Z4 e!deVej        gej                  dd            Z5 e!deVgej3                  dd            Z6 e!deVgej@                  dd            Z7 e!deVgej@                  dd            Z8 e!deVgej]                  dd            Z9 e!deVgej@                  dd            Z: e!deVgej@                  dd            Z; e!dej        ej         ej`        e          gej                  dd            Z< e!deVej        ge          dd            Z= e!degd          dd            Z> e!deXeej        gej]                  dd            Z? e!deXegej]                  dd            Z@ e!deXgd          dǐd             ZA e!deX ej`        ej                  ejF        ej]        ej]        ej]        gej]                  dސd            ZB e!d	eZej@        gd          dߐd            ZC e!deZebe^e^gej@                  dd            ZD e!deZebebe^e^gd          dd            ZE e!deZebgd          dd            ZF e!deZebe^e^e^gd          dd            ZG e!deZebe^e^ej        gd          dd            ZH e!deZebge^          dd             ZI e!d!eZebge^          dd"            ZJ e!d#eZgej@                  dd$            ZK e!d%eXgej]                  dʐd&            ZL e!d'eXgej]                  dʐd(            ZM e!d)eXgd          dǐd*            ZN e!d+eXebe^e^gej@                  dd,            ZO e!d-eXebebe^e^gd          dd.            ZP e!d/eXebgd          dd0            ZQ e!d1eXebe^e^e^gd          dd2            ZR e!d3eXebe^e^ej        gd          dd4            ZS e!d5eXebge^          dd6            ZT e!d7eXebge^          dd8            ZU e!d9eXgd          dǐd:            ZV e!d;eXgej@                  dd<            ZW e!d=eXgd          dǐd>            ZX e!d?eXgejF                  dʐd@            ZY e!dAeXgejF                  dʐdB            ZZ e!dCeX ej`        ej[                  ejF        gejF                  ddG            Z\ e!dHeX ej`        ej[                  gejF                  ddI            Z] e!dJeX ej`        ej[                  ejF        gejF                  ddL            Z^ e!dMeX ej`        ej[                  gejF                  ddN            Z_ e!dOeXej        eaejF         ej`        ejF                  gej@                  ddV            Z` e!dWeXej        eaejF         ej`        ejF                  gej@                  ddX            Za e!dYeXej        eaejF        gej@                  dd\            Zb e!d]eXej        eaejF        gej@                  dd^            Zc e!d_eXebgejF                  dda            Zd e!dbeX ej`        ej[                  ejF        ebgejF                  ddc            Ze e!ddeX ej`        ej[                  ejF        ebgejF                  ddf            Zf e!dgeXej        ebeaejF        gejF                  ddi            Zg e!djeXej        ebeaejF         ej`        ejF                  gejF                  ddk            Zh e!dleaej]        ge          ddn            Zi e!doej]        ej]        ej]        ge          dds            Zj e!dtegd          ddv            Zk e!dweXegej]                  ddx            Zl e!dyeXegej]                  ddz            Zm e!d{eXej]        ej]        gd          dd~            Zn e!deXgej]                  dʐd            Zo e!deXgej]                  dʐd            Zp e!deXej@        gd          dd            Zq e!deXej@        gd          dd            Zr e!deXej@        gd          d d            Zs e!deXeCejA        gd          dd            Zt e!deXgd          dǐd            Zu e!deXg ej`        ej                            dd            Zv e!deXej]        g ej`        ej                            dd            Zw e!deXg ej`        ej                            dd            Zx e!deXej]        g ej`        ej                            dd            Zy e!deXebg ej`        ej                            dd            Zz e!deTe_gej                  dd            Z{ e!deTe_gej                  dd            Z| e!deTe_gej                  dd            Z} e!deTe_gej@                  dd            Z~ e!deTe_gej@                  dd            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTgej@                  d
d            Z e!deTgej@                  d
d            Z e!deTgej@                  d
d            Z e!deTge_          d	d            Z e!deTge_          d	dÄ            Z e!deTge_          d	dń            Z e!deTge_          d	dǄ            Z e!deTge_          d	dɄ            Z e!deTge_          d	d˄            Z e!deTe_gej                  dd̈́            Z e!deTe_gej                  ddτ            Z e!deTe_gej                  ddф            Z e!deTe_gej@                  ddӄ            Z e!deTe_gej@                  ddՄ            Z e!deTge_          dӐdׄ            Z e!deTge_          dӐdل            Z e!deTge_          dӐdۄ            Z e!deTge_          dӐd݄            Z e!deTge_          dӐd߄            Z e!deTge_          dӐd            Z e!deTge_          dӐd            Z e!deTgej@                  d
d            Z e!deTgej@                  d
d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTge_          d	d            Z e!deTej        ej]        eaej]        ej@        ej@        gej]                  dd            Z e!deTe_ej        ej]        ej]        ej@        gej]                  dd            Z e!deT ej`        e_          ej]        ej        ej]        ej@        ej@        gej]                  dd            Z e!d	ej         ej`        e          ejF        ej@        ej        ej]        gej]                  dd            Z e!d ej`        ej                  ejF        gej]                  dd            ZejA        Z G d dej                  Z G d dej                  Ze
r
ee         Z ej`        e          Z ej?        ej        e          Z ej?        dee_          Z ej?        dee          Z ej?        de          Z ej?        ee          Z ej?        de          Zdߐefdefdefdefdefdefge_         e!d ej`        e          ege          dd"            Z e!d#egej                  dd%            Z e!d&ee_gd          dd'            Z e!d(eegd          dd+            Z e!d,egd          dd-            Z e!d.ege          dd/            Z e!d0egd          dd1            Z e!d2ege          dd3            Z e!d4eegd          dd6            Z e!d7eej]        ge          dd8            Z e!d9egej                  dd:            Z e!d;eej]        ge          dd<            Z e!d=g e          dd>            Z e!d?ej        ge          ddA            Z e!dBg e          ddC            Z e!dDej]        ge          ddF            Z e!dGej        ejF        ge          ddJ            Z e!dKej        ejF        ge          ddL            Z e!dMej        ejF        ge          ddN            Z e!dOej        ge          ddQ            Z e!dRej        ej        ej        ge          ddT            Z e!dUej        ej        ejF        ej        ge          d dV            Z e!dWej        ge          d!dY            Z e!dZej]        ej        ej        ej        ej]        ge          d"d_            Z e!d`ej        ej        ej        ge          d#da            Z e!dbeTej        ej        ge          d$de            Z e!dfeTej        ej         ej`        ej                  ejF         ej`        e_          ejF        ge          d%dk            Z e!dleTej        ej         ej`        ej                  ejF         ej`        e_          ejF        ge          d&do            Z e!dpej]        ej        ej        ej        ge          d'du            Z e!dveTej]        ej        ej        ej]        ej]         ej`        ej                  ejF        ge          d(d}            Z e!d~ej]        ej]        ege          d)d            Z e!deTge          d*d            Z e!degej                  d+d            Z e!deeXej]        ge_          d,d            Z e!dej        ejF        ej        ej        ej        gej                  d-d            Z e!dej        ejF        ej        ej        ej        gej                  d.d            Z e!dg ej                  d/d            Z e!dejA        ejA        gd          d0d            Z G d dej                  Z G d dej                  Z e!deXgeܦ          d1d            Z e!deXgd          dǐd            Z e!deXgd          dǐd            Z e!degeݦ          d2d            Z e!degd          d3d            Z e!degd          d3d            Z ej?        ej@        ejA        ejA                  Z e!dejA        ejA        gej@                  d4d            Z G d dej                  Z e!deXeVegd          d5d            Z e!deXejA        ejA        ejA        ej        ejA        ejA        gd          d6d            ZdS (7      )annotationsN)CallableUnionNewTypeOptionalTYPE_CHECKING)load_shared_librarybyref"ctypes_function_for_shared_library)CtypesCDataCtypesArrayCtypesPointerCtypesVoidPointer	CtypesRefCtypesPointerOrRefCtypesFuncPointerllamaLLAMA_CPP_LIB_PATHlib                  	   
                                                               l    ialgginsggiqsggllama_vocab_pllama_model_pllama_context_pllama_memory_tllama_kv_cache_p             !   "   #   @         i   $   %   &   i   c                  `    e Zd ZU dZerded<   ded<   ded<   defdej        fdej        fgZ	dS )llama_token_datazUsed to store token data

    Attributes:
        id (llama_token): token id
        logit (float): log-odds of the token
        p (float): probability of the tokenllama_tokenidfloatlogitpN
__name__
__module____qualname____doc__r   __annotations__rG   ctypesc_float_fields_     g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/llama_cpp.pyrF   rF     sd         / /   
{	&.!	fnHHHrV   rF   c                  z    e Zd ZU dZerded<   ded<   ded<   ded<   defdej        fdej	        fdej
        fgZd	S )
llama_token_data_arraya  Used to sample tokens given logits

    Attributes:
        data (ctypes.Array[llama_token_data]): token data
        size (int): size of the array
        selected (int): index in the data array (i.e. not the token id)
        sorted (bool): whether the array is sortedzCtypesArray[llama_token_data]dataintsizeselectedboolsortedN)rM   rN   rO   rP   r   rQ   llama_token_data_prR   c_size_tc_int64c_boolrT   rU   rV   rW   rY   rY      s{         6 6  ++++			 
#$	!	V^$	6=!	HHHrV   rY   c                  x   e Zd ZU dZer#ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   dej        fd ej        e	          fd ej        ej
                  fd	 ej        e          fd ej        ej                  fd ej         ej        e                    fd ej        ej                  fgZdS )llama_batcha0  Input data for llama_encode/llama_decode

    A llama_batch object can contain input about one or many sequences

    The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens

    Attributes:
        n_tokens (int): number of tokens
        token (ctypes.Array[llama_token]): the token ids of the input (used when embd is NULL)
        embd (ctypes.Array[ctypes.ctypes.c_float]): token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
        pos (ctypes.Array[ctypes.Array[llama_pos]]): the positions of the respective token in the sequence
        seq_id (ctypes.Array[ctypes.Array[llama_seq_id]]): the sequence to which the respective token belongs
        logits (ctypes.Array[ctypes.ctypes.c_int8]): if zero, the logits for the respective token will not be output
    r[   n_tokensCtypesArray[llama_token]tokenCtypesArray[ctypes.c_float]embdz#CtypesArray[CtypesArray[llama_pos]]poszCtypesArray[ctypes.c_int]n_seq_idz&CtypesArray[CtypesArray[llama_seq_id]]seq_idzCtypesArray[ctypes.c_int8]logitsN)rM   rN   rO   rP   r   rQ   rR   c_int32POINTERrG   rS   	llama_posllama_seq_idc_int8rT   rU   rV   rW   re   re   9  s           +''''))))0000++++6666**** 
V^$	.&.--.	//0	y))*	^V^FN334	>6>.&.">">??@	>6>&-001HHHrV   re   c                      e Zd ZU dej        fdej        fdej        fdej        dz  fgZe	rde
d<   de
d<   de
d<   d	e
d<   d
S d
S )llama_model_kv_override_valueval_i64val_f64val_boolval_strr@   r[   rI   r^   bytesN)rM   rN   rO   rR   rb   c_doublerc   c_charrT   r   rQ   rU   rV   rW   ru   ru   v  s|         	FN#	FO$	V]#	FMC'(	H  	 rV   ru   c                  f    e Zd ZU dej        fdej        dz  fdefgZerde	d<   de	d<   de	d<   dS dS )	llama_model_kv_overridetagkeyr@   valuer[   rz   zUnion[int, float, bool, bytes]N)
rM   rN   rO   rR   c_intr|   ru   rT   r   rQ   rU   rV   rW   r~   r~     sm         		#$	/0H  .


------. .rV   r~   c                     e Zd ZU dZerFded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   dej        fdej        fdej        fdej	        fd	ej        fd ej
        ej                  fdefdej        fd ej
        e          fdej        fdej        fdej        fdej        fdej        fgZdS )llama_model_paramsa  Parameters for llama_model

    Attributes:
        devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
        tensor_buft_overrides (ctypes.Array[llama_model_tensor_buft_override]): NULL-terminated list of buffer types to use for tensors that match a pattern
        n_gpu_layers (int): number of layers to store in VRAM
        split_mode (int): how to split the model across multiple GPUs
        main_gpu (int): the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
        tensor_split (ctypes.Array[ctypes.ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
        progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
        progress_callback_user_data (ctypes.ctypes.c_void_p): context pointer passed to the progress callback
        kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
        vocab_only (bool): only load the vocabulary, no weights
        use_mmap (bool): use mmap if possible
        use_mlock (bool): force system to keep model in RAM
        check_tensors (bool): validate model tensor data
        use_extra_bufts (bool): use extra buffer types (used for weight repacking)zCtypesArray[ctypes.c_void_p]devicesz-CtypesArray[llama_model_tensor_buft_override]tensor_buft_overridesr[   n_gpu_layers
split_modemain_gpuri   tensor_splitz(Callable[[float, ctypes.c_void_p], bool]progress_callbackctypes.c_void_pprogress_callback_user_dataz$CtypesArray[llama_model_kv_override]kv_overridesr^   
vocab_onlyuse_mmap	use_mlockcheck_tensorsuse_extra_buftsN)rM   rN   rO   rP   r   rQ   rR   c_void_pro   r   rp   rS   llama_progress_callbackr~   rc   rT   rU   rV   rW   r   r     sm        V V$  ----LLLL1111CCCC4444:::: 
FO$	 &/2	(	v|$	V^$	778	56	&8	(?@@A	v}%	V]#	fm$	&-(	FM*HHHrV   r   c                     e Zd ZU dZerded<   ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded <   ded!<   ded"<   ded#<   ded$<   ded%<   dej        fdej        fdej        fdej        fdej        fdej        fd	ej	        fd
ej	        fdej	        fdej
        fdej
        fdej
        fdej
        fdej
        fdej
        fdej        fdej
        fdefdej        fdej	        fdej	        fdefdej        fdej        fd ej        fd!ej        fd"ej        fd#ej        fd$ej        fd%ej        fgZd&S )'llama_context_paramsa  Parameters for llama_context

    Attributes:
        n_ctx (int): text context, 0 = from model
        n_batch (int): logical maximum batch size that can be submitted to llama_decode
        n_ubatch (int): physical maximum batch size
        n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models)
        n_threads (int): number of threads to use for generation
        n_threads_batch (int): number of threads to use for batch processing
        rope_scaling_type (int): RoPE scaling type, from `enum llama_rope_scaling_type`
        pooling_type (int): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
        attention_type (int): attention type to use for embeddings
        rope_freq_base (float): RoPE base frequency, 0 = from model
        rope_freq_scale (float): RoPE frequency scaling factor, 0 = from model
        yarn_ext_factor (float): YaRN extrapolation mix factor, negative = from model
        yarn_attn_factor (float): YaRN magnitude scaling factor
        yarn_beta_fast (float): YaRN low correction dim
        yarn_beta_slow (float): YaRN high correction dim
        yarn_orig_ctx (int): YaRN original context size
        defrag_thold (float): defragment the KV cache if holes/size > thold, <= 0 disabled (default)
        cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
        cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
        type_k (int): data type for K cache
        type_v (int): data type for V cache
        abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted
        abort_callback_data (ctypes.ctypes.c_void_p): data for abort_callback
        embeddings (bool): if true, extract embeddings (together with logits)
        offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
        flash_attn (bool): whether to use flash attention
        no_perf (bool): whether to measure performance timings
        op_offload (bool): offload host tensor operations to device
        swa_full (bool): use full-size SWA cache
        kv_unified (bool): use a unified buffer across the input sequences when computing the attention
    r[   n_ctxn_batchn_ubatch	n_seq_max	n_threadsn_threads_batchrope_scaling_typepooling_typeattention_typerI   rope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctxdefrag_tholdz'Callable[[ctypes.c_void_p, bool], bool]cb_evalr   cb_eval_user_datatype_ktype_vz!Callable[[ctypes.c_void_p], bool]abort_callbackabort_callback_datar^   
embeddingsoffload_kqv
flash_attnno_perf
op_offloadswa_full
kv_unifiedN)rM   rN   rO   rP   r   rQ   rR   c_uint32ro   r   rS    ggml_backend_sched_eval_callbackr   ggml_abort_callbackrc   rT   rU   rV   rW   r   r      s        ! !F  


8888****9999,,,, 
&/"	FO$	V_%	fo&	fn%	FN+	fl+	&	6<(	6>*	FN+	FN+	V^,	6>*	6>*	&/*	(	45	fo.	6< 	6< 	./	0	v}%	&	v}%	FM"	v}%	V]#	v}%=HHHrV   r   c                  n   e Zd ZU dZerAded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   ded<   dej        fdej        fdej        fdej        fdej	        fd	ej	        fd
ej	        fdej	        fdej	        fdej
        fdej
        fdej
        fdej
        fgZdS )llama_model_quantize_paramsa  Parameters for llama_model_quantize

    Attributes:
        nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
        ftype (int): quantize to this llama_ftype
        output_tensor_type (int): output tensor type
        token_embedding_type (int): token embeddings tensor type
        allow_requantize (bool): allow quantizing non-f32/f16 tensors
        quantize_output_tensor (bool): quantize output.weight
        only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        pure (bool): quantize all tensors to the default type
        keep_split (bool): quantize to the same number of shards
        imatrix (ctypes.c_void_p): pointer to importance matrix data
        kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
        tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
        prune_layers (ctypes.c_void_p): pointer to vector containing layer indices to prune
    r[   nthreadftypeoutput_tensor_typetoken_embedding_typer^   allow_requantizequantize_output_tensor	only_copypure
keep_splitr   imatrixr   tensor_typesprune_layersN)rM   rN   rO   rP   r   rQ   rR   ro   r   rc   r   rT   rU   rV   rW   r   r     s?         $  &


!!!!$$$$


    %%%%%%%%%%%% 
FN#	&,	v|,	.	V]+	!6=1	fm$		v}%	FO$	)	)	)HHHrV   r   c                  F    e Zd ZU dZer
ded<   ded<   defdej        fgZ	dS )llama_logit_biaszjUsed to store logit bias

    Attributes:
        token (llama_token): token id
        bias (float): biasrG   rh   rI   biasNrL   rU   rV   rW   r   r     sT             
+	 HHHrV   r   c                  6    e Zd ZU dZerded<   dej        fgZdS )llama_sampler_chain_paramszrParameters for llama_sampler_chain

    Attributes:
        no_perf (bool): whether to measure performance timingsr^   r   N)	rM   rN   rO   rP   r   rQ   rR   rc   rT   rU   rV   rW   r   r     sB         B B
   
FM"HHHrV   r   c                  2    e Zd Zdej        fdej        fgZdS )llama_chat_messagerolecontentN)rM   rN   rO   rR   c_char_prT   rU   rV   rW   r   r     s)        	!	FO$HHHrV   r   llama_model_default_paramsreturnc                     dS )z&Get default parameters for llama_modelNrU   rU   rV   rW   r   r     	     CrV   llama_context_default_paramsc                     dS )z(Get default parameters for llama_contextNrU   rU   rV   rW   r   r   !  r   rV   "llama_sampler_chain_default_paramsc                     dS )z.Get default parameters for llama_sampler_chainNrU   rU   rV   rW   r   r   ,  r   rV   #llama_model_quantize_default_paramsc                     dS )z/Get default parameters for llama_model_quantizeNrU   rU   rV   rW   r   r   7  r   rV   llama_backend_initc                     dS )zMInitialize the llama + ggml backend
    Call once at the start of the programNrU   rU   rV   rW   r   r   E  	     CrV   llama_backend_freec                     dS )zACall once at the end of the program - currently only used for MPINrU   rU   rV   rW   r   r   c  r   rV   llama_numa_initnumar[   c                   d S NrU   )r   s    rW   r   r   o  	     CrV   llama_load_model_from_file
path_modelrz   paramsOptional[llama_model_p]c                   d S r   rU   r   r   s     rW   r   r     r   rV   llama_model_load_from_filec                   dS )zLoad the model from a file

    If the file is split into multiple parts, the file name must follow this pattern: <name>-%05d-of-%05d.gguf

    If the split file name does not follow this pattern, use llama_model_load_from_splitsNrU   r   s     rW   r   r     	     CrV   llama_model_load_from_splitspathsList[bytes]n_pathsc                   dS )znLoad the model from multiple splits (support custom naming scheme)

    The paths must be in the correct orderNrU   )r   r   r   s      rW   r   r     	     CrV   llama_model_save_to_filemodelc                   dS )zSave the model to a fileNrU   )r   r   s     rW   r   r     r   rV   llama_free_modelc                   d S r   rU   r   s    rW   r   r     r   rV   llama_model_freec                   d S r   rU   r   s    rW   r   r     r   rV   llama_init_from_modelOptional[llama_context_p]c                   d S r   rU   r   r   s     rW   r   r     r   rV   llama_new_context_with_modelc                   d S r   rU   r   s     rW   r   r     r   rV   
llama_freectxc                   dS )zFrees all allocated memoryNrU   r  s    rW   r  r    r   rV   llama_time_usc                     d S r   rU   rU   rV   rW   r  r  	  r   rV   llama_max_devicesc                     d S r   rU   rU   rV   rW   r  r        CrV   llama_max_parallel_sequencesc                     d S r   rU   rU   rV   rW   r  r    r
  rV   llama_supports_mmapr^   c                     d S r   rU   rU   rV   rW   r  r    r
  rV   llama_supports_mlockc                     d S r   rU   rU   rV   rW   r  r  %  r
  rV   llama_supports_gpu_offloadc                     d S r   rU   rU   rV   rW   r  r  +  r
  rV   llama_supports_rpcc                     d S r   rU   rU   rV   rW   r  r  1  r
  rV   llama_n_ctxc                   d S r   rU   r  s    rW   r  r  7  r
  rV   llama_n_batchc                   d S r   rU   r  s    rW   r  r  =  r
  rV   llama_n_ubatchc                   d S r   rU   r  s    rW   r  r  C  r
  rV   llama_n_seq_maxc                   d S r   rU   r  s    rW   r  r  I  r
  rV   llama_n_ctx_trainc                   d S r   rU   r   s    rW   r  r  O  r
  rV   llama_n_embdc                   d S r   rU   r   s    rW   r  r  U  r
  rV   llama_n_layerc                   d S r   rU   r   s    rW   r!  r!  [  r
  rV   llama_n_headc                   d S r   rU   r   s    rW   r#  r#  a  r
  rV   llama_n_vocabc                   d S r   rU   r   s    rW   r%  r%  g  r
  rV   llama_get_modelc                   d S r   rU   r  s    rW   r'  r'  m  r
  rV   llama_get_memoryOptional[llama_memory_t]c                   dS )zGet the memory for the contextNrU   r  s    rW   r)  r)  s  	     CrV   llama_pooling_typec                   d S r   rU   r  s    rW   r-  r-  z  r
  rV   llama_get_kv_selfOptional[llama_kv_cache_p]c                   dS )z0Get the KV cache for self-attention (DEPRECATED)NrU   r  s    rW   r/  r/    r   rV   llama_model_get_vocabOptional[llama_vocab_p]c                   d S r   rU   r   s    rW   r2  r2    r
  rV   llama_model_rope_typec                   d S r   rU   r   s    rW   r5  r5    r
  rV   llama_model_n_ctx_trainc                   d S r   rU   r   s    rW   r7  r7    r
  rV   llama_model_n_embdc                   d S r   rU   r   s    rW   r9  r9    r
  rV   llama_model_n_layerc                   d S r   rU   r   s    rW   r;  r;    r
  rV   llama_model_n_headc                   d S r   rU   r   s    rW   r=  r=    r
  rV   llama_model_n_head_kvc                   d S r   rU   r   s    rW   r?  r?    r
  rV   llama_model_n_swac                   d S r   rU   r   s    rW   rA  rA    r
  rV   !llama_model_rope_freq_scale_trainrI   c                   d S r   rU   r   s    rW   rC  rC    r
  rV   llama_model_n_cls_outc                   dS )zKReturns the number of classifier outputs (only valid for classifier models)NrU   r   s    rW   rE  rE    r,  rV   llama_model_cls_labeliOptional[bytes]c                   dS )zNReturns label of classifier output by index. Returns None if no label providedNrU   )r   rH  s     rW   rG  rG    r,  rV   llama_vocab_typevocabc                   d S r   rU   rL  s    rW   rK  rK    r
  rV   llama_vocab_n_tokensc                   d S r   rU   rN  s    rW   rO  rO    r
  rV   llama_model_meta_val_strr   Union[ctypes.c_char_p, bytes]bufbuf_sizec                   dS )z*Get metadata value as a string by key nameNrU   )r   r   rS  rT  s       rW   rQ  rQ    	    $ CrV   llama_model_meta_countc                   dS )z*Get the number of metadata key/value pairsNrU   r   s    rW   rW  rW    r,  rV   llama_model_meta_key_by_indexUnion[ctypes.c_int, int](Union[bytes, CtypesArray[ctypes.c_char]]c                   dS )zGet metadata key name by indexNrU   r   rH  rS  rT  s       rW   rY  rY    rV  rV   !llama_model_meta_val_str_by_indexc                   dS )z'Get metadata value as a string by indexNrU   r]  s       rW   r^  r^    rV  rV   llama_model_descUnion[ctypes.c_size_t, int]c                   dS )z&Get a string describing the model typeNrU   )r   rS  rT  s      rW   r`  r`  4  	     CrV   llama_model_sizec                   dS )z?Returns the total size of all the tensors in the model in bytesNrU   r   s    rW   rd  rd  E  r,  rV   llama_model_chat_templatenamec                   dS )zsGet the default chat template. Returns None if not available
    If name is None, returns the default chat templateNrU   )r   rg  s     rW   rf  rf  N  	     CrV   llama_model_n_paramsc                   dS )z3Returns the total number of parameters in the modelNrU   r   s    rW   rj  rj  W  r,  rV   llama_model_has_encoderc                   dS )zOReturns true if the model contains an encoder that requires llama_encode() callNrU   r   s    rW   rl  rl  _  r,  rV   llama_model_has_decoderc                   dS )zNReturns true if the model contains a decoder that requires llama_decode() callNrU   r   s    rW   rn  rn  g  r,  rV   llama_model_decoder_start_tokenc                   dS )zFor encoder-decoder models, this function returns id of the token that must be provided
    to the decoder to start generating output sequence. For other models, it returns -1.
    NrU   r   s    rW   rp  rp  p  r   rV   llama_model_is_recurrentc                   dS )z?Returns true if the model is recurrent (like Mamba, RWKV, etc.)NrU   r   s    rW   rr  rr  |  r,  rV   llama_model_is_diffusionc                   dS )zFReturns true if the model is diffusion-based (like LLaDA, Dream, etc.)NrU   r   s    rW   rt  rt    r,  rV   llama_model_quantize	fname_inp	fname_out/CtypesPointerOrRef[llama_model_quantize_params]c                   dS )zReturns 0 on successNrU   )rw  rx  r   s      rW   rv  rv    	      CrV   llama_adapter_lora_init	path_loraOptional[llama_adapter_lora_p]c                   d S r   rU   )r   r}  s     rW   r|  r|    r   rV   llama_adapter_lora_freeadapterllama_adapter_lora_pc                   d S r   rU   )r  s    rW   r  r    r   rV   llama_set_adapter_lorascalec                   dS )zRAdd a loaded LoRA adapter to given context
    This will not modify model's weightNrU   )r  r  r  s      rW   r  r    	     CrV   llama_rm_adapter_lorac                   dS )zlRemove a specific LoRA adapter from given context
    Return -1 if the adapter is not present in the contextNrU   )r  r  s     rW   r  r    r  rV   llama_clear_adapter_lorac                   dS )z+Remove all LoRA adapters from given contextNrU   r  s    rW   r  r    r   rV   llama_apply_adapter_cvecrZ   "CtypesPointerOrRef[ctypes.c_float]lenn_embdil_startil_endc                   dS )a  Apply a loaded control vector to a llama_context, or if data is NULL, clear
    the currently loaded vector.
    n_embd should be the size of a single layer's control, and data should point
    to an n_embd x n_layers buffer starting from layer 1.
    il_start and il_end are the layer range the vector should apply to (both inclusive)
    See llama_control_vector_load in common to load a control vector.NrU   )r  rZ   r  r  r  r  s         rW   r  r    s	    6 CrV   llama_memory_clearmemc                   dS )zoClear the memory contents
    If data == true, the data buffers will also be cleared together with the metadataNrU   )r  rZ   s     rW   r  r  )  r   rV   llama_memory_seq_rmrm   Union[llama_seq_id, int]p0Union[llama_pos, int]p1c                   dS )a  Removes all tokens that belong to the specified sequence and have positions in [p0, p1)

    Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails

    seq_id < 0 : match any sequence
    p0 < 0     : [0,  p1]
    p1 < 0     : [p0, inf)NrU   )r  rm   r  r  s       rW   r  r  >  s	    0 CrV   llama_memory_seq_cp
seq_id_src
seq_id_dstc                   dS )zvCopy all tokens that belong to the specified sequence to another sequence
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrU   )r  r  r  r  r  s        rW   r  r  b  	    , CrV   llama_memory_seq_keepc                   dS )z?Removes all tokens that do not belong to the specified sequenceNrU   r  rm   s     rW   r  r    	    
 CrV   llama_memory_seq_adddeltac                   dS )zAdds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrU   )r  rm   r  r  r  s        rW   r  r    r  rV   llama_memory_seq_divdc                   dS )zcInteger division of the positions by factor of `d > 1`
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrU   )r  rm   r  r  r  s        rW   r  r    r  rV   llama_memory_seq_pos_minc                   dS )zReturns the smallest position present in the memory for the specified sequence
    This is typically non-zero only for SWA caches
    Return -1 if the sequence is emptyNrU   r  s     rW   r  r    	     CrV   llama_memory_seq_pos_maxc                   dS )ztReturns the largest position present in the memory for the specified sequence
    Return -1 if the sequence is emptyNrU   r  s     rW   r  r    r   rV   llama_memory_can_shiftc                   dS )z%Check if the memory supports shiftingNrU   )r  s    rW   r  r    r,  rV   llama_kv_self_n_tokensc                   dS )zTReturns the number of tokens in the KV cache (slow, use only for debug) (DEPRECATED)NrU   r  s    rW   r  r    r  rV   llama_kv_self_used_cellsc                   dS )z0Returns the number of used KV cells (DEPRECATED)NrU   r  s    rW   r  r  
  r  rV   llama_kv_self_clearc                   dS )zClear the KV cache (DEPRECATED)NrU   r  s    rW   r  r    r  rV   llama_kv_self_seq_rmc                   dS )z(Remove tokens from KV cache (DEPRECATED)NrU   )r  rm   r  r  s       rW   r  r  )  rV  rV   llama_kv_self_seq_cpc                   dS )z$Copy tokens in KV cache (DEPRECATED)NrU   )r  r  r  r  r  s        rW   r  r  I  	    ( CrV   llama_kv_self_seq_keepc                   dS )z5Keep only specified sequence in KV cache (DEPRECATED)NrU   r  rm   s     rW   r  r  e  r  rV   llama_kv_self_seq_addc                   dS )z8Add delta to sequence positions in KV cache (DEPRECATED)NrU   )r  rm   r  r  r  s        rW   r  r  y  r  rV   llama_kv_self_seq_divc                   dS )z2Divide sequence positions in KV cache (DEPRECATED)NrU   )r  rm   r  r  r  s        rW   r  r    r  rV   llama_kv_self_seq_pos_minc                   dS )zCReturns the smallest position in KV cache for sequence (DEPRECATED)NrU   r  s     rW   r  r    r   rV   llama_kv_self_seq_pos_maxc                   dS )zBReturns the largest position in KV cache for sequence (DEPRECATED)NrU   r  s     rW   r  r    r   rV   llama_kv_self_defragc                   dS )z$Defragment the KV cache (DEPRECATED)NrU   r  s    rW   r  r    r,  rV   llama_kv_self_can_shiftc                   dS )z<Check if the context supports KV cache shifting (DEPRECATED)NrU   r  s    rW   r  r    r,  rV   llama_kv_self_updatec                   dS )z'Apply the KV cache updates (DEPRECATED)NrU   r  s    rW   r  r    r,  rV   llama_state_get_sizec                   dS )zNReturns the *actual* size in bytes of the state (logits, embedding and memory)NrU   r  s    rW   r  r    r,  rV   llama_get_state_sizec                   dS )z3Returns the size in bytes of the state (DEPRECATED)NrU   r  s    rW   r  r  	  r,  rV   llama_state_get_datadstCtypesArray[ctypes.c_uint8]r\   c                   dS )zCopies the state to the specified destination address.
    Destination needs to have allocated enough memory.
    Returns the number of bytes copiedNrU   )r  r  r\   s      rW   r  r  	  rV  rV   llama_copy_state_datac                   dS )zBCopies the state to the specified destination address (DEPRECATED)NrU   )r  r  s     rW   r  r  )	  rc  rV   llama_state_set_datasrcc                   dS )zUSet the state reading from the specified address
    Returns the number of bytes readNrU   )r  r  r\   s      rW   r  r  >	  r   rV   llama_set_state_datac                   dS )z=Set the state reading from the specified address (DEPRECATED)NrU   )r  r  s     rW   r  r  R	  r  rV   llama_state_load_filepath_session
tokens_outrg   n_token_capacityn_token_count_out#CtypesPointerOrRef[ctypes.c_size_t]c                   d S r   rU   r  r  r  r  r  s        rW   r  r  e	  	    & CrV   llama_load_session_filec                   d S r   rU   r  s        rW   r  r  	  r  rV   llama_state_save_filetokensn_token_countc                   d S r   rU   r  r  r  r  s       rW   r  r  	  	    " CrV   llama_save_session_filec                   d S r   rU   r  s       rW   r  r  	  r  rV   llama_state_seq_get_sizerr   c                   dS )z@Get the exact size needed to copy the state of a single sequenceNrU   r  s     rW   r  r  	  r   rV   llama_state_seq_get_datac                   dS )z=Copy the state of a single sequence into the specified bufferNrU   )r  r  r\   rm   s       rW   r  r  	  rV  rV   llama_state_seq_set_datadest_seq_idc                   dS )z2Copy the sequence data into the specified sequenceNrU   )r  r  r\   r  s       rW   r  r  	  rV  rV   llama_state_seq_save_filefilepathc                   d S r   rU   )r  r  rm   r  r  s        rW   r  r  
  r  rV   llama_state_seq_load_filec                   d S r   rU   )r  r  r  r  r  r  s         rW   r  r  5
  s	    * CrV   llama_batch_get_onerf   c                   dS )zReturn batch for single sequence of tokens

    NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
    NrU   )r  rf   s     rW   r  r  Z
  r  rV   llama_batch_initUnion[ctypes.c_int32, int]rj   r   c                   dS )a  Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
    Each token can be assigned up to n_seq_max sequence ids
    The batch has to be freed with llama_batch_free()
    If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
    Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
    The rest of the llama_batch members are allocated with size n_tokens
    All members are left uninitializedNrU   )rf   rj   r   s      rW   r  r  y
  r{  rV   llama_batch_freebatchc                   dS )z9Frees a batch of tokens allocated with llama_batch_init()NrU   )r  s    rW   r  r  
  r,  rV   llama_encodec                   dS )zLProcess a batch of tokens using the encoder.
    0 - success
    < 0 - errorNrU   r  r  s     rW   r  r  
  r  rV   llama_decodec                   dS )aL  Process a batch of tokens.
    0 - success
    1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
    2 - aborted (processed ubatches will remain in the context's memory)
    -1 - invalid input batch
    < -1 - fatal error (processed ubatches will remain in the context's memory)NrU   r  s     rW   r  r  
  r   rV   llama_set_n_threadsr   r   c                   dS )zSet the number of threads used for decoding
    n_threads is the number of threads used for generation (single token)
    n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
    NrU   )r  r   r   s      rW   r	  r	  
  r  rV   llama_n_threadsc                   dS )z?Get the number of threads used for generation of a single tokenNrU   r  s    rW   r  r  
  r,  rV   llama_n_threads_batchc                   dS )zOGet the number of threads used for prompt and batch processing (multiple token)NrU   r  s    rW   r  r  
  r,  rV   llama_set_embeddingsr   c                   dS )z1Set whether the context outputs embeddings or notNrU   )r  r   s     rW   r  r  
  r,  rV   llama_set_causal_attncausal_attnc                   dS )zlSet whether to use causal attention or not
    If set to true, the model will only attend to the past tokensNrU   )r  r  s     rW   r  r  
  ri  rV   llama_set_warmupwarmupc                   dS )zSet whether the model is in warmup mode or not
    If true, all model tensors are activated during llama_decode() to load and cache their weights.NrU   )r  r  s     rW   r  r  
  ri  rV   llama_set_abort_callbackr   !Callable[[ctypes.c_void_p], None]r   r   c                   dS )zSet abort callbackNrU   )r  r   r   s      rW   r  r    rc  rV   llama_synchronizec                   dS )zWait until all computations are finished
    This is automatically done when using one of the functions below to obtain the computation results
    and is not necessary to call it explicitly in most casesNrU   r  s    rW   r  r    r  rV   llama_get_logitsri   c                   dS )aZ  Token logits obtained from the last call to llama_decode()
    The logits for which llama_batch.logits[i] != 0 are stored contiguously
    in the order they have appeared in the batch.
    Rows: number of tokens for which llama_batch.logits[i] != 0
    Cols: n_vocab

    Returns:
        Pointer to the logits buffer of shape (n_tokens, n_vocab)NrU   r  s    rW   r  r  *  rc  rV   llama_get_logits_ithc                   dS )zNLogits for the ith token. Equivalent to:
    llama_get_logits(ctx) + i*n_vocabNrU   r  rH  s     rW   r  r  >  r  rV   llama_get_embeddingsc                   dS )zDGet the embeddings for the input
    shape: [n_embd] (1-dimensional)NrU   r  s    rW   r!  r!  S  r   rV   llama_get_embeddings_ithc                   dS )zPGet the embeddings for the ith sequence
    llama_get_embeddings(ctx) + i*n_embdNrU   r   s     rW   r#  r#  b  r  rV   llama_get_embeddings_seqc                   dS )zGet the embeddings for a sequence id
    Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
    shape: [n_embd] (1-dimensional)NrU   r  s     rW   r%  r%  t  r   rV   llama_vocab_get_textrh   Union[llama_token, int]c                   d S r   rU   rL  rh   s     rW   r'  r'    r   rV   llama_vocab_get_scorec                   d S r   rU   r*  s     rW   r+  r+    r   rV   llama_vocab_get_attrc                   d S r   rU   r*  s     rW   r-  r-    r   rV   llama_vocab_is_eogc                   dS )zXCheck if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)NrU   r*  s     rW   r/  r/    r  rV   llama_vocab_is_controlc                   dS )z>Identify if Token Id is a control token or a render-able tokenNrU   r*  s     rW   r1  r1    r   rV   llama_vocab_bosrG   c                   dS )zbeginning-of-sentenceNrU   rN  s    rW   r3  r3    r,  rV   llama_vocab_eosc                   dS )zend-of-sentenceNrU   rN  s    rW   r5  r5    r,  rV   llama_vocab_eotc                   dS )zend-of-turnNrU   rN  s    rW   r7  r7    r,  rV   llama_vocab_sepc                   dS )zsentence separatorNrU   rN  s    rW   r9  r9    r,  rV   llama_vocab_nlc                   dS )z	next-lineNrU   rN  s    rW   r;  r;    r,  rV   llama_vocab_padc                   dS )paddingNrU   rN  s    rW   r=  r=    r,  rV   llama_vocab_maskc                   dS )maskNrU   rN  s    rW   r@  r@    r,  rV   llama_vocab_get_add_bosc                   d S r   rU   rN  s    rW   rC  rC    r   rV   llama_vocab_get_add_eosc                   d S r   rU   rN  s    rW   rE  rE    r   rV   llama_vocab_get_add_sepc                   d S r   rU   rN  s    rW   rG  rG    r   rV   llama_vocab_fim_prec                   d S r   rU   rN  s    rW   rI  rI    r   rV   llama_vocab_fim_sufc                   d S r   rU   rN  s    rW   rK  rK    r   rV   llama_vocab_fim_midc                   d S r   rU   rN  s    rW   rM  rM    r   rV   llama_vocab_fim_padc                   d S r   rU   rN  s    rW   rO  rO  )  r   rV   llama_vocab_fim_repc                   d S r   rU   rN  s    rW   rQ  rQ  3  r   rV   llama_vocab_fim_sepc                   d S r   rU   rN  s    rW   rS  rS  =  r   rV   llama_token_get_textc                   d S r   rU   r*  s     rW   rU  rU  H  r   rV   llama_token_get_scorec                   d S r   rU   r*  s     rW   rW  rW  T  r   rV   llama_token_get_attrc                   d S r   rU   r*  s     rW   rY  rY  _  r   rV   llama_token_is_eogc                   d S r   rU   r*  s     rW   r[  r[  j  r   rV   llama_token_is_controlc                   d S r   rU   r*  s     rW   r]  r]  u  r   rV   llama_token_bosc                   d S r   rU   rN  s    rW   r_  r_    r   rV   llama_token_eosc                   d S r   rU   rN  s    rW   ra  ra    r   rV   llama_token_eotc                   d S r   rU   rN  s    rW   rc  rc    r   rV   llama_token_clsc                   d S r   rU   rN  s    rW   re  re    r   rV   llama_token_sepc                   d S r   rU   rN  s    rW   rg  rg    r   rV   llama_token_nlc                   d S r   rU   rN  s    rW   ri  ri    r   rV   llama_token_padc                   d S r   rU   rN  s    rW   rk  rk    r   rV   llama_add_bos_tokenc                   d S r   rU   rN  s    rW   rm  rm    r   rV   llama_add_eos_tokenc                   d S r   rU   rN  s    rW   ro  ro    r   rV   llama_token_fim_prec                   d S r   rU   rN  s    rW   rq  rq    r   rV   llama_token_fim_sufc                   d S r   rU   rN  s    rW   rs  rs    r   rV   llama_token_fim_midc                   d S r   rU   rN  s    rW   ru  ru    r   rV   llama_token_fim_padc                   d S r   rU   rN  s    rW   rw  rw    r   rV   llama_token_fim_repc                   d S r   rU   rN  s    rW   ry  ry    r   rV   llama_token_fim_sepc                   d S r   rU   rN  s    rW   r{  r{    r   rV   llama_vocab_clsc                   d S r   rU   rN  s    rW   r}  r}    r   rV   llama_tokenizetexttext_lenn_tokens_maxadd_specialUnion[ctypes.c_bool, bool]parse_specialc                   dS )a  Convert the provided text into tokens.

    Args:
        vocab: The vocabulary to use for tokenization.
        text: The text to tokenize.
        text_len: The length of the text.
        tokens: The tokens pointer must be large enough to hold the resulting tokens.
        n_max_tokens: The maximum number of tokens to return.
        add_special: Allow adding special tokens if the model is configured to do so.
        parse_special: Allow parsing special tokens.

    Returns:
        Returns the number of tokens on success, no more than n_tokens_max
        Returns a negative number on failure - the number of tokens that would have been returned
    NrU   )rL  r  r  r  r  r  r  s          rW   r  r  ,  s
    L CrV   llama_token_to_piece9Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]]lengthlstripspecialc                   dS )a(  Token Id -> Piece.
    Uses the vocabulary in the provided context.
    Does not write null terminator to the buffer.
    User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.

    Args:
        vocab: The vocabulary to use for tokenization.
        token: The token to convert.
        buf: The buffer to write the token to.
        length: The length of the buffer.
        lstrip: The number of leading spaces to skip.
        special: If true, special tokens are rendered in the output.NrU   )rL  rh   rS  r  r  r  s         rW   r  r  a  
    B CrV   llama_detokenizetext_len_maxremove_specialunparse_specialc                   dS )a  Convert the provided tokens into text (inverse of llama_tokenize()).

    Args:
        vocab: The vocabulary to use for tokenization.
        tokens: The tokens to convert.
        n_tokens: The number of tokens.
        text: The buffer to write the text to.
        text_len_max: The length of the buffer.
        remove_special: Allow to remove BOS and EOS tokens if model is configured to do so.
        unparse_special: If true, special tokens are rendered in the output.NrU   )rL  r  rf   r  r  r  r  s          rW   r  r    r  rV   llama_chat_apply_templatetmplchatCtypesArray[llama_chat_message]n_msgadd_assc                   dS )aj  Apply chat template.

    Args:
        tmpl: Template to use. If None, uses model's default
        chat: Array of chat messages
        n_msg: Number of messages
        add_ass: Whether to end prompt with assistant token
        buf: Output buffer
        length: Buffer length

    Returns:
        Number of bytes written, or needed if buffer too small
    NrU   )r  r  r  r  rS  r  s         rW   r  r    s
    D CrV   llama_chat_builtin_templatesoutputCtypesArray[bytes]c                   dS )zGet list of built-in chat templates.

    Args:
        output: Output buffer to store template names.
        len: Length of the output buffer.

    Returns:
        Number of templates available.
        Returns a negative number on error.
    NrU   )r  r  s     rW   r  r    	    . CrV   c                      e Zd ZdS )llama_sampler_iN)rM   rN   rO   rU   rV   rW   r  r  !  s        CrV   r  c                  :    e Zd Zd ej        e          fdefgZdS )llama_samplerifacer  N)rM   rN   rO   rR   rp   r  llama_sampler_context_trT   rU   rV   rW   r  r  )  s2        	.&.112	'(HHHrV   r  acceptapplyresetclonefreellama_sampler_initr  ctypes.POINTER(llama_sampler_i)r  llama_sampler_pc                   d S r   rU   )r  r  s     rW   r  r  J  r   rV   llama_sampler_namesmplc                   d S r   rU   r  s    rW   r  r  V  r   rV   llama_sampler_acceptc                   d S r   rU   )r  rh   s     rW   r  r  `  r   rV   llama_sampler_applycur_p#CtypesArray[llama_token_data_array]c                   d S r   rU   )r  r  s     rW   r  r  j  r   rV   llama_sampler_resetc                   d S r   rU   r  s    rW   r  r  v  r   rV   llama_sampler_clonec                   d S r   rU   r  s    rW   r  r    r   rV   llama_sampler_freec                   d S r   rU   r  s    rW   r  r    r   rV   llama_sampler_chain_initc                   d S r   rU   )r   s    rW   r  r    r   rV   llama_sampler_chain_addchainc                   d S r   rU   )r  r  s     rW   r  r    r   rV   llama_sampler_chain_getc                   d S r   rU   r  rH  s     rW   r  r    r   rV   llama_sampler_chain_nc                   d S r   rU   r  s    rW   r  r    r   rV   llama_sampler_chain_removec                   d S r   rU   r  s     rW   r  r    r   rV   llama_sampler_init_greedyc                     d S r   rU   rU   rV   rW   r  r    r
  rV   llama_sampler_init_distseedc                    d S r   rU   )r  s    rW   r  r    r
  rV   llama_sampler_init_softmaxc                     d S r   rU   rU   rV   rW   r  r    r
  rV   llama_sampler_init_top_kkc                    d S r   rU   )r  s    rW   r  r    r
  rV   llama_sampler_init_top_prK   min_keepc                    d S r   rU   rK   r  s     rW   r  r    r   rV   llama_sampler_init_min_pc                    d S r   rU   r  s     rW   r  r    r   rV   llama_sampler_init_typicalc                    d S r   rU   r  s     rW   r  r    r   rV   llama_sampler_init_temptc                    d S r   rU   )r  s    rW   r  r    r
  rV   llama_sampler_init_temp_extexponentc                    d S r   rU   )r  r  r  s      rW   r  r    r   rV   llama_sampler_init_xtcc                   d S r   rU   )rK   r  r  r  s       rW   r  r  %  r   rV   llama_sampler_init_top_n_sigmanc                   d S r   rU   )r  s    rW   r  r  2  r   rV   llama_sampler_init_mirostatn_vocabtauetamc                   d S r   rU   )r  r  r  r  r  s        rW   r  r  B  r   rV   llama_sampler_init_mirostat_v2c                   d S r   rU   )r  r  r  s      rW   r  r  R  r   rV   llama_sampler_init_grammargrammar_strgrammar_rootc                   d S r   rU   )rL  r  r  s      rW   r  r  b  r   rV   llama_sampler_init_grammar_lazytrigger_wordsnum_trigger_wordstrigger_tokensnum_trigger_tokensc                   d S r   rU   )rL  r  r  r  r  r  r  s          rW   r  r  v  r  rV   (llama_sampler_init_grammar_lazy_patternstrigger_patternsnum_trigger_patternsc                   d S r   rU   )rL  r  r  r  r  r  r  s          rW   r  r    r  rV   llama_sampler_init_penaltiespenalty_last_npenalty_repeatpenalty_freqpenalty_presentc                   d S r   rU   )r  r   r  r  s       rW   r  r    rc  rV   llama_sampler_init_dryn_ctx_traindry_multiplierdry_basedry_allowed_lengthdry_penalty_last_nnum_breakersc                   d S r   rU   )rL  r  r  r  r  r	  seq_breakersr
  s           rW   r  r    s	    2 CrV   llama_sampler_init_logit_biasn_logit_bias
logit_biasCtypesArray[llama_logit_bias]c                   d S r   rU   )r  r  r  s      rW   r  r    r   rV   llama_sampler_init_infillc                   d S r   rU   rN  s    rW   r  r    r   rV   llama_sampler_get_seedc                   d S r   rU   r  s    rW   r  r  
  r   rV   llama_sampler_sampleidxc                   d S r   rU   )r  r  r  s      rW   r  r    r   rV   llama_split_path
split_pathmaxlenpath_prefixsplit_nosplit_countc                   dS )z-Build a split GGUF final path for this chunk.NrU   )r  r  r  r  r  s        rW   r  r  &  	     CrV   llama_split_prefixsplit_prefixc                   dS )z^Extract the path prefix from the split_path if and only if the split_no and split_count match.NrU   )r"  r  r  r  r  s        rW   r!  r!  9  r   rV   llama_print_system_infoc                     d S r   rU   rU   rV   rW   r$  r$  L  r
  rV   llama_log_setlog_callbackOptional[CtypesFuncPointer]	user_datac                   dS )z|Set callback for all future logging events.

    If this is not called, or NULL is supplied, everything is output on stderr.NrU   )r'  r)  s     rW   r&  r&  T  r   rV   c                      e Zd Zdej        fdej        fdej        fdej        fdej        fdej        fdej        fgZdS )	llama_perf_context_data
t_start_ms	t_load_mst_p_eval_ms	t_eval_msn_p_evaln_evaln_reusedNrM   rN   rO   rR   r{   ro   rT   rU   rV   rW   r,  r,  r  s[        	v'	fo&	(	fo&	V^$	6>"	V^$HHHrV   r,  c                  2    e Zd Zdej        fdej        fgZdS )llama_perf_sampler_datat_sample_msn_sampleNr4  rU   rV   rW   r6  r6    s)        	(	V^$HHHrV   r6  llama_perf_contextc                   d S r   rU   r  s    rW   r9  r9    r   rV   llama_perf_context_printc                   d S r   rU   r  s    rW   r;  r;    r   rV   llama_perf_context_resetc                   d S r   rU   r  s    rW   r=  r=    r   rV   llama_perf_samplerc                   d S r   rU   r  s    rW   r?  r?    r   rV   llama_perf_sampler_printc                   d S r   rU   r  s    rW   rA  rA    r   rV   llama_perf_sampler_resetc                   d S r   rU   r  s    rW   rC  rC    r   rV   llama_opt_param_filter_alltensoruserdatac                   d S r   rU   )rF  rG  s     rW   rE  rE    r   rV   c                  X    e Zd Zdej        fdefdej        fdej        fdej        fgZdS )llama_opt_paramsr  param_filterparam_filter_udget_opt_parsget_opt_pars_udN)rM   rN   rO   rR   r   llama_opt_param_filterr   rT   rU   rV   rW   rJ  rJ    sF        	(	/0	FO,	)	FO,HHHrV   rJ  llama_opt_initlctxlopt_paramsc                   d S r   rU   )rQ  r   rR  s      rW   rP  rP    r   rV   llama_opt_epochdatasetresult_trainresult_evalidata_splitcallback_traincallback_evalc                   d S r   rU   )rQ  rU  rV  rW  rX  rY  rZ  s          rW   rT  rT    r  rV   )r   r   )r   r   )r   r   )r   r   )r   r[   )r   rz   r   r   r   r   )r   r   r   r[   r   r   r   r   )r   r4   r   rz   )r   r4   )r   r4   r   r   r   r   )r  r5   )r   r[   )r   r^   )r  r5   r   r[   )r   r4   r   r[   )r   r3   r   r[   )r  r5   r   r   )r  r5   r   r*  )r  r5   r   r0  )r   r4   r   r3  )r   r4   r   rI   )r   r4   rH  r[   r   rI  )rL  r3   r   r[   )
r   r4   r   rR  rS  rz   rT  r[   r   r[   )
r   r4   rH  rZ  rS  r[  rT  r[   r   r[   )r   r4   rS  r[  rT  ra  r   r[   )r   r4   rg  rI  r   rI  )r   r4   r   r^   )rw  rz   rx  rz   r   ry  r   r[   )r   r4   r}  rz   r   r~  )r  r  )r  r5   r  r  r  rI   r   r[   )r  r5   r  r  r   r[   )r  r5   rZ   r  r  r[   r  r[   r  r[   r  r[   r   r[   )r  r6   rZ   r^   )
r  r6   rm   r  r  r  r  r  r   r^   )
r  r6   r  r  r  r  r  r  r  r  )r  r6   rm   r  )
r  r6   rm   r  r  r  r  r  r  r  )
r  r6   rm   r  r  r  r  r  r  rZ  )r  r6   rm   r  r   r[   )r  r6   r   r^   )
r  r5   rm   r  r  r  r  r  r   r^   )
r  r5   r  r  r  r  r  r  r  r  )r  r5   rm   r  )
r  r5   rm   r  r  r  r  r  r  r  )
r  r5   rm   r  r  r  r  r  r  rZ  )r  r5   rm   r  r   r[   )r  r5   r   r^   )r  r5   r  r  r\   ra  r   r[   )r  r5   r  r  r   r[   )r  r5   r  r  r\   ra  r   r[   )r  r5   r  r  r   r[   )r  r5   r  rz   r  rg   r  ra  r  r  r   r^   )
r  r5   r  rz   r  rg   r  ra  r   r^   )r  r5   rm   rr   r   r[   )
r  r5   r  r  r\   ra  rm   rr   r   r[   )
r  r5   r  r  r\   ra  r  rr   r   r[   )r  r5   r  rz   rm   rr   r  rg   r  ra  r   r[   )r  r5   r  rz   r  rr   r  rg   r  ra  r  r  r   r[   )r  rg   rf   rZ  r   re   )rf   r  rj   r  r   r  r   re   )r  re   )r  r5   r  re   r   r[   )r  r5   r   r  r   r  )r  r5   r   r^   )r  r5   r  r^   )r  r5   r  r^   )r  r5   r   r  r   r   )r  r5   r   ri   )r  r5   rH  r  r   ri   )r  r5   rm   r  r   ri   )rL  r3   rh   r(  r   rz   )rL  r3   rh   r(  r   rI   )rL  r3   rh   r(  r   r[   )rL  r3   rh   r(  r   r^   )rL  r3   r   rG   )rL  r3   r   r^   )rL  r3   r  rz   r  rZ  r  rg   r  rZ  r  r  r  r  r   r[   )rL  r3   rh   r(  rS  r  r  rZ  r  rZ  r  r  r   r[   )rL  r3   r  rg   rf   rZ  r  rz   r  rZ  r  r  r  r  r   r[   )r  rz   r  r  r  r[   r  r^   rS  rz   r  r[   r   r[   )r  r  r  ra  r   r[   )r  r  r  r  r   r  )r  r  r   rz   )r  r  rh   r(  )r  r  r  r  )r  r  )r  r  r   r  )r   r   r   r  )r  r  r  r  )r  r  rH  r  r   r  )r  r  r   r[   )r   r  )r  r[   r   r  )r  r[   r   r  )rK   rI   r  r[   r   r  )r  rI   r   r  )r  rI   r  rI   r  rI   r   r  )
rK   rI   r  rI   r  r[   r  r[   r   r  )r  rI   r   r  )r  r[   r  r[   r  rI   r  rI   r  r[   r   r  )r  r[   r  rI   r  rI   r   r  )rL  r3   r  rz   r  rz   r   r  )rL  r3   r  rz   r  rz   r  r  r  r[   r  rg   r  r[   r   r  )rL  r3   r  rz   r  rz   r  r  r  r[   r  rg   r  r[   r   r  )
r  r[   r   rI   r  rI   r  rI   r   r  )rL  r3   r  r[   r  rI   r  rI   r  r[   r	  r[   r
  r[   r   r  )r  r[   r  r[   r  r  r   r  )rL  r3   r   r  )r  r  r   r[   )r  r  r  r5   r  r[   r   r[   )r  rz   r  ra  r  rz   r  rZ  r  rZ  r   r[   )r"  rz   r  ra  r  rz   r  rZ  r  rZ  r   r[   )r   rz   )r'  r(  r)  r   )r  r5   r   r,  )r  r  r   r6  )r  r  )rF  r   rG  r   r   r^   )rQ  r5   r   r4   rR  rJ  )rQ  r5   rU  r   rV  r   rW  r   rX  r[   rY  r   rZ  r   (  
__future__r   osrR   pathlibtypingr   r   r   r   r   llama_cpp._ctypes_extensionsr	   r
   r   r   r   r   r   r   r   r   _lib_base_nameenvironget_override_base_pathPathpathabspathdirname__file__
_base_path_libctypes_functionGGML_TYPE_F32GGML_TYPE_F16GGML_TYPE_Q4_0GGML_TYPE_Q4_1GGML_TYPE_Q5_0GGML_TYPE_Q5_1GGML_TYPE_Q8_0GGML_TYPE_Q8_1GGML_TYPE_Q2_KGGML_TYPE_Q3_KGGML_TYPE_Q4_KGGML_TYPE_Q5_KGGML_TYPE_Q6_KGGML_TYPE_Q8_KGGML_TYPE_IQ2_XXSGGML_TYPE_IQ2_XSGGML_TYPE_IQ3_XXSGGML_TYPE_IQ1_SGGML_TYPE_IQ4_NLGGML_TYPE_IQ3_SGGML_TYPE_IQ2_SGGML_TYPE_IQ4_XSGGML_TYPE_I8GGML_TYPE_I16GGML_TYPE_I32GGML_TYPE_I64GGML_TYPE_F64GGML_TYPE_IQ1_MGGML_TYPE_COUNT	CFUNCTYPErc   r   r   r   r  argtypesra   restypeLLAMA_MAX_DEVICESLLAMA_DEFAULT_SEEDLLAMA_TOKEN_NULLLLAMA_FILE_MAGIC_GGLALLAMA_FILE_MAGIC_GGSNLLAMA_FILE_MAGIC_GGSQLLAMA_SESSION_MAGICLLAMA_SESSION_VERSIONLLAMA_STATE_SEQ_MAGICLLAMA_STATE_SEQ_VERSIONr[   r3   llama_vocab_p_ctypesr4   llama_model_p_ctypesr5   llama_context_p_ctypesr6   llama_memory_t_ctypesr7   llama_kv_cache_p_ctypesro   rq   rG   rp   llama_token_prr   LLAMA_VOCAB_TYPE_NONELLAMA_VOCAB_TYPE_SPMLLAMA_VOCAB_TYPE_BPELLAMA_VOCAB_TYPE_WPMLLAMA_VOCAB_TYPE_UGMLLAMA_VOCAB_TYPE_RWKVLLAMA_VOCAB_TYPE_PLAMO2LLAMA_VOCAB_PRE_TYPE_DEFAULTLLAMA_VOCAB_PRE_TYPE_LLAMA3!LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM#LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODERLLAMA_VOCAB_PRE_TYPE_FALCONLLAMA_VOCAB_PRE_TYPE_MPTLLAMA_VOCAB_PRE_TYPE_STARCODERLLAMA_VOCAB_PRE_TYPE_GPT2LLAMA_VOCAB_PRE_TYPE_REFACTLLAMA_VOCAB_PRE_TYPE_COMMAND_RLLAMA_VOCAB_PRE_TYPE_STABLELM2LLAMA_VOCAB_PRE_TYPE_QWEN2LLAMA_VOCAB_PRE_TYPE_OLMOLLAMA_VOCAB_PRE_TYPE_DBRXLLAMA_VOCAB_PRE_TYPE_SMAUGLLAMA_VOCAB_PRE_TYPE_POROLLAMA_VOCAB_PRE_TYPE_CHATGLM3LLAMA_VOCAB_PRE_TYPE_CHATGLM4LLAMA_VOCAB_PRE_TYPE_VIKINGLLAMA_VOCAB_PRE_TYPE_JAISLLAMA_VOCAB_PRE_TYPE_TEKKENLLAMA_VOCAB_PRE_TYPE_SMOLLMLLAMA_VOCAB_PRE_TYPE_CODESHELLLLAMA_VOCAB_PRE_TYPE_BLOOM!LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISHLLAMA_VOCAB_PRE_TYPE_EXAONELLAMA_VOCAB_PRE_TYPE_CHAMELEONLLAMA_VOCAB_PRE_TYPE_MINERVA"LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLMLLAMA_VOCAB_PRE_TYPE_GPT4OLLAMA_VOCAB_PRE_TYPE_SUPERBPELLAMA_VOCAB_PRE_TYPE_TRILLIONLLAMA_VOCAB_PRE_TYPE_BAILINGMOELLAMA_VOCAB_PRE_TYPE_LLAMA4LLAMA_VOCAB_PRE_TYPE_PIXTRALLLAMA_VOCAB_PRE_TYPE_SEED_CODERLLAMA_ROPE_TYPE_NONELLAMA_ROPE_TYPE_NORMLLAMA_ROPE_TYPE_NEOXGGML_ROPE_TYPE_NEOXLLAMA_ROPE_TYPE_MROPEGGML_ROPE_TYPE_MROPELLAMA_ROPE_TYPE_VISIONGGML_ROPE_TYPE_VISIONLLAMA_TOKEN_TYPE_UNDEFINEDLLAMA_TOKEN_TYPE_NORMALLLAMA_TOKEN_TYPE_UNKNOWNLLAMA_TOKEN_TYPE_CONTROLLLAMA_TOKEN_TYPE_USER_DEFINEDLLAMA_TOKEN_TYPE_UNUSEDLLAMA_TOKEN_TYPE_BYTELLAMA_TOKEN_ATTR_UNDEFINEDLLAMA_TOKEN_ATTR_UNKNOWNLLAMA_TOKEN_ATTR_UNUSEDLLAMA_TOKEN_ATTR_NORMALLLAMA_TOKEN_ATTR_CONTROLLLAMA_TOKEN_ATTR_USER_DEFINEDLLAMA_TOKEN_ATTR_BYTELLAMA_TOKEN_ATTR_NORMALIZEDLLAMA_TOKEN_ATTR_LSTRIPLLAMA_TOKEN_ATTR_RSTRIPLLAMA_TOKEN_ATTR_SINGLE_WORDLLAMA_FTYPE_ALL_F32LLAMA_FTYPE_MOSTLY_F16LLAMA_FTYPE_MOSTLY_Q4_0LLAMA_FTYPE_MOSTLY_Q4_1LLAMA_FTYPE_MOSTLY_Q8_0LLAMA_FTYPE_MOSTLY_Q5_0LLAMA_FTYPE_MOSTLY_Q5_1LLAMA_FTYPE_MOSTLY_Q2_KLLAMA_FTYPE_MOSTLY_Q3_K_SLLAMA_FTYPE_MOSTLY_Q3_K_MLLAMA_FTYPE_MOSTLY_Q3_K_LLLAMA_FTYPE_MOSTLY_Q4_K_SLLAMA_FTYPE_MOSTLY_Q4_K_MLLAMA_FTYPE_MOSTLY_Q5_K_SLLAMA_FTYPE_MOSTLY_Q5_K_MLLAMA_FTYPE_MOSTLY_Q6_KLLAMA_FTYPE_MOSTLY_IQ2_XXSLLAMA_FTYPE_MOSTLY_IQ2_XSLLAMA_FTYPE_MOSTLY_Q2_K_SLLAMA_FTYPE_MOSTLY_IQ3_XSLLAMA_FTYPE_MOSTLY_IQ3_XXSLLAMA_FTYPE_MOSTLY_IQ1_SLLAMA_FTYPE_MOSTLY_IQ4_NLLLAMA_FTYPE_MOSTLY_IQ3_SLLAMA_FTYPE_MOSTLY_IQ3_MLLAMA_FTYPE_MOSTLY_IQ2_SLLAMA_FTYPE_MOSTLY_IQ2_MLLAMA_FTYPE_MOSTLY_IQ4_XSLLAMA_FTYPE_MOSTLY_IQ1_MLLAMA_FTYPE_MOSTLY_BF16LLAMA_FTYPE_MOSTLY_TQ1_0LLAMA_FTYPE_MOSTLY_TQ2_0LLAMA_FTYPE_MOSTLY_MXFP4_MOELLAMA_FTYPE_GUESSED#LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDLLAMA_ROPE_SCALING_TYPE_NONELLAMA_ROPE_SCALING_TYPE_LINEARLLAMA_ROPE_SCALING_TYPE_YARN LLAMA_ROPE_SCALING_TYPE_LONGROPE!LLAMA_ROPE_SCALING_TYPE_MAX_VALUELLAMA_POOLING_TYPE_UNSPECIFIEDLLAMA_POOLING_TYPE_NONELLAMA_POOLING_TYPE_MEANLLAMA_POOLING_TYPE_CLSLLAMA_POOLING_TYPE_LASTLLAMA_POOLING_TYPE_RANK LLAMA_ATTENTION_TYPE_UNSPECIFIEDLLAMA_ATTENTION_TYPE_CAUSALLLAMA_ATTENTION_TYPE_NON_CAUSALLLAMA_SPLIT_MODE_NONELLAMA_SPLIT_MODE_LAYERLLAMA_SPLIT_MODE_ROW	StructurerF   r`   rY   llama_token_data_array_prS   r   re   LLAMA_KV_OVERRIDE_TYPE_INTLLAMA_KV_OVERRIDE_TYPE_FLOATLLAMA_KV_OVERRIDE_TYPE_BOOLLLAMA_KV_OVERRIDE_TYPE_STRru   r~   r   r   r   r   llama_log_callbackr   r   llama_logit_bias_pr   r   r  llama_adapter_lora_p_ctypesr   r   r   r   r   GGML_NUMA_STRATEGY_DISABLEDGGML_NUMA_STRATEGY_DISTRIBUTEGGML_NUMA_STRATEGY_ISOLATEGGML_NUMA_STRATEGY_NUMACTLGGML_NUMA_STRATEGY_MIRRORGGML_NUMA_STRATEGY_COUNTr   r   r   r   r   r   r   r   r   r   r  rb   r  r  r  r  r  r  r   r  r  r  r  r  r  r!  r#  r%  r'  r)  r-  r/  r2  r5  r7  r9  r;  r=  r?  rA  rC  rE  rG  rK  rO  rQ  rW  rY  r^  r`  c_uint64rd  rf  rj  rl  rn  rp  rr  rt  rv  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  c_uint8r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r  r!  r#  r%  r'  r+  r-  r/  r1  r3  r5  r7  r9  r;  r=  r@  rC  rE  rG  rI  rK  rM  rO  rQ  rS  rU  rW  rY  r[  r]  r_  ra  rc  re  rg  ri  rk  rm  ro  rq  rs  ru  rw  ry  r{  r}  r  r  r  r  r  r  r  r  r  llama_sampler_p_ctypesllama_sampler_i_namellama_sampler_i_acceptllama_sampler_i_applyllama_sampler_i_resetllama_sampler_i_clonellama_sampler_i_freerT   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r$  r&  r,  r6  r9  r;  r=  r?  rA  rC  rO  rE  rJ  rP  rT  rU   rV   rW   <module>r.     s	B   " " " " " " 				                          	                  jnn%9:: QdQl\W\"'//"'//(*C*CDDEEMMr~ryr~  @S  sT  sT
>:6644T::L       $46#3
M6?FM6?$ $   'f&v}foFF  #%  !'  **,,       #  #  #  ,   .   --  --  '+S11  )3//  7-s33  /  N	n{++~     @  -  '  #  1  FV  !  $% !&' #  !"   !" !#       "  "        !#  $& !  !# ! %' "  "  " "$   ! "$    -. . */0 0 ,13 3 .      !      !     !  &  $     % ^                                  !   ') #  !"   #$  $D ! "$       $&   "#         v'   ( $V^$455     V-   . *6>*@AA  +&*
M6>6?  >! ! ! ! !&" ! ! !T           FL   
. 
. 
. 
. 
.f. 
. 
. 
.j2 2 2 2 2) 2 2 2Lc c c c c6+ c c cX &V%&,  J.0 0 0 0 0&"2 0 0 0n    v'   " $V^$455     !1   &    )     ,fnV_== 
   
   
 " 
   
 ( 
   
 ) 
   
  
  
    !     
  
  
 \N 
   
(  _() 
   
  _() 
   
" "V^FO$$fo7IJ 
   
 6?+ 
   
  
   

  
   
 /0 
   
 "/0 
   
  
   
 
N 
   

 $b&/::   ;:
 /V_EE   FE
 &FM::   ;:
 'V];;   <;
 -r6=AA   BA
 %r6=99   :9
 !7 8&/JJ   KJ
 #9":FOLL   ML
 !$:#;V_MM   NM
 "%;$<foNN   ON
 $';&<fnMM   NM
 "6!7HH   IH
 #7"8&.II   JI
 "6!7HH   IH
 #7"8&.II   JI
 "%;$<>RSS   TS
 #&<%=?TUU   VU %(>'?NN   ON
  
   
 (+?*@BVWW   XW
 (+?*@&,OO   PO
 *-A,BFNSS   TS
 %(<'=v~NN   ON
 &)=(>OO   PO
 %(<'=v~NN   ON
 (+?*@&.QQ   RQ
 $';&<fnMM   NM 47K6Lfn]]   ^] (+?*@&/RR   SR (+?*QSYSbcc   dc #&:%;V\JJ   KJ
 '*>)?PP   QP 	 N	 	  	 	 ),@+A6>RR   SR #	 N	 	  	 	 '	 N	 	  	 	 6?FO<
N 
   
 #&:%;V_MM   NM ,/CV_.UW]Wfgg   hg '*>)?QQ   RQ *-A,BFMRR   SR *-A,BFMRR   SR %(<'=v~     +.B-CV]SS   TS +.B-CV]SS   TS 233
 O    $ 6?+ 
   
  ! 
   
 8&.I
N 
   
 89
N 
   
  
   
$ v~&& N    6 FM* 
   
  	 M	 	  	 	4  	
 
  
 
$ 3\BD      	
 
  
 
.  	
 
  
 
* !6 Ey     !6 Ey     ),A+BFMRR   SR 56     !7 8&.     23T      	 M	 	  	 	,  	
 
	 	 	
 
	" 5|Dd    "  	
 
	 	 	
 
	0  	
 
	 	 	
 
	( "8,!G     "8,!G     '*@)A4HH   IH *-C,DfmTT   UT '*@)A4HH   IH '*@)A6?SS   TS '*@)A6?SS   TS v~&&
 O 	 	 	 	  v~&& O     ^V^FN;;V_M
O 
   
 ^V^FN;;<
O 
   
 v'' M
 
  
 
$ v'' M
 
  
 
  	 M	 	  	 	  	 M	 	  	 	 \*
O 
   
 v~&&	 O	 	  	 	( v~&&	 O	 	  	 	"  O
 
  
 
$ v'' O 	 	 	 	2   	 	 	 	. H+    $ #k]D99   :9 "8+!FWW   XW, "8+!FWW   XW 
 	 
 
 
 
 "%;$<fnMM   NM (+A*BFNSS   TS '*@&-)PRVWW   XW (+A6=*QSWXX   YX #&<fm%LdSS   TS 0&/B 
   
 $'=&>EE   FE /0.&.2P2P 	 	 	 	" V^,FN6>"" 
   
  34nfnV^6T6T     V^,FN6>"" 
   
 \*FN6>"" 
   
 1;?     2K@&.     1;?     /=v}     3[A6=     "%9$:KHH   IH "%9$:KHH   IH "%9$:KHH   IH "%9$:KHH   IH !$8#9;GG   HG "%9$:KHH   IH #&:%;[II   JI 
M 
   

 
M 
   

 
M 
   

  
   

  
   

  
   

  
   

  
   

  
   
 ;'
O 
   
 ;'
N 
   
 ;'
L 
   
 ;'
M 
   
 ;'
M 
   
  
   
  
   
  
   
  
   
  
   

  
   

  
   

 
M 
   
 
M 
   

  
   
  
   
  
   
  
   
  
   
  
   
  
   
4  N    P  N    L {## N    X )** N    6 "v'' N    . !/     f&       F$     3#M2O'66 'v'9OPP ))$0FTT ((
 ":   )(/EFF (()?AWXX 'v'.DEE  !"%&#$#$#$!"  V^O$$&=> 
   
 
O 
   

 [) 
   

 56 
   
  
   

  
   
  
   
   
   
 34 
   

 V^, 
   
 
L 
   
  V^, 
   
 ,b2HII   JI
 *V_,=?UVV   WV -r3IJJ   KJ +fn-=?UVV   WV ^V_% 
   
 ^V_% 
   
  ^V_% 
   
 *V^,<>TUU   VU !^V^V^4 
   
 ^V^V_foF 
   
 $^ 
   
 !^V_fnfnfnU 
   
 $_fnfn5 
   
  6?FO< 
   
 %v''{##  
 
 
 
, .v''{##  
 
 
 
& "^V^V^V^D 
   
( v''	     $ #^V^%78 
   
  
   
 
O 
   
 3V^D 
   
 _fovflS
L 
	 	 	 
	 _fovflS
L 
	 	 	 
	 *B@@   A@ _fo& 
   
2	 	 	 	 	f. 	 	 	"    f.     
   

  
   

  
   
  
   

  
   

  
   
 *)&-&/ZZ   _fo&
M 
   
    v'    13CD 
   
  	 
 
 
 
 
 
rV   