
    Piw                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ ddlT ddlmZ dd	l m!Z!m"Z"m#Z#m$Z$ dd
l%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.Z/d dl0mZ1 d dl2m3Z4 ddl5m6Z6 ddl7m8Z8  G d d          Z9 G d d          Z:ee1j;        e/j<                 e1j;        e/j=                 ge1j;        e/j=                 f         Z> G d dee>                   Z?ee1j;        e/j<                 e1j;        e/j=                 ge@f         ZA G d deeA                   ZB G d de>          ZCdS )    )annotationsN)AnyListLiteralOptionalUnion	GeneratorSequenceIteratorDequeCallableDict)deque)Path   )*)LlamaGrammar)BaseLlamaCache
LlamaCacheLlamaDiskCacheLlamaRAMCache)BaseLlamaTokenizerLlamaTokenizer)LlamaDraftModel)set_verbose)suppress_stdout_stderrc            ,         e Zd ZdZdZdej        ddddddej        dddddej        ej	        dddd	d
d	ddddddddddd	ddddddddddd+ddFZ
eddI            ZeddK            ZeddM            ZeddO            ZeddQ            ZeddS            Z	 dddYZ	 	 ddd]Zdd`ZddaZdb ZdddZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddd|Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddd~Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dddZ	 dddZ	 	 	 dddZdddhdfdgd	ddg ddd	deddd	ddjdidddddfddZdddhdfdgd	ddg ddd	deddd	ddjdidddddfddZdddhdfdgd	ddg ddd	deddd	ddjdidddddfddZddddddfdedgd	dg dddddd	d	ddjdiddddddfddZ ddZ!d Z"d Z#ddZ$ddZ%ddZ&ddZ'ddZ(ddZ)ddZ*ddZ+ddZ,ddZ-ddÄZ.ddĄZ/e0	 dddɄ            Z1e0dd̄            Z2e3	 	 	 	 dd d؄            Z4dS (  Llamaz0High-level Python wrapper for a llama.cpp model.Fr   NTi           g            ?g      @@@   )+n_gpu_layers
split_modemain_gputensor_split
vocab_onlyuse_mmap	use_mlockkv_overridesseedn_ctxn_batchn_ubatch	n_threadsn_threads_batchrope_scaling_typepooling_typerope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctx
logits_all	embeddingoffload_kqv
flash_attn
op_offloadswa_fullno_perflast_n_tokens_size	lora_base
lora_scale	lora_pathnumachat_formatchat_handlerdraft_model	tokenizertype_ktype_v
spm_infillverbose
model_pathstrr"   intr#   r$   r%   Optional[List[float]]r&   boolr'   r(   r)   1Optional[Dict[str, Union[bool, int, float, str]]]r*   r+   r,   r-   r.   Optional[int]r/   r0   r1   r2   floatr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   Optional[bool]r>   r?   r@   rA   Optional[str]rB   rC   rD   Union[bool, int]rE   rF   6Optional[llama_chat_format.LlamaChatCompletionHandler]rG   Optional[LlamaDraftModel]rH   Optional[BaseLlamaTokenizer]rI   rJ   rK   rL   c       +   
         |, _         t          j                     _        t	          |,           t
          j        sGt          |,          5  t          j	                     ddd           n# 1 swxY w Y   dt
          _        t          |$t                    r |$rt          j        nt          j         _        n|$ _         j        t          j        k    rAt          |,          5  t          j         j                   ddd           n# 1 swxY w Y   | _        t          j                     _        |dk    rdn| j        _        | j        _        | j        _        | _        d _         j        rt1           j                  t          j        k    rt5          dt          j                   t6          j        t          j        z  }. |.|  _         j         j        _        | j        _        |#|nd j        _        | j        _        |	 _         |	t1          |	          dz   }/t          j!        |/z               _"        tG          |	$                                          D ]>\  }0\  }1}2|1%                    d	           j"        |0         _&        t          |2t                    r4t          j'         j"        |0         _(        |2 j"        |0         j)        _*        wt          |2tV                    r4t          j,         j"        |0         _(        |2 j"        |0         j)        _-        t          |2t\                    r5t          j/         j"        |0         _(        |2 j"        |0         j)        _0        
t          |2tb                    r|2%                    d	          }3t1          |3          d
k    rt5          d|1 d|2           |32                    d
d          }3t          j3         j"        |0         _(        ti          j5        tV          t7          j6         j"        |0         j)                  t          j7        j8        j9        z             }4t7          j5        |4t7          j:        t6          j;                            }5t7          j<        |5|3d
           +t5          d|1 d|2           d j"        d         _&         j"         j        _         t{          ||           _>        |p#t          t          jA                    dz  d           _B        |pt          jA                     _C        |
pt          jD         _E        t          jF                     _G        | jG        _H         j>         jG        _>        t{           j>        |           jG        _I         jB         jG        _B         jC         jG        _C        ||nt          jJ         jG        _K        | jG        _L        |dk    r|nd jG        _M        |dk    r|nd jG        _N        |dk    r|nd jG        _O        |dk    r|nd jG        _P        |dk    r|nd jG        _Q        |dk    r|nd jG        _R        |dk    r|nd jG        _S        |'|nd _T        | jG        _U        | jG        _V        | jG        _W        || jG        _X        || jG        _Y        |)|) jG        _Z        |*|* jG        _[        | jG        _\        |  _]        d _^        |! __        |" _`        |# _a        |+ _b        t          jd        e                    |          st5          d|            j        f                    t          jg        t          ji         j         j         j                                        _j        |(pt                      _l        |dk    r jj        m                                }t{          ||           _>         jj        m                                 jG        _H         j>         jG        _>        t{           j>        |           jG        _I         j        f                    t          jg        t          jn         jj         jG         j                                        _o         j        f                    t          jg        t          jp         j>        d jG        jH         j                                        _q        d _r         ja        rt          js         jj        jt         ja        %                    d	                     _r         jr        t          d ja                    fd}6 j        v                    |6           t          jw         jo        jx         jr         j`                  rt          d ja                    j         r?t          t          jz                    {                    d	          t          j}                   |% _~        |& _        i  _        |' _                                          _         H                                 _                                          _                                          _        t          j         j                   _        d _        t          j        |ft          j                   _        t          j        |dk    r|n| j        ft          j                   _        t7          j        d           _        	  jj                                         _        nD# t(          $ r6}7i  _         j         rt          d|7 t          j}                   Y d}7~7nd}7~7ww xY w j         r#t          d j         t          j}                                                    }8                                 }9|8dk    r jj                            |8          nd }:|9dk    r jj                            |9          nd };t/          d!  j        $                                D                       }<d" j        v r j        d"         |<d#<    j         rE|<rCt          d$d%                    |<                                           t          j}                   |<$                                D ]9\  }=}>t5          j        |>|:|;|8g&                                           j        |=<   : j~         j        d#|<v rt5          j         j                  }%|%-|% _~         j         rt          d'|% t          j}                   nn j         r`t          d(|<d#          t          j}                   t          d)|: t          j}                   t          d*|; t          j}                   d# _~         j~        8 j        1d+ _~         j         r#t          d, j~         t          j}                   d _        dS )-a  Load a llama.cpp model from `model_path`.

        Examples:
            Basic usage

            >>> import llama_cpp
            >>> model = llama_cpp.Llama(
            ...     model_path="path/to/model",
            ... )
            >>> print(model("The quick brown fox jumps ", stop=["."])["choices"][0]["text"])
            the lazy dog

            Loading a chat model

            >>> import llama_cpp
            >>> model = llama_cpp.Llama(
            ...     model_path="path/to/model",
            ...     chat_format="llama-2",
            ... )
            >>> print(model.create_chat_completion(
            ...     messages=[{
            ...         "role": "user",
            ...         "content": "what is the meaning of life?"
            ...     }]
            ... ))

        Args:
            model_path: Path to the model.
            n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
            split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options.
            main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_MODE_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_MODE_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_MODE_LAYER: ignored
            tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split.
            vocab_only: Only load the vocabulary no weights.
            use_mmap: Use mmap if possible.
            use_mlock: Force the system to keep the model in RAM.
            kv_overrides: Key-value overrides for the model.
            seed: RNG seed, -1 for random
            n_ctx: Text context, 0 = from model
            n_batch: Prompt processing maximum batch size
            n_ubatch: Physical batch size
            n_threads: Number of threads to use for generation
            n_threads_batch: Number of threads to use for batch processing
            rope_scaling_type: RoPE scaling type, from `enum llama_rope_scaling_type`. ref: https://github.com/ggerganov/llama.cpp/pull/2054
            pooling_type: Pooling type, from `enum llama_pooling_type`.
            rope_freq_base: RoPE base frequency, 0 = from model
            rope_freq_scale: RoPE frequency scaling factor, 0 = from model
            yarn_ext_factor: YaRN extrapolation mix factor, negative = from model
            yarn_attn_factor: YaRN magnitude scaling factor
            yarn_beta_fast: YaRN low correction dim
            yarn_beta_slow: YaRN high correction dim
            yarn_orig_ctx: YaRN original context size
            logits_all: Return logits for all tokens, not just the last token. Must be True for completion to return logprobs.
            embedding: Embedding mode only.
            offload_kqv: Offload K, Q, V to GPU.
            flash_attn: Use flash attention.
            op_offload: offload host tensor operations to device
            swa_full: use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
            no_perf: Measure performance timings.
            last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
            lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
            lora_path: Path to a LoRA file to apply to the model.
            numa: numa policy
            chat_format: String specifying the chat format to use when calling create_chat_completion.
            chat_handler: Optional chat handler to use when calling create_chat_completion.
            draft_model: Optional draft model to use for speculative decoding.
            tokenizer: Optional tokenizer to override the default tokenizer from llama.cpp.
            verbose: Print verbose output to stderr.
            type_k: KV cache data type for K (default: f16)
            type_v: KV cache data type for V (default: f16)
            spm_infill: Use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this.

        Raises:
            ValueError: If the model path does not exist.

        Returns:
            A Llama instance.
        )disableNTizZAttempt to split tensors that exceed maximum supported devices. Current LLAMA_MAX_DEVICES=Fr   utf-8   z
Value for z is too long:     zUnknown value type for z:    r   r   zModel path does not exist: )
path_modelparamsrL   )modelrc   rL   )n_tokensembd	n_seq_maxrL   z2Failed to initialize LoRA adapter from lora path: c                 Z     j         d S t          j         j                    d  _         d S N)_lora_adapter	llama_cppllama_adapter_lora_freeselfs   c/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/llama.pyfree_lora_adapterz)Llama.__init__.<locals>.free_lora_adapter  s3    %-F1$2DEEE%)"""    z+Failed to set LoRA adapter from lora path: file)n_vocabdtypeg      $@zFailed to load metadata: zModel metadata:  c              3  ^   K   | ](\  }}|                     d           |dd         |fV  )dS )ztokenizer.chat_template.
   N)
startswith).0nametemplates      ro   	<genexpr>z!Llama.__init__.<locals>.<genexpr>  sZ        
  
h9:: 
"##Y! 
  
  
  
  
  
rq   ztokenizer.chat_templatezchat_template.defaultz&Available chat formats from metadata: z, )r}   	eos_token	bos_tokenstop_token_idszGuessed chat format: zUsing gguf chat template: zUsing chat eos_token: zUsing chat bos_token: zllama-2zUsing fallback chat format: )rL   
contextlib	ExitStack_stackr   r   _Llama__backend_initializedr   rk   llama_backend_init
isinstancerQ   GGML_NUMA_STRATEGY_DISTRIBUTEGGML_NUMA_STRATEGY_DISABLEDrD   llama_numa_initrM   llama_model_default_paramsmodel_paramsr"   r#   r$   r%   _c_tensor_splitlenLLAMA_MAX_DEVICES
ValueErrorctypesc_floatr&   r'   r(   r)   llama_model_kv_override_kv_overrides_array	enumerateitemsencodekeyLLAMA_KV_OVERRIDE_TYPE_BOOLtagvalueval_boolrO   LLAMA_KV_OVERRIDE_TYPE_INTval_i64rT   LLAMA_KV_OVERRIDE_TYPE_FLOATval_f64rN   ljustLLAMA_KV_OVERRIDE_TYPE_STRtypingcast	addressofllama_model_kv_override_valueval_stroffsetPOINTERc_charmemmoveminr,   maxmultiprocessing	cpu_countr.   r/   LLAMA_DEFAULT_SEED_seedllama_context_default_paramscontext_paramsr+   r-   #LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDr0   r1   r2   r3   r4   r5   r6   r7   r8   _logits_all
embeddingsr;   r<   r=   r>   rI   rJ   r?   r@   cacherA   rB   rC   rK   ospathexistsenter_contextclosing	internals
LlamaModel_modelr   
tokenizer_n_ctx_trainLlamaContext_ctx
LlamaBatch_batchrj   llama_adapter_lora_initrd   RuntimeErrorcallbackllama_set_adapter_loractxprintllama_print_system_infodecodesysstderrrE   rF   _chat_handlersrG   rt   _n_vocab_n_ctxtoken_nl	_token_nl	token_eos
_token_eosLlamaTokenDataArray_candidatesre   npndarrayintc	input_idssinglescores_mirostat_mumetadata	Exception	token_bostoken_get_textdictjoinkeysllama_chat_formatJinja2ChatFormatterto_chat_handler$guess_chat_format_from_gguf_metadata_sampler)?rn   rM   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   kwargs
FloatArraykvo_array_lenikvv_bytesaddressbuffer_startrp   eeos_token_idbos_token_idr   r   template_choicesr|   r}   s?   `                                                              ro   __init__zLlama.__init__<   s   V  *,,G* 	/'888 / /,.../ / / / / / / / / / / / / / /*.E'dD!! 	 ;	77: II DI9	==='888 5 5)$)4445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 % &@BB&",,JJ, 	& (2$%-"(#(4$%%	(CCC  Oqz  rM  O  O    )*EEJ#-:$D  .2-AD*'1$1:1BXX"&/# )#--1M1MA( (D$ '|'9'9';';<< &I &I	6Aq23((72C2C(+/a&& $I &A ,ABD,Q/5>>3'' I &@ ,@AD,Q/5==5)) I &B ,@AD,Q/5==3'' Ihhw//G7||c))()Ja)J)Jq)J)JKKK%mmC77G &@ ,$k()A!)D)JKK#AIPQ G
 $*;wv}8U8U#V#VLN$    %%Gq%G%GA%G%GHHH  $-1-ED*5'**"Nc/*C*E*E*JA&N&N.M/2K2M2M 9Y9
 (DFF$)!&*l#'*4<'B'B$(,%.2.B+ !, > 	-
 ,8(,33NN 	*  /#55OO1 	+  /#55OO1 	+ !1C 7 7Q 	, -33NN 	* -33NN 	* >Ka=O=OMMUV))4)<::$)2&*5')3&!-7D*+3D( )/D&)/D&&-#"4/3
"$"$w~~j)) 	IG:GGHHHk//$#, L   
 
 $;~d';'; A::K++--Eug..DL(,(?(?(A(AD%*.,D'+.t|X+F+FD(K--&+. L   
 
	 k//$!\"17 L	   	
 	
 HL> 	!*!B!%%g.." "D !)"YYY  * * * * * K  !2333/	t14?   #R$.RR   < 	X)355<<WEECJWWWW&(  	 'jjll..**$8OOO/1z5("'/R/R/R.0j D((UUgt}ERY/
 /
 /
 #N
 
	H K0022DMM 	H 	H 	HDM| H5!55CJGGGG	H
 < 	G4T]443:FFFF~~''~~'' 9E8J8JDK&&|444PR 	 9E8J8JDK&&|444PR 	
    
  
"&-"5"5"7"7 
  
  
 
 
 %558<)945 < 	, 	]CSCXCXCZCZ9[9[]]Z   
 /4466 	  	 ND((9(M!## ,~	) ) )
 o %% $!)'+;;;+P K &#. < R?+??cjQQQQ< Q`5EF]5^`` Z    >9>>SZPPPP>9>>SZPPPP#: #(9(A(D| E43CEECJ    s<   A,,A03A0$D

DD;l m%,mmreturnllama_cpp.llama_context_pc                    | j         j        S ri   )r   r   rm   s    ro   r   z	Llama.ctx%  s    y}rq   llama_cpp.llama_model_pc                    | j         j        S ri   )r   rd   rm   s    ro   rd   zLlama.model)  s    {  rq   npt.NDArray[np.intc]c                *    | j         d | j                 S ri   )r   re   rm   s    ro   
_input_idszLlama._input_ids-  s    ~oo..rq   npt.NDArray[np.single]c                2    | j         d | j        d d f         S ri   )r   re   rm   s    ro   _scoreszLlama._scores1  s    {?T]?AAA-..rq   
Deque[int]c                v    t          | j        d | j                                                 | j                  S )Nmaxlen)r   r   re   tolistr   rm   s    ro   eval_tokenszLlama.eval_tokens5  s0    T^OdmO4;;==dkRRRRrq   Deque[List[float]]c                    t          | j        d | j        d d f                                         | j        r| j        nd          S )Nr   r  )r   r   re   r	  r   r   rm   s    ro   eval_logitszLlama.eval_logits9  sO    K$-*+2244"&"294;;
 
 
 	
rq   textbytesadd_bosspecial	List[int]c                :    | j                             |||          S )aU  Tokenize a string.

        Args:
            text: The utf-8 encoded string to tokenize.
            add_bos: Whether to add a beginning of sequence token.
            special: Whether to tokenize special tokens.

        Raises:
            RuntimeError: If the tokenization failed.

        Returns:
            A list of tokens.
        )r   tokenize)rn   r  r  r  s       ro   r  zLlama.tokenize@  s      ''gw???rq   tokensprev_tokensOptional[List[int]]c                <    | j                             |||          S )a?  Detokenize a list of tokens.

        Args:
            tokens: The list of tokens to detokenize.
            prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
            special: Whether to detokenize special tokens.

        Returns:
            The detokenized string.
        )r  r  )r   
detokenize)rn   r  r  r  s       ro   r  zLlama.detokenizeR  s*      ))W * 
 
 	
rq   r   Optional[BaseLlamaCache]c                    || _         dS )zKSet the cache.

        Args:
            cache: The cache to set.
        N)r   )rn   r   s     ro   	set_cachezLlama.set_cachef  s     


rq   c                    || _         dS )zOSet the random seed.

        Args:
            seed: The random seed.
        N)r   )rn   r*   s     ro   set_seedzLlama.set_seedn  s     


rq   c                    d| _         dS )zReset the model state.r   N)re   rm   s    ro   resetzLlama.resetv  s    rq   Sequence[int]c                   | j                             d| j        d           t          dt	          |          | j                  D ]"}||t          t	          |          || j        z                      }| j        }t	          |          }| j                            ||| j	                   | j         
                    | j                   || j        |||z   <   | j	        rt|}| j        }t          j                            | j                                         ||z  f          }|| j        |||z   ddf                             d          dd<   n	 | xj        |z  c_        $dS )zfEvaluate a list of tokens.

        Args:
            tokens: The list of tokens to evaluate.
        r]   r   )batchn_pastr9   )shapeN)r   kv_cache_seq_rmre   ranger   r,   r   r   	set_batchr   r   r   r   r   	ctypeslibas_array
get_logitsr   reshape)	rn   r  r   r#  r$  re   rowscolslogitss	            ro   evalz
Llama.evalz  s~    		!!"dmR888q#f++t|44 	& 	&A1s3v;;DL0@AAABE]F5zzHK!!Ft7G "    IT[)))9>DN6FX$556 }..I((**4$;. /   NTFVh%669:BB2FFrrrJJ MMX%MMM9	& 	&rq   (   ffffff?皙?皙?皙?      @top_ktop_pmin_p	typical_ptemprepeat_penaltyfrequency_penaltypresence_penaltytfs_zmirostat_modemirostat_etamirostat_taupenalize_nllogits_processorOptional[LogitsProcessorList]grammarOptional[LlamaGrammar]c                R    t          j                    }d
 fd}|                    |           |                     j        |||           ||                     j        |           |dk     r0|                                 |                     j	                   n|dk    r|
                                 n|
dk    r&d}|                     j         j	        |||           n|
dk    r|                     j	        ||           nd	}t          d|          }|                    |           |                    ||           |                    ||           |                    ||           |                    |           |                     j	                   |S )Ntoken_data_array"llama_cpp.llama_token_data_array_pc           	        | j         j        }| j         j        }t          j        |j                   }t          j        |ft          j        dt
          j        fdt
          j	        fdt
          j	        fgd          t          j        |z                      |                    }D ]"} |j        |j                  |j        d d <   #d S )NidlogitpT)align)r%  rv   buf)contentssizedatar   r   r   recarrayrv   r   r   rk   llama_token_datafrom_addressr  rM  )rI  rR  data_soadata_soa_addressrT  logit_processorrD  rn   s         ro   
apply_funcz'Llama._init_sampler.<locals>.apply_func  s    '05+49#)#3H4E#F#F ;'(7BI*>bi@PQ"   #3d:HH( 	 	 	 (8 Y YO(7(X(XHN111%%Y Yrq   )penalty_last_npenalty_repeatpenalty_freqpenalty_presentr   r   d   ra   r   )rI  rJ  )r   LlamaSampler
add_customadd_penaltiesr@   add_grammarr   add_softmaxadd_distr   
add_greedyadd_mirostatr   add_mirostat_v2r   	add_top_kadd_typical	add_top_p	add_min_padd_temp)rn   r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rF  samplerrZ  
mirostat_mn_probsmin_keeps   `             `      ro   _init_samplerzLlama._init_sampler  s   $ (**'Y Y Y Y Y Y Y$ z***  2)*, 	 
	
 
	
 
	
 W555#::!!!TZ((((S[[    !! 
$$MJ      !##''J      q'??!!%(((##Ix888!!%222!!%222  &&&  ,,,rq   idxc                   | j         dk    sJ d}| j        +d}|                     |||||||||	|
|||||          | _        |
|| j         z
  nd}| j        J | j                            | j        |          }|rd| _        |S )a0  Sample a token from the model.

        Args:
            top_k: The top-k sampling parameter.
            top_p: The top-p sampling parameter.
            temp: The temperature parameter.
            repeat_penalty: The repeat penalty parameter.

        Returns:
            The sampled token.
        r   FNTr7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rB  rA  rC  rD  rF  r]   )re   r   rr  r   sampler   )rn   r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rF  rs  tmp_samplerridxtokens                       ro   rv  zLlama.sample  s    < }q    = K ..#-"3!1+))'!1 /  DM$ '*osT]""2x###$$TY55 	! DMrq   r   stopping_criteriaOptional[StoppingCriteriaList]-Generator[int, Optional[Sequence[int]], None]c              #    K   t          j        d|z            | _        |                     |||||||	|
|||||||          | _        |r| j        dk    rd}t          | j        |dd                   D ]\  }}||k    r|dz  } |dk    rId}||d         }|| _        | j        r/t          d| d	t          |           d
t          j                   |r|                                  | j        t          |          z   dz
  }t          |          }	 |                     |           || j        k     r|                     |||||||	|
||||||||          }|dz  }|2 || j        d|         | j        || j        z
  ddf                   rdS |V }|                                 |                    |           ||                    |           || j        k     r:|| j        |         k    r)|| _        | j                            d| j        d           n|| j        k     | j        || j        | j        | j        t          |          z   <   |                     | j        d| j        t          |          z                      }|                    |                    t4                    d| j        | j        z
  t          |          z
                      )a  Create a generator of tokens from a prompt.

        Examples:
            >>> llama = Llama("models/ggml-7b.bin")
            >>> tokens = llama.tokenize(b"Hello, world!")
            >>> for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1.0, repeat_penalty=1.0):
            ...     print(llama.detokenize([token]))

        Args:
            tokens: The prompt tokens.
            top_k: The top-k sampling parameter.
            top_p: The top-p sampling parameter.
            temp: The temperature parameter.
            repeat_penalty: The repeat penalty parameter.
            reset: Whether to reset the model state.

        Yields:
            The generated tokens.
        g       @ru  r   Nr]   r   FzLlama.generate: z prefix-match hit, remaining z prompt tokens to evalrr   T)r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rB  rA  rD  rF  rC  rs  )r   r   r   rr  r   re   zipr  rL   r   r   r   r   r   listr0  rv  r  clearappendextendr   r&  rG   r   astyperO   r   )rn   r  r7  r8  r9  r:  r;  r<  r   r=  r>  r?  r@  rB  rA  rC  rD  rz  rF  longest_prefixab
sample_idxry  tokens_or_nonedraft_tokenss                             ro   generatezLlama.generate6  s{     R #N3+=>>**)/-'%%#- + 
 
&  	T]Q&&NDOVCRC[99  166"a'NN!!0 .< I> I I%([[I I I Z     	JJLLL ]S[[014
f/	IIft},,'#1&7%5"/!-!-%5# +"! $  & a
$05F5FOLjL14<
T]@Z\]\]\]@]3^6 60 F',e$$$!-MM.111--%4?:;V2V2V$.DMI--b$-DDDE t},,H +NTt}t}s6{{/JJK#//N#@T]S[[%@#@A     '',,C$+5FCC  W/	rq   inputUnion[str, List[str]]rd   CreateEmbeddingResponsec                    ||n| j         }t          |t                    r|n|g}|                     |d          \  }}d t	          |          D             }d||||ddS )zEmbed a string.

        Args:
            input: The utf-8 encoded string to embed.

        Returns:
            An embedding object.
        NT)return_countc                     g | ]\  }}d ||dS )r:   )objectr:   index )r{   rs  embs      ro   
<listcomp>z*Llama.create_embedding.<locals>.<listcomp>  s>     !
 !
 !
 S	 &  !
 !
 !
rq   r  )prompt_tokenstotal_tokens)r  rS  rd   usage)rM   r   r  embedr   )rn   r  rd   
model_nameembedsr  rS  s          ro   create_embeddingzLlama.create_embedding  s     $)#4%%$/
#E400=ug
  $zz%dzCC!
 !
 &f--!
 !
 !
 !- , 	
 
 	
rq   	normalizetruncater  c                *                                       j        }                                 t          j        k    } j        j        du rt          d           j        rt          j	         j
        j                   t          |t                    r|g}n|} j                                         g d fd}d}	g }
d}d}|D ]}                     |                    d                    }|r
|d|         }t%          |          }|	|z  }	||k    rt'          d	| d
|           ||z   |k    r ||
           g }
d}d} j                            |||           |
                    |           ||z  }|dz  } ||
            j        rt          j         j
        j                   t          |t                    rd         n}t          j         j
        j                                                     |r||	fS |S )zEmbed a string.

        Args:
            input: The utf-8 encoded string to embed.

        Returns:
            A list of embeddings
        FzCLlama model must be created with embedding=True to call this method	seq_sizesr  c                   t          j        
j        j                   
j                            
j                   
j                                         	t           j        k    r|dt          |           D ]h\  }}t          j	        
j        j                  fdt          |          D             }rd |D             }                    |           |z  id S t          t          |                     D ]V}t          j        
j        j        |          d          }rt          j        |          }                    |           Wd S )Nr   c                B    g | ]}|z  z   |d z   z  z            S )r   r  )r{   jn_embdposptrs     ro   r  z5Llama.embed.<locals>.decode_batch.<locals>.<listcomp>  sJ     4 4 4 C!f*,sa!ev5E/EEF4 4 4rq   c                6    g | ]}t          j        |          S r  )r   normalize_embedding)r{   r   s     ro   r  z5Llama.embed.<locals>.decode_batch.<locals>.<listcomp>"  s0     % % %ABI9!<<% % %rq   )rk   llama_kv_self_clearr   r   r   r   r   LLAMA_POOLING_TYPE_NONEr   llama_get_embeddingsr'  r  r   llama_get_embeddings_seqr   r  )r  r   rR  r:   r  r  rS  r  r  r1   rn   s       @@ro   decode_batchz!Llama.embed.<locals>.decode_batch  s   )$)-888IT[)))K y@@@(33    GAt#8GGC4 4 4 4 4 4!&t4 4 4I ! % %FO% % %	 KK	***4KCC    s9~~.. + +A#<TY]ANNC-0&\I  M$-$A)$L$L	KK	****+ +rq   r   r^   NRequested tokens (z) exceed batch size of r   )r  r  )r  r,   r1   rk   r  r   r   r   rL   llama_perf_context_resetr   r   r   rN   r   r   r  r   r   r   add_sequencer  llama_perf_context_printr  )rn   r  r  r  r  r,   r9   inputsr  r  s_batcht_batchp_batchr  r  re   outputrS  r  r1   s   ` `              @@@ro   r  zLlama.embed  s    , ((**!Y%FF
)U22U   < 	>.ty}===eS!! 	WFFF 	 CE	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+:   	 	D]]4;;w#7#788F *)6{{HH$L '!! SSS'SS  
 !G++W%%% K$$VWjAAA NN8$$$xGqLGG 	W< 	>.ty}===&uc22<a%dim444

 	<''Mrq      promptUnion[str, List[int]]suffix
max_tokenstemperaturelogprobsechostopOptional[Union[str, List[str]]]stream
logit_biasOptional[Dict[int, float]]SUnion[Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse]]c              #   #   XYZ[\K   ||j         t          u sJ dt          t          j                               }t	          t          j                              }                                 } j                                        } j        	                                }d}d} d}! j
                            dd          dk    }"|dk    r|n|g}#|dk    r|n                                 g}$t          |t                    r|( j                                        r|#d d         dgk    rg }#t          |t                    r| j                                        s|dk    rg }$d}%|"r|!dk    r	|rd|z   }d}%t#          |          dk    rg n|g}&|dk    r||gng t          |t                    r;|d	k    r3                     |                    d
          d|dk     p|d u           ng n|z   }'|!dk    r=|;|!g|r3                     |                    d
          dd          |%d          ng z   ng }(| dk    r|| gng })|# j        r|(|'z   |)z   n|'|(z   |)z   z   |$z   }*d}+d},t          |
t                    r|
nt          |
t                    r|
gng }
||n j        }-|*d d                                          gdz  k    rHt-          j        d j                                                                        dt2                     |Nd |                                D             [d9[fd}.t7          |.g          }/||/}n|                    |/          } j        r j                                         t#          |*           j         k    r9tC          dt#          |*           dtE          j#         j$                             ||dk    r j         t#          |*          z
  }|t#          |*          z    j         k     r|n j         t#          |*          z
  }|
g k    rd |
D             }0ng }0| j%        du rtC          d           j&        r	  j&        |*         }1tN          (                    |1j)        *                                |*          }2tN          (                     j+        *                                |*          }3|2|3k    r7 ,                    |1            j        rt[          dt\          j/                   n2# t`          $ r%  j        rt[          dt\          j/                   Y nw xY w| 1                    |           n@ 1                    te          j3         j4                  5                    dd                     d}4d}5 6                    |*|||||||||||||||           D ]}6tE          j7         j        j8        |6          r 9                    |&|*!          }+d"}4 n|&:                    |6            9                    |&|*!          Xtw          Xd#d                    D ]&\  }7}8d$|7z
  }7d%D ]\  }9}:|9|7k    r|:|8z  |:k    r|9|7z
  }5'|5dk    r|5dz  }5Xfd&|0D             };t#          |;          dk    r*|;d         }<Xd X<                    |<                   }+d"}4 n|r|&|,d          }= 9                    |=|*|&d |,         z   !          \t#          \          }>d}?|0D ]X}@t{          t}          t#          |@          |>          dd          D ])}A\?                    |@d |A                   r
|A|?k    r|A}? n*Yd}B|Y|=D ]T}6|6|k    r
|Bt#           9                    |6g|*|&d |,         z   !                    z  }B|B|>|?z
  k    r n 9                    |6g|*|&d |,         z   !          @                    d
d'(          }Ct#          |          t#           9                    |&d |,         |*|&d |,         z   !          @                    d
d'(                    z   }Dt#          |*          |,z   }E jA        |Edz
  d d f         }FtN          B                    |F          *                                }Gt          t          t          |Gt{          t#          |G                              d)*                    }H fd+|Hd |         D             }I|IE                    |C|Gt	          |6                   i            9                    |6g|*|&d |,         z   !          @                    d
d'(          g|Dg|Gt	          |6                   g|Igd,}J|,dz  },|d-||- 9                    |6g|*|&d |,         z   !          @                    d
d'(          d|Jd d.gd/V  Vnt#          |=          dk    rd}Kt{          dt#          |=          dz             D ]U}A	  9                    |=d |A         |*|&d |,         z   !          }L|L@                    d
          }Md)}K n# t          $ r Y Rw xY wnQ|KsnN|Bt#          |L          z  }B|B|>|?z
  k    rn2|=|Ad          }=|,|Az  },|d-||-|Mdd d d.gd/V  t#          |=          dk    t#          |&          |k    r 9                    |&|*!          }+d}4 n|9 | j+         jA        dd d f                   r 9                    |&|*!          }+d"}4 j        r j        G                                 |ry|&|,d          }= 9                    |=|*|&d |,         z   !          \\fd0|0D             };t#          |;          dk    rt}          \fd1|;D                       }Nnt#          \          }Nd}B|=D ]}6|Bt#           9                    |6g|*|&d |,         z   !                    z  }Bd }J||6|k    rB 9                    |6g          @                    d
d'(          }Ct#          |          t#           9                    |&d |,         |*|&d |,         z   !                    z   }Dt#          |*          |,z   dz
  }E jA        |Ed d f         }FtN          B                    |F          *                                }Gt          t          t          |Gt{          t#          |G                              d)*                    }H fd2|Hd |         D             }I|IE                    |C|Gt	          |6                   i            9                    |6g          @                    d
d'(          g|Dg|Gt	          |6                   g|Igd,}J|B|Nk    rh 9                    |6g          }O|B|Ndz
  k    r n|,dz  },|d-||-|Od t#          |O          |B|Nz
  z
           @                    d
d'(          d|Jd d.gd/V   n@|,dz  },|d-||- 9                    |6g          @                    d
d'(          d|Jd d.gd/V  |d-||-d	dd |4d.gd/V   j&        rc j        rt[          d3t\          j/                    H                                 j&        |*|&z   <    j        rt[          d4t\          j/                   d S  j&        rA j        rt[          d3t\          j/                    H                                 j&        |*|&z   <   |+@                    d
d'(          }P|	r||Pz   }P|!dk     r||P|z   }Pd }J|1|	rdnt#          |          }D|	rdnt#          |*dd                    }Eg }Qg }Rg }Sg }T|	r.|*|*d                                          k    rdndd          |&z   Yn|&YY fd5tw          Y          D             }UtN          B                     jA                  |Ed          }Vtw          t          Y|U|V                    D ]@\  Z\  }6}C}W|6|k    r|Q:                    |Dt#           9                    Yd Z                   @                    d
d'(                    z              |S:                    |C           t          t          t          |Wt{          t#          |W                              d)*                    }H|R:                    |Wt	          |6                              YZ fd6|Hd |         D             }I|IE                    |C|Wt	          |6                   i           |T:                    |I           B|	rt#          Y          dk    r
d |Rd<   d |Td<   |S|Q|R|Td,}J|d-||-|Pd|J|4d.gt#          |*          t#          |&          t#          |*          t#          |&          z   d7d8V  d S ):Nzcmpl-r   ztokenizer.ggml.add_space_prefixtruer]   r   u   ☺ra   rw   r^   F)r  r  rq   zDetected duplicate leading "zN" in prompt, this will likely reduce response quality, consider removing it...c                N    i | ]"\  }}t          |          t          |          #S r  )rO   rT   )r{   r   r   s      ro   
<dictcomp>z,Llama._create_completion.<locals>.<dictcomp>  s*    NNN41ac!ffeAhhNNNrq   r   r   r   r  r   c                    t          j        |          }                                D ]\  }}|||         z   ||<   |S ri   )r   copyr   )r   r   
new_scoresinput_idscorelogit_bias_maps        ro   logit_bias_processorz6Llama._create_completion.<locals>.logit_bias_processor  sY      W 
 (6';';'='= D DOHe+06(3C+CJx((!!rq   r  z) exceed context window of c                8    g | ]}|                     d           S )r^   )r   )r{   ss     ro   r  z,Llama._create_completion.<locals>.<listcomp>	  s$    >>>Aahhw//>>>rq   zBlogprobs is not supported for models created with logits_all=Falsez#Llama._create_completion: cache hitrr   z$Llama._create_completion: cache missl        length)r7  r8  r9  r:  r;  r?  r@  rB  rA  r=  r>  r<  rz  rD  rF  r  r     ))ra      )r     )      c                    g | ]}|v |	S r  r  )r{   r  all_texts     ro   r  z,Llama._create_completion.<locals>.<listcomp>R  s    CCCaQ(]]]]]rq   ignoreerrorsT)reversec                n    i | ]1\  }}                     |g                              d d          |2S r^   r  r  r  r   r{   logprobr   rn   s      ro   r  z,Llama._create_completion.<locals>.<dictcomp>  sZ     ' ' ' !+ !OOQC0077 ' 8  &' ' 'rq   )r  text_offsettoken_logprobstop_logprobstext_completion)r  r  r  finish_reason)rL  r  createdrd   choicesc                    g | ]}|v |	S r  r  )r{   r  remaining_texts     ro   r  z,Llama._create_completion.<locals>.<listcomp>  s#    IIIaQ.5H5H5H5H5Hrq   c              3  B   K   | ]}                     |          V  d S ri   )r  )r{   r  r  s     ro   r~   z+Llama._create_completion.<locals>.<genexpr>  s1      JJ...t44JJJJJJrq   c                n    i | ]1\  }}                     |g                              d d          |2S r  r  r  s      ro   r  z,Llama._create_completion.<locals>.<dictcomp>   sP     # # #&GQ ,,33GH3MMw# # #rq   z$Llama._create_completion: cache savez%Llama._create_completion: cache savedc                    g | ]:\  }}                     |gd |                                       dd          ;S Nr  r^   r  r  r  )r{   r   ry  
all_tokensrn   s      ro   r  z,Llama._create_completion.<locals>.<listcomp>  sd        Au Z^DDKKH L    rq   c                    i | ];\  }}                     |gd                                        dd          |<S r  r  )r{   r  r   r  rs  rn   s      ro   r  z,Llama._create_completion.<locals>.<dictcomp>  sh     ; ; ; # OOQCZ5EOFFMM N  ; ; ;rq   )r  completion_tokensr  )rL  r  r  rd   r  r  r   r   r   r  r   r  )I	__class__rN   uuiduuid4rO   timer   r   	token_cls	token_sepr   getr   r   r  add_bos_tokenadd_eos_tokenr   r  r   rK   rM   warningswarnr   RuntimeWarningr   LogitsProcessorListr  rL   r   reset_timingsr   r   rk   llama_n_ctxr   r   r   r   longest_token_prefixr   r	  r  
load_stater   r   r   KeyErrorr  randomRandomr   randintr  llama_token_is_eogvocabr  r  r   r  r'  r   endswithr   r  logits_to_logprobssortedr~  updateUnicodeErrorprint_timings
save_state)]rn   r  r  r  r  r8  r9  r:  r  r  r  r=  r>  r<  r7  r  r*   r?  r@  rB  rA  rd   rz  rD  rF  r  completion_idr  r   cls_token_idsep_token_idprefix_token_idmiddle_token_idsuffix_token_idadd_space_prefix
bos_tokens
eos_tokenssuffix_space_prefixr  prefix_tokenssuffix_tokensmiddle_tokensr  r  returned_tokensr  r  _logit_bias_processorstop_sequences
cache_itemcache_prefix_leneval_prefix_lenr  multibyte_fixry  r   charnumpatternany_stop
first_stopremaining_tokensremaining_lengthfirst_stop_positionr  r   token_end_position	token_strr  token_offsetr/  current_logprobssorted_logprobstop_logproblogprobs_or_nonedecode_successbstsend	last_texttext_strtext_offsetsr  r  r  all_token_strsall_logprobslogprobs_tokenr  r  rs  r  r  s]   `                                                                                       @@@@@ro   _create_completionzLlama._create_completionc  s:     < ~!1S!8!8!888S%6%68849;;'' NN,, K1133 K1133   M?HHFR 	 2>1C1C V
(B..LLDNN4D4D!


 %%	*0.;,,.. +9"1"~"%%Jvt$$ 	))++ *80<0B0BJ#$ 	$1 4 4 4V^F"# .1[[1__rr<. "1A!5!5&:L_RT &#&&
 R<< MM'**!,q0BFdN     R $
4 !##(: !!
 DMM&--"8"8%QVMWW+,,     	 "1A!5!5&:L_RT 	  ?E.>>#m3mC	  	  tT**WDD*T3:O:O0WUW 	 $)#4%%$/
!!1!1 2Q 666M lt{/I/I$..JZJZ/[/[  l  l  l   !NN:;K;K;M;MNNNN	" 	" 	" 	" 	" 	" %89M8N$O$O!'#8  #3#:#:;P#Q#Q < 	&I##%%%},,uS%7%7uuT]TijnjrTsTsuu   qs='9'99J
 C...<< J+M 2 22 	 2::>>>>>NNND$4$=$=T   : 	SS!Z6
#(#=#=(//11=$ $  #("<"<O**,,m# # $o55OOJ///| VC#*UUUU S S S< S@szRRRRS MM$MM&-
33;;AwGGHHH ]]'%%/-)/-! # 
 
 	 	E$ +DK,=uEE '8mTT &$$U+++'8mTTH %Xbcc]33 0 04E$B 0 0LCQww7T>W#<#<(+a0 q  "CCCC>CCCH8}}q  %a[
 <(.."<"< <= & K#4_5E5E#F !%$ -0ABR?BR0S S "1 " " $'~#6#6 
 '(#' " "A"3s1vv/?#@#@!RHH " ")221RaR599 " #66667 3!E"
 &'"' "2 I I L00$*c OO!&,9"34D_4D"E-F ,  / / * .,/BB  "E$(OO"G(5/0@0@A)B %4 % % !&&::	 "
 '*&kkC OO 12B?2B C,9"34D_4D"E-F ,   %fWXf>>5 5 ' (+='9'9O'K!%lQ.>.A!B+0+C+CF+K+K+R+R+T+T(*." #$4eC@P<Q<Q6R6R S S(,  + +' ' ' ' />ixi.H	' ' ' $**I7GE

7S+TUUU !%%*G0=&78H8H&I1J !0 !" !" #)&&"B"B' -8=/?E

/K.L-8M, ,( (1,"/&7'.%/ -1OO).4A*;<L_<L*M5N -< -& -& '-fWXf&F&F-.0@59	!" 	!"(     & .//!33).!&q#.>*?*?!*C!D!D " "A
%%)__$4RaR$80=&78H8H&I1J &5 &" &"
 &(YYw%7%715 %#/ % % % $% "- "!*c"gg5*-,/BB  "+;ABB+?('1, #0&7'.%/ -/-.0459	!" !"(    7 .//!33T $%%33'8mTT ( 4
 (->->OT\"aaa%0.
 .
( ??#4-?PPD"M< 	&I##%%% u	01A1AB!__ ),=>N>N,OO -  N JIII>IIIH8}}q  JJJJJJJJJ.))!") S S"cOO$14EFVFV4W$W $  ' ' " BF ',,  $ 8 8 ? ? !@ ! !I #&f++-.>.>?(5/0@0@A)B (  1 1 #K $'}#5#5#G!#KL!\,/:F','?'?'G'G'N'N'P'P$&* 0%<L8M8M2N2NOO$(  ' 'O# # # #*9)8)*D# # #K  &&	3CCJJ3O'PQQQ !OOUG44;;GH;UU# )4}+;CJJ+G*H)4( ($ &,, $ 8 8I)S1W44#q(O+"3#*!+ )2$Qc)nn8JS8P&Q$Q)""(&&"B"B)*,<15 	$      E1$'/&' %)OOUG$<$<$C$C ' %D % % &'(8-1 	      " $+"# !#!"$()6	      z T< S@szRRRR@D@Q@Q
=+<<=< TA
SSSSF: 	N| O<3:NNNN<@OO<M<MDJ}'889;;wx;88 	)(HQ6#5&(H9=#4!!VK $@11#mABB.?*@*@L&(L46N "F=?L / "}Q'74>>;K;K'K'K!!QR"T"TU'( 

 /
     !** 5 5	  N !33DLAA,--PL;DJ==< < 1 177eY L((##
4C4(899@@#H A      i((("&NE#n2E2E,F,FGGQU  # #
 %%nSZZ&@AAA; ; ; ; ; ; '6ixi&@	; ; ; ""I~c%jj/I#JKKK##K0000  'J!++$(q!"&Q +"0 ,	     ' % 0%2	  "%]!3!3%():%;%; #M 2 2S9J5K5K K 
 
 	
 	
 	
 	
 	
s&   B.T
 
,T98T9-Ah00
h=<h=IUnion[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]c                    |                      |||dn|||||||	|
|||||||||||||||          }|r|}|S t          |          }|S )  Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            suffix: A suffix to append to the generated text. If None, no suffix is appended.
            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
            temperature: The temperature to use for sampling.
            top_p: The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
            min_p: The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
            typical_p: The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
            logprobs: The number of logprobs to return. If None, no logprobs are returned.
            echo: Whether to echo the prompt.
            stop: A list of strings to stop generation when encountered.
            frequency_penalty: The penalty to apply to tokens based on their frequency in the prompt.
            presence_penalty: The penalty to apply to tokens based on their presence in the prompt.
            repeat_penalty: The penalty to apply to repeated tokens.
            top_k: The top-k value to use for sampling. Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
            stream: Whether to stream the results.
            seed: The seed to use for sampling.
            tfs_z: The tail-free sampling parameter. Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
            mirostat_mode: The mirostat sampling mode.
            mirostat_tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
            mirostat_eta: The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.
            model: The name to use for the model in the completion object.
            stopping_criteria: A list of stopping criteria to use.
            logits_processor: A list of logits processors to use.
            grammar: A grammar to use for constrained sampling.
            logit_bias: A logit bias to use.

        Raises:
            ValueError: If the requested tokens exceed the context window.
            RuntimeError: If the prompt fails to tokenize or the model fails to evaluate the prompt.

        Returns:
            Response object containing the generated text.
        Nr]   r  r  r  r  r8  r9  r:  r  r  r  r=  r>  r<  r7  r  r*   r?  r@  rB  rA  rd   rz  rD  rF  r  )rA  next)rn   r  r  r  r  r8  r9  r:  r  r  r  r=  r>  r<  r7  r  r*   r?  r@  rB  rA  rd   rz  rD  rF  r  completion_or_chunkschunks
completions                                ro   create_completionzLlama.create_completion  s    @  $66'/rrZ#/-)'%%/-!3  7  
  
6  	?SFM!%&:!;!;
rq   c                ^    |                      |||||||||	|
|||||||||||||||          S )rD  rE  )rJ  )rn   r  r  r  r  r8  r9  r:  r  r  r  r=  r>  r<  r7  r  r*   r?  r@  rB  rA  rd   rz  rD  rF  r  s                             ro   __call__zLlama.__call__0  sm    @ %%!#/-)'%%/-!3 & 
 
 	
rq   g?messages"List[ChatCompletionRequestMessage]	functions&Optional[List[ChatCompletionFunction]]function_call+Optional[ChatCompletionRequestFunctionCall]tools"Optional[List[ChatCompletionTool]]tool_choice(Optional[ChatCompletionToolChoiceOption]response_format-Optional[ChatCompletionRequestResponseFormat]r  QUnion[CreateChatCompletionResponse, Iterator[CreateChatCompletionStreamResponse]]c                :   | j         p7| j                            | j                  pt	          j        | j                  } |di d| d|d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|S )a	  Generate a chat completion from a list of messages.

        Args:
            messages: A list of messages to generate a response for.
            functions: A list of functions to use for the chat completion.
            function_call: A function call to use for the chat completion.
            tools: A list of tools to use for the chat completion.
            tool_choice: A tool choice to use for the chat completion.
            temperature: The temperature to use for sampling.
            top_p: The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
            top_k: The top-k value to use for sampling. Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
            min_p: The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
            typical_p: The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
            stream: Whether to stream the results.
            stop: A list of strings to stop generation when encountered.
            seed: The seed to use for sampling.
            response_format: The response format to use for the chat completion. Use { "type": "json_object" } to contstrain output to only valid json.
            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
            presence_penalty: The penalty to apply to tokens based on their presence in the prompt.
            frequency_penalty: The penalty to apply to tokens based on their frequency in the prompt.
            repeat_penalty: The penalty to apply to repeated tokens.
            tfs_z: The tail-free sampling parameter.
            mirostat_mode: The mirostat sampling mode.
            mirostat_tau: The mirostat sampling tau parameter.
            mirostat_eta: The mirostat sampling eta parameter.
            model: The name to use for the model in the completion object.
            logits_processor: A list of logits processors to use.
            grammar: A grammar to use.
            logit_bias: A logit bias to use.

        Returns:
            Generated chat completion or a stream of chat completion chunks.
        llamarM  rO  rQ  rS  rU  r  r8  r7  r9  r:  r  r  r  r  r*   rW  r  r>  r=  r<  r?  r@  rB  rA  rd   rD  rF  r  r  )rF   r   r  rE   r   get_chat_completion_handler)rn   rM  rO  rQ  rS  rU  r  r8  r7  r9  r:  r  r  r*   rW  r  r>  r=  r<  r?  r@  rB  rA  rd   rD  rF  r  r  r  handlers                                 ro   create_chat_completionzLlama.create_chat_completion  s   F  O"&&t'788O <T=MNN 	
 w 
 
 
$
X
  i
 (-	

 %
 $
 $
 %
 %
 %
  i
 X
 &
 6
 
  !
" ,O#
$ "z%
& .-'
( 0/)
* *>+
, %-
. (-/
0 &1
2 &3
4 %5
6 .-7
8 G9
: "z;
 	
rq   argsr   r   c           	        	 ddl m}m |                    dd          }t	          |t
                    sJ |rfd | j        |i |D             S  |di  | j        |i |S # t          $ r t          d          w xY w)	a  Generate a chat completion with return type based on the the OpenAI v1 API.

        OpenAI python package is required to use this method.

        You can install it with `pip install openai`.

        Args:
            *args: Positional arguments to pass to create_chat_completion.
            **kwargs: Keyword arguments to pass to create_chat_completion.

        Returns:
            Generated chat completion or a stream of chat completion chunks.
        r   )ChatCompletionChatCompletionChunkr  Fc              3  (   K   | ]} di |V  d S Nr  r  )r{   chunkrb  s     ro   r~   z9Llama.create_chat_completion_openai_v1.<locals>.<genexpr>  s5      oo++44e44oooooorq   zzTo use create_chat_completion_openai_v1, you must install the openai package.You can install it with `pip install openai`.Nr  )openai.types.chatra  rb  r  r   rQ   r^  ImportError)rn   r_  r   ra  r  rb  s        @ro    create_chat_completion_openai_v1z&Llama.create_chat_completion_openai_v1  s    $	MMMMMMMMZZ%00Ffd+++++ VooooB]$B]_cBngmBnBnoooo%~UU(C(CT(TV(T(TUUU 	 	 	@  	s   AA& A& &B c                   t          d,i d| j        d| j        j        d| j        j        d| j        j        d| j        d| j        j        d| j        j        d| j        j	        d	| j
        d
| j        d| j        j        d| j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        d| j        j        d| j        j        d| j        j        d| j        j        d| j        j         d| j        j!        d | j"        d!| j#        d"| j$        d#| j%        d$| j&        d%| j'        d&| j(        d'| j)        d(| j        j*        d)| j        j+        d*| j,        d+| j-        S )-NrM   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rI   rJ   rK   rL   r  ).r   rM   r   r"   r#   r$   r%   r&   r'   r(   r)   r   r   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r   r   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rI   rJ   rK   rL   rm   s    ro   __getstate__zLlama.__getstate__  s"    5
 5
 5
5
 *775
 (33	5

 &//5
 **5
 (335
 &//5
 '115
 **5
 5
 %++5
 LL5
 (115
  )33!5
" !/??#5
$ #1CC%5
& ,99'5
(  .==)5
* !/??+5
, !/??-5
. "0AA/5
0  .==15
2  .==35
4 -;;55
6 ''75
8 )4495
: +77;5
< *55=5
> *55?5
@ (11A5
D '//E5
F  $66G5
J nnK5
L M5
N nnO5
R S5
V ((W5
X **Y5
\ ((]5
` &--a5
b &--c5
f g5
h LLi5
 5	
rq   c                      | j         di | d S rd  )r   )rn   states     ro   __setstate__zLlama.__setstate__L  s     rq   
LlamaStatec                   | j         rt          dt          j                   t	          j        | j        j                  }| j         rt          d| t          j                   t          j	        t          |          z              }| j         rt          dt          j                   t	          j        | j        j        |          }| j         rt          d| t          j                   t          |          t          |          k    rt          d          t          j	        t          |          z              }t          j                            ||t          |                     | j         rt          d| dt          j                   t          | j                                        | j                                        | j        t'          |          || j        	          S )
Nz$Llama.save_state: saving llama staterr   z"Llama.save_state: got state size: z!Llama.save_state: allocated statez&Llama.save_state: copied llama state: zFailed to copy llama state datazLlama.save_state: saving z bytes of llama state)r   r   re   llama_statellama_state_sizer*   )rL   r   r   r   rk   llama_get_state_sizer   r   r   c_uint8rO   llama_copy_state_datar   r   rn  r  r  r   re   r  r   )rn   
state_sizerp  n_bytesllama_state_compacts        ro   r  zLlama.save_stateO  s   < 	K8szJJJJ3DIMBB
< 	VCzCC#*UUUU~J7::< 	H5CJGGGG1$)-MM< 	WD7DD3:VVVVw<<#j//))@AAA%~G<??  !4k3w<<PPP< 	JGJJJZ    <$$&&n))++]122$
 
 
 	
rq   rl  Nonec                   |j                                         | j         d |j        d d f<   | j         |j        d d d f         }d||dk    <   |j                                        | _        |j        | _        |j        | _        |j        }t          j        |z  }|	                    |j
                  }t          j        | j        j        |          |k    rt          d          d S )Nr   r   zFailed to set llama state data)r   r  re   r   r*   r   rq  r   rs  from_buffer_copyrp  rk   llama_set_state_datar   r   r   )rn   rl  restru  LLamaStateArrayTyperp  s         ro   r  zLlama.load_statem  s    +0<+<+<+>+>$en$aaa'({5>++QQQ./TAX--//Z
+
$nz9)::5;LMM)$)-EESS?@@@ TSrq   c                4    | j                                         S )zReturn the context window size.)r   r+   rm   s    ro   r+   zLlama.n_ctx|  s    y   rq   c                4    | j                                         S )zReturn the embedding size.)r   r  rm   s    ro   r  zLlama.n_embd  s    {!!###rq   c                4    | j                                         S )zReturn the vocabulary size.)r   rt   rm   s    ro   rt   zLlama.n_vocab  s    {""$$$rq   r   c                     t          |           S )z*Return the llama tokenizer for this model.)r   rm   s    ro   rH   zLlama.tokenizer  s    d###rq   c                4    | j                                         S )z!Return the end-of-sequence token.)r   r   rm   s    ro   r   zLlama.token_eos      {$$&&&rq   c                4    | j                                         S )z'Return the beginning-of-sequence token.)r   r   rm   s    ro   r   zLlama.token_bos  r  rq   c                4    | j                                         S )zReturn the newline token.)r   r   rm   s    ro   r   zLlama.token_nl  s    {##%%%rq   c                4    | j                                         S )zReturn the pooling type.)r   r1   rm   s    ro   r1   zLlama.pooling_type  s    y%%'''rq   c                8    | j                                          dS )z&Explicitly free the model from memory.N)r   closerm   s    ro   r  zLlama.close  s    rq   c                .    |                                   d S ri   )r  rm   s    ro   __del__zLlama.__del__  s    

rq   r]   r/  #Union[npt.NDArray[np.single], List]axisc                   t          j        | |d          }|j        dk    rd|t          j        |           <   nt          j        |          sd}t          j        | |t           j                  }t          j        |          }t          j        d          5  t          j        ||d          }t          j	        |          }d d d            n# 1 swxY w Y   ||z
  S )NT)r  keepdimsr   ru   r  )divide)
r   amaxndimisfinitesubtractr   experrstatesumlog)r/  r  logits_maxssubtract_maxsr  summedouts          ro   r  zLlama.logits_to_logprobs  s   
 #%'&td"K"K"Ka56K[11122[)) 	KFKryIIIf]##[))) 	! 	!VCdT:::F&..C	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! s""s   ,CCCr  r  c                N    d}t          | |          D ]\  }}||k    r|dz  } |S )Nr   r   )r~  )r  r  r  _a_bs        ro   r  zLlama.longest_token_prefix  s@    !Qii 	 	FBRxx!#rq   autorepo_idfilenameadditional_filesOptional[List]	local_dir&Optional[Union[str, os.PathLike[str]]]local_dir_use_symlinksUnion[bool, Literal['auto']]	cache_dir'Llama'c                   	 ddl m}m}	 ddlm}
 n# t
          $ r t          d          w xY w |
|            |	            }d |                    |d          D             }g }|D ]F}t          |                              |          }|	                    t          |                     Gfd|D             }t          |          dk    r*t          d	| d
 dt          j        |                     t          |          dk    r*t          d| d dt          j        |                     |\  }t          t          |          j                  }t          |          j         ||||||           |r|D ]fd|D             }t          |          dk    r*t          d	| d
 dt          j        |                     t          |          dk    r*t          d| d dt          j        |                     |\  } |||||||           | ||||||d          }n t"          j                            |          } | dd|i|S )a  Create a Llama model from a pretrained model name or path.
        This method requires the huggingface-hub package.
        You can install it with `pip install huggingface-hub`.

        Args:
            repo_id: The model repo id.
            filename: A filename or glob pattern to match the model file in the repo.
            additional_files: A list of filenames or glob patterns to match additional model files in the repo.
            local_dir: The local directory to save the model to.
            local_dir_use_symlinks: Whether to use symlinks when downloading the model.
            **kwargs: Additional keyword arguments to pass to the Llama constructor.

        Returns:
            A Llama model.r   )hf_hub_downloadHfFileSystem)validate_repo_idzrLlama.from_pretrained requires the huggingface-hub package. You can install it with `pip install huggingface-hub`.c                L    g | ]!}t          |t                    r|d          n|"S )r|   )r   r   )r{   rs   s     ro   r  z)Llama.from_pretrained.<locals>.<listcomp>  s?     
 
 
 'tT22<DLL
 
 
rq   T)	recursivec                >    g | ]}t          j         |          |S r  fnmatch)r{   rs   r  s     ro   r  z)Llama.from_pretrained.<locals>.<listcomp>  s*    XXX4h8W8WX$XXXrq   zNo file found in z that match z

Available Files:
r   zMultiple files found in z
 matching )r  r  	subfolderr  r  r  c                >    g | ]}t          j         |          |S r  r  )r{   rs   additonal_file_names     ro   r  z)Llama.from_pretrained.<locals>.<listcomp>	  s,    ,v,v,vd7?[_atKuKu,vT,v,v,vrq   N)r  r  r  r  r  r  local_files_onlyrM   r  )huggingface_hubr  r  huggingface_hub.utilsr  rg  lsr   relative_tor  rN   r   r   jsondumpsparentr|   r   r   r   )clsr  r  r  r  r  r  r   r  r  r  hffsfiles	file_listrs   rel_pathmatching_filesmatching_filer  matching_additional_filesmatching_additional_filerM   r  s     `                   @ro   from_pretrainedzLlama.from_pretrained  s   2	EEEEEEEE>>>>>>> 	 	 	I  	 	!!!|~~
 
488
 
 
  "	 	, 	,DDzz--g66HS]]++++ YXXX9XXX~!##=G = = = =%)Z	%:%:= =  
 ~""97 9 9h 9 9%)Z%6%69 9  
 *]++233	&&+ 	#9	
 	
 	
 	
  	'7  #,v,v,v,vi,v,v,v)011Q66$EG E EAT E E-1Z	-B-BE E  
 011A55$A7 A AFY A A-1Z->->A A  
 /H+)  #5''+A'     (!##'=#!%  JJ i::J s 
 
!

 
 	
s    -)XrM   rN   r"   rO   r#   rO   r$   rO   r%   rP   r&   rQ   r'   rQ   r(   rQ   r)   rR   r*   rO   r+   rO   r,   rO   r-   rO   r.   rS   r/   rS   r0   rS   r1   rO   r2   rT   r3   rT   r4   rT   r5   rT   r6   rT   r7   rT   r8   rO   r9   rQ   r:   rQ   r;   rQ   r<   rQ   r=   rU   r>   rU   r?   rQ   r@   rO   rA   rV   rB   rT   rC   rV   rD   rW   rE   rV   rF   rX   rG   rY   rH   rZ   rI   rS   rJ   rS   rK   rQ   rL   rQ   )r   r   )r   r   )r   r   )r   r  )r   r  )r   r  )TF)r  r  r  rQ   r  rQ   r   r  )NF)r  r  r  r  r  rQ   r   r  )r   r  )r*   rO   )r  r!  )r1  r2  r3  r    r4  r    r   r   r    r   r5  r6  TNN)r7  rO   r8  rT   r9  rT   r:  rT   r;  rT   r<  rT   r=  rT   r>  rT   r?  rT   r@  rO   rA  rT   rB  rT   rC  rQ   rD  rE  rF  rG  )r1  r2  r3  r    r4  r    r   r   r    r   r5  r6  TNNN) r7  rO   r8  rT   r9  rT   r:  rT   r;  rT   r<  rT   r=  rT   r>  rT   r?  rT   r@  rO   rA  rT   rB  rT   rC  rQ   rD  rE  rF  rG  rs  rS   )r1  r2  r3  r    r4  r    Tr   r   r    r   r6  r5  TNNN)&r  r!  r7  rO   r8  rT   r9  rT   r:  rT   r;  rT   r<  rT   r   rQ   r=  rT   r>  rT   r?  rT   r@  rO   rB  rT   rA  rT   rC  rQ   rD  rE  rz  r{  rF  rG  r   r|  ri   )r  r  rd   rV   r   r  )FTF)r  r  r  rQ   r  rQ   r  rQ   )4r  r  r  rV   r  rS   r  rT   r8  rT   r9  rT   r:  rT   r  rS   r  rQ   r  r  r=  rT   r>  rT   r<  rT   r7  rO   r  rQ   r*   rS   r?  rT   r@  rO   rB  rT   rA  rT   rd   rV   rz  r{  rD  rE  rF  rG  r  r  r   r  )4r  r  r  rV   r  rS   r  rT   r8  rT   r9  rT   r:  rT   r  rS   r  rQ   r  r  r=  rT   r>  rT   r<  rT   r7  rO   r  rQ   r*   rS   r?  rT   r@  rO   rB  rT   rA  rT   rd   rV   rz  r{  rD  rE  rF  rG  r  r  r   rB  )4r  rN   r  rV   r  rS   r  rT   r8  rT   r9  rT   r:  rT   r  rS   r  rQ   r  r  r=  rT   r>  rT   r<  rT   r7  rO   r  rQ   r*   rS   r?  rT   r@  rO   rB  rT   rA  rT   rd   rV   rz  r{  rD  rE  rF  rG  r  r  r   rB  ):rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  r  rT   r8  rT   r7  rO   r9  rT   r:  rT   r  rQ   r  r  r*   rS   rW  rX  r  rS   r>  rT   r=  rT   r<  rT   r?  rT   r@  rO   rB  rT   rA  rT   rd   rV   rD  rE  rF  rG  r  r  r  rU   r  rS   r   rY  )r_  r   r   r   )r   rn  )rl  rn  r   rx  )r   rO   )r   r   )r   rN   )r   rx  )r]   )r/  r  r  rO   r   r  )r  r!  r  r!  )NNr  N)r  rN   r  rV   r  r  r  r  r  r  r  r  r   r   r   r  )5__name__
__module____qualname____doc__r   rk   LLAMA_SPLIT_MODE_LAYERr   r   LLAMA_POOLING_TYPE_UNSPECIFIEDr   propertyr   rd   r  r  r
  r  r  r  r  r  r   r0  rr  rv  r  r  r  rA  rJ  rL  r^  rh  rj  rm  r  r  r+   r  rt   rH   r   r   r   r1   r  r  staticmethodr  r  classmethodr  r  rq   ro   r   r   7   s       ::! #:.2 JN0#')- 9%D #!$!%"% $ #   %)#'"$#'#'!&%)OS1526 $ $ ug g g g g gR    X ! ! ! X! / / / X/ / / / X/ S S S XS 
 
 
 X
 BG@ @ @ @ @* ,0	
 
 
 
 
(        #& #& #& #&N  ##&"%!! :>*.!W W W W Wv  ##&"%!! :>*.!#< < < < <B  ##&"%!! :><@*.'J J J J JZ DH&
 &
 &
 &
 &
V  "w w w w wx !%$& "&02#&"% #"!!#<@:>*.155j	
 j	
 j	
 j	
 j	
^ !%$& "&02#&"% #"!!#<@:>*.155_ _ _ _ _H !%$& "&02#&"% #"!!#<@:>*.155Z
 Z
 Z
 Z
 Z
~ =AEI48@D 02"IM$("%#& #!!#:>*.15#'&*;e
 e
 e
 e
 e
N   B6
 6
 6
p  
 
 
 
<A A A A! ! ! !$ $ $ $% % % %$ $ $ $' ' ' '' ' ' '& & & &( ( ( (       AC# # # # \#"    \ 
 ,0<@?E<@|
 |
 |
 |
 [|
 |
 |
rq   r   c                      e Zd ZddZdS )rn  r   r   r   r  re   rO   rp  r  rq  r*   c                Z    || _         || _        || _        || _        || _        || _        d S ri   )r   r   re   rp  rq  r*   )rn   r   r   re   rp  rq  r*   s          ro   r   zLlamaState.__init__@	  s4     # & 0			rq   N)r   r   r   r  re   rO   rp  r  rq  rO   r*   rO   )r  r  r  r   r  rq   ro   rn  rn  ?	  s(             rq   rn  c                      e Zd ZddZdS )	r  r   r   r   r  r   c                (    | D ]} |||          }|S ri   r  )rn   r   r   	processors       ro   rL  zLogitsProcessorList.__call__W	  s+      	2 	2IYy&11FFrq   Nr  r  r  r  rL  r  rq   ro   r  r  V	  s(             rq   r  c                      e Zd Zd	dZdS )
StoppingCriteriaListr   r   r/  r  r   rQ   c                >    t          fd| D                       S )Nc                (    g | ]} |          S r  r  )r{   rz  r   r/  s     ro   r  z1StoppingCriteriaList.__call__.<locals>.<listcomp>f	  s(    WWW=N%%i88WWWrq   )any)rn   r   r/  s    ``ro   rL  zStoppingCriteriaList.__call__c	  s-     WWWWWRVWWWXXXrq   N)r   r   r/  r  r   rQ   r  r  rq   ro   r  r  b	  s.        Y Y Y Y Y Yrq   r  c                      e Zd ZddZdd
ZdS )MinTokensLogitsProcessor
min_tokensrO   r   c                0    || _         || _        d | _        d S ri   )r  r   r  )rn   r  r   s      ro   r   z!MinTokensLogitsProcessor.__init__j	  s    $"!rq   r   r   r   r  r   c                    | j         t          |          | _         t          |          | j         z
  | j        k     rt          j         || j        <   |S ri   )r  r   r  r   infr   )rn   r   r   s      ro   rL  z!MinTokensLogitsProcessor.__call__o	  sK     %!$YDy>>D..@@&(fWF4>"rq   N)r  rO   r   rO   r  )r  r  r  r   rL  r  rq   ro   r  r  i	  s<        " " " "
     rq   r  )D
__future__r   r   r   r  r  r  r   r   r  r  r  r   r   r   r   r   r   r   r	   r
   r   r   r   r   collectionsr   pathlibr   llama_typesllama_grammarr   llama_cacher   r   r   r   llama_tokenizerr   r   llama_cpp.llama_cpprk   llama_cpp.llama_chat_formatr   llama_cpp.llama_speculativer   numpyr   numpy.typingnptllama_cpp._internals
_internalsr   _loggerr   _utilsr   r   rn  NDArrayr   r   LogitsProcessorr  rQ   StoppingCriteriar  r  r  rq   ro   <module>r     s   " " " " " " 				 



                                                          ' ' ' ' ' '            @ ? ? ? ? ? ? ? ' ' ' ' ' ' 7 7 7 7 7 7 7 7 7 7 7 7           ( ( ( ( ( (             * * * * * *E$
 E$
 E$
 E$
 E$
 E$
 E$
 E$
PH       $ [3;ry12CK	4JJ
    $/    S[13;ry3IJDPQ Y Y Y Y Y4 01 Y Y Y         rq   