
    Piv!                        d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlZdej        d<    G d	 d
e          Z G d de          Z G d dee          Z G d de          ZdS )    )annotationsN)OptionalListLiteralUnionDictcast)Self)Fieldmodel_validator)BaseSettings protected_namespacesc                     e Zd ZU dZ ed          Zded<    edd          Zd	ed
<    eddd          Zded<    ee	j
        d          Zded<    eddd          Zded<    edd          Zded<    edd          Zded<    e e	j                    d          Zded<    e e	j                    d          Zded<    edd           Zd!ed"<    edd#          Zd	ed$<    ee	j        d%          Zded&<    ed'dd(          Zded)<    ed*d+d,          Zded-<    ed*d+d.          Zded/<    e e ej                    d0z  d+          d+d1          Zded2<    e e ej                    d+          dd3          Zded4<    ee	j        5          Z ded6<    ed7d8          Z!d9ed:<    ed7d;          Z"d9ed<<    ed=5          Z#d9ed><    ed?5          Z$d9ed@<    edA5          Z%d9edB<    ed?5          Z&d9edC<    ed5          Z'dedD<    edEdF          Z(dedG<    edEdH          Z)dedI<    eddJ          Z*dedK<    edEdL          Z+dedM<    eddN          Z,dedO<    edPddQ          Z-dedR<    eddS          Z.d	edT<    eddU          Z/d	edV<    eddW          Z0dXedY<    eddZ          Z1d	ed[<    edd\          Z2d	ed]<    edd^          Z3ded_<    ed`da          Z4dbedc<    eddde          Z5dedf<    eddg          Z6d	edh<    eddi          Z7d	edj<    eddk          Z8d	edl<    eddm          Z9d	edn<    edodp          Z:dedq<    eddr          Z;dsedt<    eddu          Z<dsedv<    edEdw          Z=dedx<    e>dyz          d~d}            Z?dS )ModelSettingsz*Model settings used to load a Llama model.z8The path to the model to use for generating completions.)descriptionstrmodelNz9The alias of the model to use for generating completions.defaultr   Optional[str]model_aliasr   z_The number of layers to put on the GPU. The rest will be on the CPU. Set -1 to move all to GPU.)r   ger   intn_gpu_layerszThe split mode to use.
split_modezMain GPU to use.main_gpuz0Split layers across multiple GPUs in proportion.zOptional[List[float]]tensor_splitFz&Whether to only return the vocabulary.bool
vocab_onlyz	Use mmap.use_mmapz
Use mlock.	use_mlockzList of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.zOptional[List[str]]kv_overridesz2comma seperated list of rpc servers for offloadingrpc_serverszRandom seed. -1 for random.seedi   zThe context size.n_ctxi      zThe batch size to use per eval.n_batchz)The physical batch size used by llama.cppn_ubatch   z8The number of threads to use. Use -1 for max cpu threads	n_threadszNThe number of threads to use when batch processing. Use -1 for max cpu threadsn_threads_batch)r   rope_scaling_typeg        zRoPE base frequencyfloatrope_freq_basezRoPE frequency scaling factorrope_freq_scaleg      yarn_ext_factorg      ?yarn_attn_factorg      @@yarn_beta_fastyarn_beta_slowyarn_orig_ctxTz+if true, use experimental mul_mat_q kernels	mul_mat_qzWhether to return logits.
logits_allzWhether to use embeddings.	embeddingz"Whether to offload kqv to the GPU.offload_kqvzWhether to use flash attention.
flash_attn@   z5Last n tokens to keep for repeat penalty calculation.last_n_tokens_sizezoOptional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.	lora_basez*Path to a LoRA file to apply to the model.	lora_pathzEnable NUMA support.zUnion[bool, int]numazChat format to use.chat_formatz<Path to a CLIP model to use for multi-modal chat completion.clip_model_pathz=Use a cache to reduce processing times for evaluated prompts.cacheramz5The type of cache to use. Only used if cache is True.zLiteral['ram', 'disk']
cache_typel        z;The size of the cache in bytes. Only used if cache is True.
cache_sizez5The path to a HuggingFace tokenizer_config.json file.hf_tokenizer_config_pathz~The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained(). hf_pretrained_model_name_or_pathz=The model repo id to use for the HuggingFace tokenizer model.hf_model_repo_idzHMethod to use for speculative decoding. One of (prompt-lookup-decoding).draft_model
   z2Number of tokens to predict using the draft model.draft_model_num_pred_tokensz#Type of the key cache quantization.zOptional[int]type_kz%Type of the value cache quantization.type_vz#Whether to print debug information.verbosebefore)modereturnr
   c                    t          j                    }t          t          t          t
          f         |           }|                    dd          dk    r||d<   |                    dd          dk    r||d<   | S )Nr,   r   r   r-   )multiprocessing	cpu_countr	   r   r   r   get)selfrU   valuess      m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/server/settings.pyset_dynamic_defaultsz"ModelSettings.set_dynamic_defaults   sv    
 $-//	d38nd++::k1%%++"+F;::'++r11(1F$%    )rR   r
   )@__name__
__module____qualname____doc__r   r   __annotations__r   r   	llama_cppLLAMA_SPLIT_MODE_LAYERr   r   r   r!   llama_supports_mmapr"   llama_supports_mlockr#   r$   r%   LLAMA_DEFAULT_SEEDr&   r'   r)   r*   maxrT   rU   r,   r-   #LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDr.   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r=   r>   r?   r@   rA   rB   rC   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rO   r   rZ   r   r[   rY   r   r      sJ	        44N  E     "'O" " "K    
 u  L    
 e0,  J     E&  H    
 +0%F+ + +L     u#K  J     U-	-//  H     e.	.00   I     ). o) ) )L     "'H" " "K    
 ,:W  D     t7JKKKEKKKK5'H  G     E'R  H     U-O-//14a88N  I    
 !5-O-//33d  O    
 #U=       "E#;PQQQNQQQQ"U!@  O     #U4000O0000#eC0000000!E$///N////!E#...N....q)))M))))e"O  I     uT7RSSSJSSSSeE7STTTITTTT"F  K     u#D  J     $eK        %u F     I      %u@     I    
 #U*  D    
 "')" " "K     &+UR& & &O    
 %S  E     */K* * *J     eQ  J    
 /4eK/ / /     7<e U7 7 7$    
 ',eS' ' '    
 "'^" " "K     (-uH( ( (    
 "E9  F     "E;  F    
 E"G  G     _       r[   r   c                  4   e Zd ZU dZ edd          Zded<    edd          Zd	ed
<    edd          Zded<    edd          Z	ded<    edd          Z
ded<    edd          Zded<    edd          Zded<    edd          Zded<   dS )ServerSettingszAServer settings used to configure the FastAPI and Uvicorn server.	localhostzListen addressr   r   hosti@  zListen portr   portNzSSL key file for HTTPSr   ssl_keyfilezSSL certificate file for HTTPSssl_certfilezIAPI key for authentication. If set all requests need to be authenticated.api_keyTz=Whether to interrupt requests when a new request is received.r    interrupt_requestsFz;Disable EventSource pings (may be needed for some clients).disable_ping_events zIThe root path for the server. Useful when running behind a reverse proxy.	root_path)r\   r]   r^   r_   r   rk   r`   rl   rm   rn   ro   rp   rq   rs   r   r[   rY   ri   ri      s        KK k7GHHHDHHHHd>>>D>>>>!&":" " "K     #(%"B# # #L     #U_  G      %uS          !&Q! ! !     U_  I      r[   ri   c                      e Zd ZdS )SettingsN)r\   r]   r^   r   r[   rY   ru   ru      s        Dr[   ru   c                  8    e Zd ZU dZ eg d          Zded<   dS )ConfigFileSettingsz#Configuration file format settings.zModel configsr   zList[ModelSettings]modelsN)r\   r]   r^   r_   r   rx   r`   r   r[   rY   rw   rw      s8         --"'%"P"P"PFPPPPPPr[   rw   )
__future__r   rT   typingr   r   r   r   r   r	   typing_extensionsr
   pydanticr   r   pydantic_settingsr   ra   model_configr   ri   ru   rw   r   r[   rY   <module>r      sr   " " " " " "     = = = = = = = = = = = = = = = = " " " " " " + + + + + + + + * * * * * *     57 0 1v v v v vL v v vr    \   >	 	 	 	 	~} 	 	 	Q Q Q Q Q Q Q Q Q Qr[   