
    Pi4                    x    d dl mZ d dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ  G d d          ZdS )    )annotationsN)DictOptionalUnionList)ModelSettingsc                  T    e Zd ZddZddd
ZddZddZd Zd Ze	dd            Z
dS )
LlamaProxymodelsList[ModelSettings]returnNonec                J   t          |          dk    s
J d            i | _        |D ]$}|j        s|j        |_        || j        |j        <   %d | _        d | _        |d         | _        | j        j        | _        |                     | j                  | _        | j        | _        d S )Nr   zNo models provided!)	len_model_settings_dictmodel_aliasmodel_current_model_current_model_alias_default_model_settings_default_model_aliasload_llama_from_model_settings)selfr   r   s      j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/server/model.py__init__zLlamaProxy.__init__   s    6{{Q 5>@! 	A 	AE$ 0$)K!;@D%e&7889=37!6<Qi$)-)E)Q! #AA(
 
 %)$=!!!    Nr   Optional[str]llama_cpp.Llamac                   || j         }|| j        vr| j         }|| j        k    r| j        | j        S | j        r| j                                         d | _        | j        |         }|                     |          | _        || _        | j        S N)r   r   r   r   closer   r   r   settingss      r   __call__zLlamaProxy.__call__$   s    =-E111-ED---".** 	(%%'''",U3"AA(KK$)!""r   strc                @    | j         |                                         S r    )r   
model_dumpr   r   s     r   __getitem__zLlamaProxy.__getitem__8   s    (/::<<<r   r#    Union[ModelSettings, str, bytes]c                z    t          |t          t          f          rt          j        |          }|| j        |<   d S r    )
isinstancebytesr%   r   model_validate_jsonr   r"   s      r   __setitem__zLlamaProxy.__setitem__;   s<    h-- 	C$8BBH+3!%(((r   c              #  &   K   | j         D ]}|V  d S r    )r   r(   s     r   __iter__zLlamaProxy.__iter__@   s-      . 	 	EKKKK	 	r   c                N    | j         r| j                                          | ` d S d S r    )r   r!   )r   s    r   freezLlamaProxy.freeD   s8     	$%%'''###	$ 	$r   r   c                F   d }| j         dk    r}| j        
J d            | j        8t          j        j                            | j        | j        | j                  }nt          j                            | j        | j                  }nV| j         dk    r}| j        
J d            | j        8t          j        j                            | j        | j        | j                  }nt          j                            | j        | j                  }n| j         dk    r}| j        
J d            | j        8t          j        j	                            | j        | j        | j                  }nst          j        	                    | j        | j                  }nF| j         dk    r}| j        
J d            | j        8t          j        j
                            | j        | j        | j                  }nt          j        
                    | j        | j                  }n| j         dk    r}| j        
J d            | j        8t          j        j                            | j        | j        | j                  }nct          j                            | j        | j                  }n6| j         d	k    r}| j        
J d            | j        8t          j        j                            | j        | j        | j                  }nt          j                            | j        | j                  }n| j         d
k    r}| j        
J d            | j        8t          j        j                            | j        | j        | j                  }nSt          j                            | j        | j                  }n&| j         dk    r{| j        
J d            | j        7t          j        j                            | j        | j        | j                  }nt          j                            | j        | j                  }n| j         dk    r6| j        
J d            t          j                            | j                  }n_| j         dk    rT| j        
J d            t          j                            t'          j        t+          | j                                      }d }| j        $t,          j                            | j                  }d }| j        t3          j        | j                  }d }| j        t;          | j        t<                    sJ i }| j        D ]}|                    d          \  }}d|v r|                    d          \  }}|dk    r|                                 dv ||<   V|dk    rtC          |          ||<   o|dk    rtE          |          ||<   |dk    r|||<   tG          d|           dd l$}	i }
| j        2|	%                    t          j&        j        | j        | j'                  }nt          j&        }| j'        |
d<    |dGi |
i d| j(        d| j)        d| j*        d| j+        d | j,        d!| j-        d"| j.        d#|d$| j/        d%| j0        d&| j1        d'| j2        d(| j3        d)| j4        d*| j5        d+| j6        d,| j7        d-| j8        d.| j9        d/| j:        d0| j;        d1| j<        d2| j=        d3| j>        d4| j?        d5| j@        d6| jA        d7| jB        d8| jC        d9| jD        d:| jE        d;| jF        d<| j         d=|d>|d?| jG        d@| jH        dA|dB| j        }| jI        r| jJ        dCk    r9| j        rt          dD| jL                    t          jM        | jL        E          }n8| j        rt          dF| jL                    t          jN        | jL        E          }|O                    |           |S )HNz	llava-1-5zclip model not found)repo_idfilenameverbose)clip_model_pathr7   obsidianz	llava-1-6	moondream	nanollavazllama-3-vision-alphazminicpm-v-2.6z
qwen2.5-vlzhf-autotokenizerzAhf_pretrained_model_name_or_path must be set for hf-autotokenizerzhf-tokenizer-configz<hf_tokenizer_config_path must be set for hf-tokenizer-config)num_pred_tokens=:bool)true1intfloatr%   zUnknown value type r   )r5   r6   
model_pathn_gpu_layers
split_modemain_gputensor_split
vocab_onlyuse_mmap	use_mlockkv_overridesrpc_serversseedn_ctxn_batchn_ubatch	n_threadsn_threads_batchrope_scaling_typerope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctx	mul_mat_q
logits_all	embeddingoffload_kqv
flash_attnlast_n_tokens_size	lora_base	lora_pathnumachat_formatchat_handlerdraft_modeltype_ktype_v	tokenizerr7   diskzUsing disk cache with size )capacity_byteszUsing ram cache with size  )Pre   r8   hf_model_repo_id	llama_cppllama_chat_formatLlava15ChatHandlerfrom_pretrainedr7   ObsidianChatHandlerLlava16ChatHandlerMoondreamChatHandlerNanoLlavaChatHandlerLlama3VisionAlphaMiniCPMv26ChatHandlerQwen25VLChatHandler hf_pretrained_model_name_or_path+hf_autotokenizer_to_chat_completion_handlerhf_tokenizer_config_path.hf_tokenizer_config_to_chat_completion_handlerjsonloadopenllama_tokenizerLlamaHFTokenizerrg   llama_speculativeLlamaPromptLookupDecodingdraft_model_num_pred_tokensrL   r,   listsplitlowerrB   rC   
ValueError	functoolspartialLlamar   rE   rF   rG   rH   rI   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   rh   ri   cache
cache_typeprint
cache_sizeLlamaDiskCacheLlamaRAMCache	set_cache)r#   rf   rj   rg   rL   kvkeyvalue
value_typer   kwargs	create_fn_modelr   s                 r   r   z)LlamaProxy.load_llama_from_model_settingsI   sk
   ;..+779O777(4/BRR ( 9!)!9 ( 0 S     ):MM$,$<hFV  N     !Z//+779O777(4/CSS ( 9!)!9 ( 0 T     ):NN$,$<hFV  O     ![00+779O777(4/BRR ( 9!)!9 ( 0 S     ):MM$,$<hFV  N     ![00+779O777(4/DTT ( 9!)!9 ( 0 U     ):OO$,$<hFV  P     ![00+779O777(4/DTT ( 9!)!9 ( 0 U     ):OO$,$<hFV  P     !%;;;+779O777(4/AQQ ( 9!)!9 ( 0 R     ):LL$,$<hFV  M     !_44+779O777(4/EUU ( 9!)!9 ( 0 V     ):PP$,$<hFV  Q     !\11+779O777(4/CSS ( 9!)!9 ( 0 T     ):NN$,$<hFV  O     !%7779EER FEE +WW=  L
 !%:::1==M >==$6ee	$x@AABB L =A	4@'8HH9 I ++E ( D  K KO ,h3T:::::L+ M MXXc]]
U%<<(-C(8(8%J!V++,1KKMM],JS))#u,,,/JJS))#w..,1%LLS))#u,,,1S))()Kz)K)KLLL   	$0!))/ 1! *  II "I#+>F<  3
 3
3
 3
 3
 "..3
  **	3

 &&3
 "..3
  **3
 &&3
 ((3
 &3
 !,,3
 3
 ..3
 $$3
  &&!3
" ((#3
$ %44%3
& '88'3
( $22)3
* %44+3
, %44-3
. &66/3
0 $2213
2 $2233
4 #0053
6 ((73
8  **93
: ((;3
< !,,=3
>  **?3
B  (::C3
F ((G3
H ((I3
L M3
P !,,Q3
R &S3
V $W3
Z ??[3
\ ??]3
`  ia3
d $$e3
 3
h > 		$"f,,# OM8KMMNNN!0@STTT# NLx7JLLMMM!/x?RSSSU###r   )r   r   r   r   r    )r   r   r   r   )r   r%   )r   r%   r#   r*   )r#   r   r   r   )__name__
__module____qualname__r   r$   r)   r/   r1   r3   staticmethodr   rm   r   r   r
   r
      s        > > > >*# # # # #(= = = =4 4 4 4
  $ $ $
 n n n \n n nr   r
   )
__future__r   r~   typingr   r   r   r   ro   llama_cpp.llama_speculativer   llama_cpp.llama_tokenizerr   llama_cpp.server.settingsr   r
   rm   r   r   <module>r      s    " " " " " "  . . . . . . . . . . . .     7 7 7 7 7 7 3 3 3 3 3 3 3 3 3 3 3 3j j j j j j j j j jr   