
    PitL                       U d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:  ee:          Z;da<de=d<   ddZ>d Z?da@de=d<    e            ZA e            ZBddZCd ZDdaEd e=d!<   dd$ZF	 	 	 ddd)ZGdd2ZHdd7ZIdd;ZJ e!d<=          ZK ee?           eeK          fddAZLdBZMe;N                    dCdD eeL          geejO        ePf         dEdFdGdHdIigdJdKidGdLdMdNdOidPdQieMgR          e;N                    dSd< eeL          geMgT          ddW                        ZQe;N                    dXdY eeL          geMgZ           eeD          fdd\            ZRe;N                    d]d^ eeL          geejS        ePf         dEdFdGdHd_igdJdKidGdLd`dadOidPdQieMgR           edbdcdddedfdgdhdfgdidjdkdcdddedfdgdldfgdmdnidodjdpdcdddedfdgdqdfgdrdsdtdudmdLidmdvidwdxdygdzd{d|gdrdxdsid|d}djd~dcdddedfdgdhdfgddddjd          fdd            ZTe;U                    dd eeL          geMgZ           eeD          fdd            ZVdZWe;N                    dd eeL          geWgZ           eeD          fdd            ZXe;N                    dd eeL          geWgZ           eeD          fdd            ZYe;N                    dd eeL          geWgZ           eeD          fdd            ZZdS )    )annotationsN)Lock)partial)ListOptionalUnionDict)MemoryObjectSendStream)run_in_threadpooliterate_in_threadpool)DependsFastAPI	APIRouterRequestHTTPExceptionstatusBody)
Middleware)CORSMiddleware)
HTTPBearer)EventSourceResponse)RequestIdPlugin)RawContextMiddleware)
LlamaProxy)ConfigFileSettingsSettingsModelSettingsServerSettings)	CreateCompletionRequestCreateEmbeddingRequestCreateChatCompletionRequest	ModelListTokenizeInputRequestTokenizeInputResponseTokenizeInputCountResponseDetokenizeInputRequestDetokenizeInputResponse)RouteErrorHandler)route_classzOptional[ServerSettings]_server_settingsserver_settingsr   c                
    | a d S Nr*   )r+   s    h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llama_cpp/server/app.pyset_server_settingsr0   5   s    &    c               #     K   t           V  d S r-   r.    r1   r/   get_server_settingsr4   :   s      
r1   zOptional[LlamaProxy]_llama_proxymodel_settingsList[ModelSettings]c                &    t          |           ad S )N)models)r   r5   r6   s    r/   set_llama_proxyr;   D   s    ^444LLLr1   c                  K   t                                            d {V  d} 	 t                                           d {V  	 t                                            d} t          W V  t                                           n# t                                           w xY w	 | rt                                            d S d S # | rt                                            w w xY w)NTF)llama_outer_lockacquirellama_inner_lockreleaser5   )release_outer_locks    r/   get_llama_proxyrB   I   s      
"
"
$
$$$$$$$$
'&&(((((((((	'$$&&&!&$$&&&&$$&&&&& 	'$$&&&&&	' 	' 	'$$&&&&	's#   C %B *C BC C z+typing.Optional[typing.Callable[[], bytes]]_ping_message_factoryfactorytyping.Callable[[], bytes]c                
    | a d S r-   )rC   )rD   s    r/   set_ping_message_factoryrG   _   s    #r1   settingsSettings | NoneServerSettings | NoneList[ModelSettings] | Nonec                `   t           j                            dd           }|t           j                            |          st          d| d          t          |d          5 }|                    d          s|                    d          r>dd l}t          j
        t          j        |                    |                              }n&t          j
        |                                          }t          j        |          }|j        }d d d            n# 1 swxY w Y   |;|9| t%                      } t          j        |           }t'          j        |           g}||
J d            t)          |           t+          t,          t/                      f	          g}t1          |d
t2          j        |j                  }|                    t:          dgddgdg           |                    t>                     |J tA          |           |j!        rtE          d            |S )NCONFIG_FILEzConfig file z not found!rbz.yamlz.ymlr   z<server_settings and model_settings must be provided together)pluginsu   🦙 llama.cpp Python API)
middlewaretitleversion	root_path*T)allow_originsallow_credentialsallow_methodsallow_headersr:   c                     t                      S r-   )bytesr3   r1   r/   <lambda>zcreate_app.<locals>.<lambda>   s
     r1   )#osenvirongetpathexists
ValueErroropenendswithyamlr   model_validate_jsonjsondumps	safe_loadreadr   model_validater9   r   r   r0   r   r   r   r   	llama_cpp__version__rS   add_middlewarer   include_routerrouterr;   disable_ping_eventsrG   )	rH   r+   r6   config_filefrd   config_file_settingsrP   apps	            r/   
create_appru   d   s   
 *..55Kw~~k** 	FDKDDDEEE+t$$ 	9##G,, X0D0DV0L0L X'9'MJt~~a0011( ($$ (:'Maffhh'W'W$,;<PQQO18N	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 >#9zzH(7AA'6x@@A 	#(B(BE )C(BB (((1O<M<M;OPPPQJ
)%!+	  C eee     v%%%>2222* 2 111Js   &B*DD #D body5CreateCompletionRequest | CreateChatCompletionRequestllama_proxyr   
body_model
str | Nonereturnllama_cpp.Llamac                   |t          t          j        d           ||          }| j        *| j        dk    rt          || j                  n| j        |d<   | j        't          j        	                    | j                  |d<   | j
        dk    rdt          j        t          j        | j
        |                                          g          }d|vr||d<   n|d                             |           |S )NzService is not availablestatus_codedetailtokens
logit_biasgrammarr   logits_processor)r   r   HTTP_503_SERVICE_UNAVAILABLEr   logit_bias_type_logit_bias_tokens_to_input_idsr   rk   LlamaGrammarfrom_string
min_tokensLogitsProcessorListMinTokensLogitsProcessor	token_eosextend)rv   rx   ry   kwargsllama_min_tokens_logits_processors         r/   prepare_request_resourcesr      s    ;-
 
 
 	
 K
##E" #x// ,E4?CCC 	| |%2>>t|LLy'0'D/ARARSST(
 (
$ V++)EF%&&%&--.JKKKLr1   requestr   inner_send_chan"MemoryObjectSendStream[typing.Any]c           
       K   t          t                                }|r|j        nd} t          j        t
                                4 d {V }t          ||||          }	|4 d {V  	 t          ||	fi | d {V }
t          |
          2 3 d {V }|	                    t          t          j        |                               d {V  |                                  d {V r t          j                                |r]t                                           rD|	                    t          d                     d {V   t          j                                6 |	                    t          d                     d {V  nm# t          j                    $ rT}t%          d           t          j        dd          5  t%          d| j                    |# 1 swxY w Y   Y d }~nd }~ww xY wd d d           d {V  n# 1 d {V swxY w Y   d d d           d {V  d S # 1 d {V swxY w Y   d S )	NF)dataz[DONE]disconnected   T)shieldz-Disconnected from client (via refresh/close) )nextr4   interrupt_requests
contextlibasynccontextmanagerrB   r   r   r   senddictrf   rg   is_disconnectedanyioget_cancelled_exc_classr=   lockedprintmove_on_afterclient)r   r   rv   ry   
llama_callr   r+   r   rx   r   iteratorchunkes                r/   get_event_publisherr      s      .0011O.=H**5  ?z-o>>@@       K)$ZPP" 	 	 	 	 	 	 	 	!2:u!O!O!O!OOOOOOO#8#B#B @ @ @ @ @ @ @%)..tE9J9J/K/K/KLLLLLLLLL$4466666666 @=e;==???) @.>.E.E.G.G @-224X3F3F3FGGGGGGGGG=e;==??? $C &**4X+>+>+>??????????022   n%%%(4888  XXX   G	            	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                             s   H5+H-#FE'C;FHG<(%G7G'	'G++G7.G+/G72H7G<<H?H5
H	H5H	H55
H?H?r   r   Dict[str, float]c                    i }|                                 D ]G\  }}|                    d          }|                     |dd          D ]}||t          |          <   H|S )Nutf-8FT)add_bosspecial)itemsencodetokenizestr)r   r   to_biastokenscoreinput_ids         r/   r   r      sw     !#G"((** + +uW%%ueTJJ 	+ 	+H%*GCMM""	+Nr1   F)
auto_errorr   authorizationOptional[str]c                   K   | j         dS |r|j        | j         k    r|j        S t          t          j        d          )NTzInvalid API keyr~   )api_keycredentialsr   r   HTTP_401_UNAUTHORIZED)rH   r   s     r/   authenticater      s[      
 t  )2h6FFF(( 0    r1   z	OpenAI V1z/v1/completions
Completion200zSuccessful Responseschemaz$refz-#/components/schemas/CreateCompletionResponsez&Completion response, when stream=False)anyOfrQ   stringzServer Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzQdata: {... see CreateCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE])typerQ   example)zapplication/jsonztext/event-stream)descriptioncontent)summarydependenciesresponse_model	responsestags%/v1/engines/copilot-codex/completions)include_in_schemar   r   r   llama_cpp.Completionc                  K   t          |j        t                    rFt          |j                  dk    sJ t          |j                  dk    r|j        d         nd|_        | j        j        dk    r|j        nd}h d}|                    |          }|                    dd	          rWt          j
        d
          \  }}t          |t          t          | |||t          j        j        |          dt"                    S  t%          j        t(                                4 d {V }t+          ||||          }|                                  d {V r2t/          d| j                    t3          t4          j        d          t9          |fi | d {V cd d d           d {V  S # 1 d {V swxY w Y   d S )Nr   r    r   zcopilot-codex>   nuserbest_ofr   r   excludestreamF
   r   r   rv   ry   r   r   
data_sender_callablesepping_message_factory@Disconnected from client (via refresh/close) before llm invoked Client closed requestr~   )
isinstancepromptlistlenurlr_   model
model_dumpr^   r   create_memory_object_streamr   r   r   rk   Llama__call__rC   r   r   rB   r   r   r   r   r   r   HTTP_400_BAD_REQUESTr   	r   rv   ry   r   r   	send_chan	recv_chanrx   r   s	            r/   create_completionr     s     V $+t$$ E4;1$$$$(+DK(8(81(<(<dk!nn" ;FFF 	

   G __W_--F zz(E"" 
$@DD	9"!(# )%$?3" " " !6
 
 
 	
  ?z-o>>@@ 8 8 8 8 8 8 8K)$ZPP((******** 	cSZSacc    "7.   
 'u77777777778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8s   *A1F..
F8;F8z/v1/embeddings	Embedding)r   r   r   r    c                |   K   t           || j                  j        fi |                     dh           d {V S )Nr   r   )r   r   create_embeddingr   )r   rx   s     r/   r   r   h  sj       #GM""3 


fX

.
.        r1   z/v1/chat/completionsChatz1#/components/schemas/CreateChatCompletionResponsezServer Side Streaming response, when stream=TrueSee SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzUdata: {... see CreateChatCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE]zChat Completionzgpt-3.5-turbosystemzYou are a helpful assistant.)roler   r   zWhat is the capital of France?)r   messages)r   valuez	JSON Modez Who won the world series in 2020r   json_object)r   r   response_formatzTool CallingzExtract Jason is 30 years old.functionUserzUser recordobjectnumber)nameager  r  )r   
propertiesrequired)r  r   
parameters)r   r   )r   r   toolstool_choiceLogprobsTr   )r   r   logprobstop_logprobs)normal	json_modetool_callingr	  )openapi_examplesr!   llama_cpp.ChatCompletionc                  K   |j         }h d}|                    |          }|                    dd          rWt          j        d          \  }}t          |t          t          | |||t          j	        j
        |          dt                    S  t          j        t                                4 d {V }t          ||||          }|                                  d {V r2t#          d	| j                    t'          t(          j        d
          t-          |j
        fi | d {V cd d d           d {V  S # 1 d {V swxY w Y   d S )N>   r   r   r   r   r   r   Fr   r   r   r   r   r   r~   )r   r   r^   r   r   r   r   r   rk   r   create_chat_completionrC   r   r   rB   r   r   r   r   r   r   r   r   r   s	            r/   r  r  x  sR     \ J  G __W_--F zz(E"" 
$@DD	9"!(# )%$?A" " " !6
 
 
 	
  ?z-o>>@@ O O O O O O OK)$ZPP((******** 	cSZSacc    "7.   
 'u'CNNvNNNNNNNNO O O O O O O O O O O O O O O O O O O O O O O O O O O O O Os   8A6E
EEz
/v1/modelsModelsr"   c                $   K   dd | D             dS )Nr   c                    g | ]	}|d dg d
S )r   me)idr   owned_bypermissionsr3   ).0model_aliass     r/   
<listcomp>zget_models.<locals>.<listcomp>  s=     
 
 
  "! !	 
 
 
r1   )r   r   r3   )rx   s    r/   
get_modelsr    s8       
 
  +
 
 
  r1   Extrasz/extras/tokenizeTokenizer#   r$   c                   K    || j                                       | j                            d          d          }t	          |          S )Nr   Tr   )r   )r   r   inputr   r$   rv   rx   r   s      r/   r   r   +  sN       [$$--dj.?.?.H.HRV-WWF ////r1   z/extras/tokenize/countzTokenize Countr%   c                   K    || j                                       | j                            d          d          }t	          t          |                    S )Nr   Tr   )count)r   r   r!  r   r%   r   r"  s      r/   count_query_tokensr%  :  sT       [$$--dj.?.?.H.HRV-WWF%CKK8888r1   z/extras/detokenize
Detokenizer&   r'   c                   K    || j                                       | j                                      d          }t	          |          S )Nr   )text)r   
detokenizer   decoder'   )rv   rx   r(  s      r/   r)  r)  I  sH       ;tz""--dk::AA'JJD"----r1   )r+   r   )r6   r7   )rD   rE   )NNN)rH   rI   r+   rJ   r6   rK   )rv   rw   rx   r   ry   rz   r{   r|   )r   r   r   r   rv   rw   ry   rz   )r   r|   r   r   r{   r   )rH   r   r   r   )r   r   rv   r   r{   r   )r   r    rx   r   )r   r   rv   r!   r{   r  )rx   r   r{   r"   )rv   r#   rx   r   r{   r$   )rv   r#   rx   r   r{   r%   )rv   r&   rx   r   r{   r'   )[
__future__r   r\   rf   typingr   r   r   	functoolsr   r   r   r   r	   rk   anyio.streams.memoryr
   starlette.concurrencyr   r   fastapir   r   r   r   r   r   r   fastapi.middlewarer   fastapi.middleware.corsr   fastapi.securityr   sse_starlette.sser   starlette_context.pluginsr   starlette_context.middlewarer   llama_cpp.server.modelr   llama_cpp.server.settingsr   r   r   r   llama_cpp.server.typesr   r    r!   r"   r#   r$   r%   r&   r'   llama_cpp.server.errorsr(   ro   r*   __annotations__r0   r4   r5   r=   r?   r;   rB   rC   rG   ru   r   r   r   bearer_schemer   openai_v1_tagpostCreateCompletionResponser   r   r   ChatCompletionr  r^   r  
extras_tagr   r%  r)  r3   r1   r/   <module>rB     sG	   " " " " " " " 				                   . . . . . . . . . . . .      7 7 7 7 7 7 J J J J J J J J U U U U U U U U U U U U U U U U U U ) ) ) ) ) ) 2 2 2 2 2 2 ' ' ' ' ' ' 1 1 1 1 1 1 5 5 5 5 5 5 = = = = = =                
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 6 5 5 5 5 5 
0	1	1	1-1  1 1 1 1' ' ' '
   &* ) ) ) )466 466 5 5 5 5
' ' '& FJ  I I I I$ $ $ $ !%-1157 7 7 7 7t   B   D	 	 	 	 
e,,, !!455#*7=#9#9    (  ',''(*	
 	0 #%TU" "J	 %  ("]#~	 & 
 
. ?     B +',''(
	   68 68 68 C   N68r ',''(
	    &go66     ',''(13670  !'(["
 "J 	%  ("] $C	 & 
 
2 =   D )- -,!)6TUU!'4TUU! 	 	 ',!)6TUU!'4VWW! )/'> 
 
 *,!)6TUU!'4TUU! %/(./<,4170B06/A3& 3& 28/" /") ) $ !+"F%$ $/    D &,!)6TUU!'4TUU! !%$&  oC
 C
E) E) E)vO vO vO vOA @vOr ',''(
	    &go66    " 
 ',''(
	    &go660 0 0 0 0 ',''(
	    &go669 9 9 9 9 ',''(
	    &go66. . . . . . .r1   