
    &`i              .       h	   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZmZ d dlmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dl m!Z!m"Z"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8m9Z9m:Z: d dl;m<Z<m=Z=m>Z>m?Z? d dl@mAZAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJmKZKmLZL d dlMmNZNmOZO d dlPmQZR  ejS        e#          ZT eOd          	 	 	 	 d^dedeUe8f         dedeVe7f         dedeVe:f         dedeVeJf         fd             ZW eOd          d!             ZX eOd"          d#             ZYeNd$e<fd%            ZZ eOd          d&eeef         d$efd'            Z[ eOd          de.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        e.j\        fd(e
e         d)e/eU         d*e/eU         d+e/e
ee]eUf                           d,e/eeUdf                  d-e/e         d.e/e	eeUe^f                           d/e/eU         d0e/e]         d1e/e
e                  d2e/e]         d3e/e]         d4e/eee6df                  d5e/e^         d6e/e^         d7e/e^         d8e/e^         de/eeeJdf                  d9e/eee9df                  d:e/e]         d$eegeBf         f*d;            Z_eN ed<=           G d> d?                                  Z`eN	 	 	 d_dAee`         dBeadCeadDead$e	eF         f
dE            Zb eOd          d<e"dFdd@d@dGdHeAdIead)eUd,e
eU         de
eeeJf                  dDeadJead$eFfdK            ZceN	 	 	 	 d`dAee`         dLeadBeadCeadDead$e	eF         fdM            Zd eOd          d@e"dFdd@d@fdHeAdLead)eUd,e
eU         de
eeeJf                  dDeadJead$eFfdN            Ze eOd          dad)eUdIeafdO            Zf eOdP          	 dbdRe
edSef                  dTe]fdU            Zg eOdP          d$eUfdV            Zh eOd"          d$eLfdW            Zi eOd"          d)eUd$eFfdX            ZjeN	 	 	 dcdYeUdZe
eU         d[ead\ead$eFf
d]            ZkdS )d    Nwraps)AnyCallableDictListOptionalSequenceTypeUnion)	dataclass)	APIRouterFastAPI)ASGIApp)cloudpickle)pickle_dumps)	build_app)DeploymentConfigReplicaConfighandle_num_replicas_autoprepare_imperative_http_options)"RAY_SERVE_FORCE_LOCAL_TESTING_MODESERVE_DEFAULT_APP_NAMESERVE_LOGGER_NAME)ASGIAppReplicaWrappermake_fastapi_class_based_view)make_local_deployment_handle)configure_component_logger)ServeUsageTag)DEFAULTDefaultcopy_class_metadataensure_serialization_contextextract_self_if_method_callvalidate_route_prefixwait_for_interrupt)AutoscalingConfigHTTPOptionsProxyLocationRequestRouterConfiggRPCOptions)ReplicaContext_get_global_client_get_internal_replica_context_set_global_client)Application
Deployment)RayServeException)DeploymentHandle)_ModelMultiplexWrapper)LoggingConfigServeInstanceDetailsServeStatus)DeveloperAPI	PublicAPI)apistable)	stabilityproxy_locationhttp_optionsgrpc_optionslogging_configc                 P    t          | |          }t          j        d|||d| dS )a  Start Serve on the cluster.

    Used to set cluster-scoped configurations such as HTTP options. In most cases, this
    does not need to be called manually and Serve will be started when an application is
    first deployed to the cluster.

    These cluster-scoped options cannot be updated dynamically. To update them, start a
    new cluster or shut down Serve on the cluster and start it again.

    These options can also be set in the config file deployed via REST API.

    Args:
        proxy_location: Where to run proxies that handle ingress traffic to the
          cluster (defaults to every node in the cluster with at least one replica on
          it). See `ProxyLocation` for supported options.
        http_options: HTTP config options for the proxies. These can be passed as an
          unstructured dictionary or the structured `HTTPOptions` class. See
          `HTTPOptions` for supported options.
        grpc_options: [EXPERIMENTAL] gRPC config options for the proxies. These can
          be passed as an unstructured dictionary or the structured `gRPCOptions`
          class See `gRPCOptions` for supported options.
        logging_config: logging config options for the serve component (
            controller & proxy).
    )r>   r?   global_logging_configN )r   _private_apiserve_start)r=   r>   r?   r@   kwargss        a/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/serve/api.pystartrH   D   sP    @ 3><PPL !!,  	        c                      	 t                      } n+# t          $ r t                              d           Y dS w xY w|                                  t          d           dS )zqCompletely shut down Serve on the cluster.

    Deletes all applications and shuts down Serve system actors.
    ONothing to shut down. There's no Serve application running on this Ray cluster.N)r-   r2   loggerinfoshutdownr/   clients    rG   rN   rN   m   sy    #%%   +	
 	
 	
 	 OOts    $99alphac                     K   	 t                      } n+# t          $ r t                              d           Y dS w xY w|                                  d{V  t          d           dS )zCompletely shut down Serve on the cluster asynchronously.

    Deletes all applications and shuts down Serve system actors.
    rK   N)r-   r2   rL   rM   shutdown_asyncr/   rO   s    rG   rS   rS      s      #%%   +	
 	
 	
 	 


!
!!!!!!!!ts    $;;returnc                  D    t                      } | t          d          | S )a!  Returns the deployment and replica tag from within a replica at runtime.

    A replica tag uniquely identifies a single replica for a Ray Serve
    deployment.

    Raises:
        RayServeException: if not called from within a Ray Serve deployment.

    Example:

        .. code-block:: python

            from ray import serve
            @serve.deployment
            class MyDeployment:
                def __init__(self):
                    # Prints "MyDeployment"
                    print(serve.get_replica_context().deployment)

    NzT`serve.get_replica_context()` may only be called from within a Ray Serve deployment.)r.   r2   )internal_replica_contexts    rG   get_replica_contextrW      s5    ,  =>>'$
 
 	

 $#rI   appc                 ^     ddt           t          t                            dt          f fd}|S )a  Wrap a deployment class with an ASGI application for HTTP request parsing.
    There are a few different ways to use this functionality.

    Example:

    FastAPI app routes are defined inside the deployment class.

        .. code-block:: python

            from ray import serve
            from fastapi import FastAPI

            app = FastAPI()

            @serve.deployment
            @serve.ingress(app)
            class MyFastAPIDeployment:
                @app.get("/hi")
                def say_hi(self) -> str:
                    return "Hello world!"

            app = MyFastAPIDeployment.bind()

    You can also use a standalone FastAPI app without registering
    routes inside the deployment.

    .. code-block:: python

        from ray import serve
        from fastapi import FastAPI

        app = FastAPI()

        @app.get("/hi")
        def say_hi():
            return "Hello world!"

        deployment = serve.deployment(serve.ingress(app)())
        app = deployment.bind()

    You can also pass in a builder function that returns an ASGI app.
    The builder function is evaluated when the deployment is initialized on
    replicas. This example shows how to use a sub-deployment inside the routes
    defined outside the deployment class.

    .. code-block:: python

        from ray import serve

        @serve.deployment
        class SubDeployment:
            def __call__(self):
                return "Hello world!"

        def build_asgi_app():
            from fastapi import FastAPI

            app = FastAPI()

            def get_sub_deployment_handle():
                return serve.get_deployment_handle(SubDeployment.name, app_name="my_app")

            @app.get("/hi")
            async def say_hi(handle: Depends(get_sub_deployment_handle)):
                return await handle.remote()

            return app

        deployment = serve.deployment(serve.ingress(build_asgi_app)())
        app = deployment.bind(SubDeployment.bind(), name="my_app", route_prefix="/")

    Args:
        app: the FastAPI app to wrap this class with.
            Can be any ASGI-compatible callable.
            You can also pass in a builder function that returns an ASGI app.
    NclsrT   c                 
      G d d          }| t          j                   st          d          t           t          j        j                  rt          d          t          t          t          f          rt                      d t          j                  rn1t                       t          j        t          d                     G  fdd t                     }t#          |            |S )	Nc                       e Zd Zd ZdS )9ingress.<locals>.decorator.<locals>.ASGIIngressDeploymentc                 "    || _         || _        d S N)argsrF   )selfr`   rF   s      rG   __init__zBingress.<locals>.decorator.<locals>.ASGIIngressDeployment.__init__  s     $DI"(DKKKrI   N)__name__
__module____qualname__rb   rC   rI   rG   ASGIIngressDeploymentr]     s#        ) ) ) ) )rI   rf   z)@serve.ingress must be used with a class.z>Classes passed to @serve.ingress may not have __call__ method.z!Failed to serialize the ASGI app.)	error_msgc                   &    e Zd Z fdZ fdZdS )6ingress.<locals>.decorator.<locals>.ASGIIngressWrapperc                      j         | g|R i | t          j                            d           t	          j         |            d S )N1)rb   r   FASTAPI_USEDrecordr   )ra   r`   rF   rZ   frozen_app_or_funcs      rG   rb   z?ingress.<locals>.decorator.<locals>.ASGIIngressWrapper.__init__(  sV    T3D333F333*11#666%.t5GHHHHHrI   c                    K   t          j        |            d {V  t          d          rMt          j        j                  r                    |            d {V  d S                     |            d S d S )N__del__)r   rp   hasattrinspectiscoroutinefunction)ra   rZ   s    rG   rp   z>ingress.<locals>.decorator.<locals>.ASGIIngressWrapper.__del__/  s      +3D999999999 3	** *23;?? *!kk$///////////D)))))	* *rI   N)rc   rd   re   rb   rp   )rZ   rn   s   rG   ASGIIngressWrapperri   '  sR        I I I I I I* * * * * * *rI   rt   )rr   isclass
ValueError
issubclasscollectionsabcr   
isinstancer   r   r   
isfunctionr#   r   loadsr   r   r"   )rZ   rf   rt   rn   rX   s   `  @rG   	decoratorzingress.<locals>.decorator  sY   ;) ) ) ) ) ) ) )
 (Cs## 	JHIIIc;?344 	P   cGY/00 	4)#s3337;c"" 		!$
 )***!,!2S,OPPP" "	* 	* 	* 	* 	* 	* 	* 	*&; 	* 	* 	*$ 	.444!!rI   r_   )r	   r   r   r   )rX   r}   s   ` rG   ingressr~      sG    ^7" 7"xS	* 7"h 7" 7" 7" 7" 7" 7"r rI   _func_or_classnameversionnum_replicasroute_prefixray_actor_optionsplacement_group_bundlesplacement_group_strategymax_replicas_per_nodeuser_configmax_ongoing_requestsmax_queued_requestsautoscaling_configgraceful_shutdown_wait_loop_sgraceful_shutdown_timeout_shealth_check_period_shealth_check_timeout_srequest_router_configmax_constructor_retry_countc                    |t           j        urt          d          |
t          d          |dk    r4d}t          |
|          \  }
}t          j                            d           d t                                                      D             }|dk    rt          d          |t           j        ddfvr|t           j        dfvrt          d	          t           j        urt          
                    d
           t          |t                    r|                                }t          j        ||nd|	|
|||||||||          t!          |          _        fd}t%          |           r ||           n|S )a4  Decorator that converts a Python class to a `Deployment`.

    Example:

    .. code-block:: python

        from ray import serve

        @serve.deployment(num_replicas=2)
        class MyDeployment:
            pass

        app = MyDeployment.bind()

    Args:
        _func_or_class: The class or function to be decorated.
        name: Name uniquely identifying this deployment within the application.
            If not provided, the name of the class or function is used.
        version: Version of the deployment. Deprecated.
        num_replicas: Number of replicas to run that handle requests to
            this deployment. Defaults to 1.
        route_prefix: Route prefix for HTTP requests. Defaults to '/'. Deprecated.
        ray_actor_options: Options to pass to the Ray Actor decorator, such as
            resource requirements. Valid options are: `accelerator_type`, `memory`,
            `num_cpus`, `num_gpus`, `resources`, and `runtime_env`.
        placement_group_bundles: Defines a set of placement group bundles to be
            scheduled *for each replica* of this deployment. The replica actor will
            be scheduled in the first bundle provided, so the resources specified in
            `ray_actor_options` must be a subset of the first bundle's resources. All
            actors and tasks created by the replica actor will be scheduled in the
            placement group by default (`placement_group_capture_child_tasks` is set
            to True).
            This cannot be set together with max_replicas_per_node.
        placement_group_strategy: Strategy to use for the replica placement group
            specified via `placement_group_bundles`. Defaults to `PACK`.
        max_replicas_per_node: The max number of replicas of this deployment that can
            run on a single node. Valid values are None (default, no limit)
            or an integer in the range of [1, 100].
            This cannot be set together with placement_group_bundles.
        user_config: Config to pass to the reconfigure method of the deployment. This
            can be updated dynamically without restarting the replicas of the
            deployment. The user_config must be fully JSON-serializable.
        max_ongoing_requests: Maximum number of requests that are sent to a
            replica of this deployment without receiving a response. Defaults to 5.
        max_queued_requests: [EXPERIMENTAL] Maximum number of requests to this
            deployment that will be queued at each *caller* (proxy or DeploymentHandle).
            Once this limit is reached, subsequent requests will raise a
            BackPressureError (for handles) or return an HTTP 503 status code (for HTTP
            requests). Defaults to -1 (no limit).
        autoscaling_config: Parameters to configure autoscaling behavior. If this
            is set, `num_replicas` should be "auto" or not set.
        graceful_shutdown_wait_loop_s: Duration that replicas wait until there is
            no more work to be done before shutting down. Defaults to 2s.
        graceful_shutdown_timeout_s: Duration to wait for a replica to gracefully
            shut down before being forcefully killed. Defaults to 20s.
        health_check_period_s: Duration between health check calls for the replica.
            Defaults to 10s. The health check is by default a no-op Actor call to the
            replica, but you can define your own health check using the "check_health"
            method in your deployment that raises an exception when unhealthy.
        health_check_timeout_s: Duration in seconds, that replicas wait for a health
            check method to return before considering it as failed. Defaults to 30s.
        logging_config: Logging config options for the deployment. If provided,
            the config will be used to set up the Serve logger on the deployment.
        request_router_config: Config for the request router used for this deployment.
        max_constructor_retry_count: Maximum number of times to retry the deployment
            constructor. Defaults to 20.
    Returns:
        `Deployment`
    z`route_prefix` can no longer be specified at the deployment level. Pass it to `serve.run` or in the application config instead.Nz2`max_ongoing_requests` must be non-null, got None.autork   c                 @    g | ]\  }}|d k    |t           j        u|S )r   )r    VALUE).0optionvalues      rG   
<listcomp>zdeployment.<locals>.<listcomp>  s>     $ $ $FE%%%%w}*D*D 	*D*D*DrI   r   z)num_replicas is expected to larger than 0zQManually setting num_replicas is not allowed when autoscaling_config is provided.zDeprecationWarning: `version` in `@serve.deployment` has been deprecated. Explicitly specifying version will raise an error in the future!   )r   r   r   r   r   r   r   r   r   r@   r   r   c           
      ,   t          j        | d d t          j        urnd t          j        urnd t          j        urnd t          j        urnd           }t	          t          j        urn| j        |t          j        urnd d          S )N)	init_argsinit_kwargsr   r   r   r   T)r   	_internal)r   creater    r   r1   rc   )	r   replica_configdeployment_configr   r   r   r   r   r   s	     rG   r}   zdeployment.<locals>.decorator  s    &-%6gm%K%K!!QU +'-?? (' ,7=@@ )( )== &%)
 
 
0 --DD>3J 'w} < <WW$
 
 
 	
rI   )r    r   rv   r   r   AUTO_NUM_REPLICAS_USEDrm   localsitemsrL   warningrz   r5   dictr   from_defaultsetuser_configured_option_namescallable)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r}   r   s    ``  ````             @rG   
deploymentr   @  s#   | 7=((K
 
 	

 #MNNNv3K "44
 4
00 	,33C888$ $#XX^^--$ $ $  qDEEEGM4888=OX > > .
 
 	

 gm##O	
 	
 	

 .-00 /',,..(5%1%=\\11/-&C$?35%3$?   699U5V5V2
 
 
 
 
 
 
 
 
 
 
F )1(@(@O99^$$$iOrI   T)frozenc                   ~    e Zd ZU dZeed<   eZeed<   dZ	e
e         ed<   dZe
eeef                  ed<   dZeed	<   dS )
	RunTargetz;Represents a Serve application to run for `serve.run_many`.targetr   /r   Nr@   Fexternal_scaler_enabled)rc   rd   re   __doc__r0   __annotations__r   r   strr   r	   r@   r   r   r5   r   boolrC   rI   rG   r   r     s{          FE&D#&&&"%L(3-%%%;?NHU4#678???$)T)))))rI   r   Ftargets$wait_for_ingress_deployment_creationwait_for_applications_running_local_testing_modec                    | st          d          t          r|st                              d           d}g }| D ]}t	          |j                  dk    rt          d          t          |j        t                    st          d          t          |j                   |                    t          |j        |j        |j        |j        |rt           nd|st#          j                    j        nd|j                             |rW|j        pt+                      }t          |t*                    st+          di |pi }t-          d	d
|d           d |D             S t/          j        ddid          }t2          j                            d           |                    |||          }|                    |           |S )zRun many applications and return the handles to their ingress deployments.

    This is only used internally with the _blocking not totally blocking the following
    code indefinitely until Ctrl-C'd.
    zNo applications provided.z=Overriding local_testing_mode=True from environment variable.Tr   z)Application name must a non-empty string.zE`serve.run` expects an `Application` returned by `Deployment.bind()`.N)r   r   r@   make_deployment_handledefault_runtime_envr   
local_test-)component_namecomponent_idr@   stream_handler_onlyc                 2    g | ]}|j         |j                 S rC   )deployment_handlesingress_deployment_name)r   bs     rG   r   z_run_many.<locals>.<listcomp>O  s#    TTTA$Q%>?TTTrI   location	EveryNode)r>   rB   v2)r   r   )r   rC   )rv   r   rL   rM   lenr   r2   rz   r   r0   	TypeErrorr%   r   appendr   r@   r   rayget_runtime_contextruntime_envr   r5   r   rD   rE   r   API_VERSIONrm   deploy_applicationswait_for_proxies_serving)	r   r   r   r   
built_appstr@   rP   handless	            rG   	_run_manyr     s4     64555) #" 	YKKWXXX"J 
 
qv;;!#$OPPP!(K00 	W   	an---V^ /&('C'C*%C$;$=$=$I$I()(A  	
 	
 	
 	
   )<]__.-88 	E*DDn.BDDN"') $		
 	
 	
 	
 UTTTTT)$k2"&
 
 
 	!((...,,1U*G - 
 
 	''*G 	( 	
 	
 	
 rI   r   )	_blockingr   r   r@   r   r   r   r   r   c          	      X    t          t          | ||||          g||          d         S )zRun an application and return a handle to its ingress deployment.

    This is only used internally with the _blocking not totally blocking the following
    code indefinitely until Ctrl-C'd.
    )r   r   r   r@   r   )r   r   r   )r   r   )r   r   r   r   r@   r   r   s          rG   _runr   e  sS      )-(?  	
 '0/   	 	rI   blockingc                 L    t          | |||          }|rt                       |S )ar  Run many applications and return the handles to their ingress deployments.

    Args:
        targets:
            A sequence of `RunTarget`,
            each containing information about an application to deploy.
        blocking: Whether this call should be blocking. If True, it
            will loop and log status until Ctrl-C'd.
        wait_for_ingress_deployment_creation: Whether to wait for the ingress
            deployments to be created.
        wait_for_applications_running: Whether to wait for the applications to be
            running. Note that this effectively implies
            `wait_for_ingress_deployment_creation=True`,
            because the ingress deployments must be created
            before the applications can be running.

    Returns:
        List[DeploymentHandle]: A list of handles that can be used
            to call the applications.
    )r   r   r   )r   r&   )r   r   r   r   r   r   s         rG   run_manyr     s?    8 -Q&C/	  G  NrI   c                 P    t          | |||||          }|rt                       |S )a  Run an application and return a handle to its ingress deployment.

    The application is returned by `Deployment.bind()`. Example:

    .. code-block:: python

        handle = serve.run(MyDeployment.bind())
        ray.get(handle.remote())

    Args:
        target:
            A Serve application returned by `Deployment.bind()`.
        blocking: Whether this call should be blocking. If True, it
            will loop and log status until Ctrl-C'd.
        name: Application name. If not provided, this will be the only
            application running on the cluster (it will delete all others).
        route_prefix: Route prefix for HTTP requests. Defaults to '/'.
            If `None` is passed, the application will not be exposed over HTTP
            (this may be useful if you only want the application to be exposed via
            gRPC or a `DeploymentHandle`).
        logging_config: Application logging config. If provided, the config will
            be applied to all deployments which doesn't have logging config.
        external_scaler_enabled: Whether external autoscaling is enabled for
            this application.

    Returns:
        DeploymentHandle: A handle that can be used to call the application.
    )r   r   r   r@   r   r   )r   r&   )r   r   r   r   r@   r   r   handles           rG   runr     sF    L !%/ 7  F  MrI   c                 R    t                      }|                    | g|           dS )z`Delete an application by its name.

    Deletes the app with all corresponding deployments.
    )r   N)r-   delete_apps)r   r   rP   s      rG   deleter     s0      !!F
v	22222rI   beta   func.max_num_models_per_replicac                     | t          |           st          d          t          j        |           st          d          t          j        |           }t          |j                  dk    st          |j                  dk    rt          d          t          t                    st          d          dk    rdk    rt          d	          d
t          ffd}t          |           r ||           n|S )a	  Wrap a callable or method used to load multiplexed models in a replica.

    The function can be standalone function or a method of a class. The
    function must have exactly one argument, the model id of type `str` for the
    model to be loaded.

    It is required to define the function with `async def` and the function must be
    an async function. It is recommended to define coroutines for long running
    IO tasks in the function to avoid blocking the event loop.

    The multiplexed function is called to load a model with the given model ID when
    necessary.

    When the number of models in one replica is larger than max_num_models_per_replica,
    the models will be unloaded using an LRU policy.

    If you want to release resources after the model is loaded, you can define
    a `__del__` method in your model class. The `__del__` method will be called when
    the model is unloaded.

    Example:

    .. code-block:: python

            from ray import serve

            @serve.deployment
            class MultiplexedDeployment:

                def __init__(self):
                    # Define s3 base path to load models.
                    self.s3_base_path = "s3://my_bucket/my_models"

                @serve.multiplexed(max_num_models_per_replica=5)
                async def load_model(self, model_id: str) -> Any:
                    # Load model with the given tag
                    # You can use any model loading library here
                    # and return the loaded model. load_from_s3 is
                    # a placeholder function.
                    return load_from_s3(model_id)

                async def __call__(self, request):
                    # Get the model_id from the request context.
                    model_id = serve.get_multiplexed_model_id()
                    # Load the model for the requested model_id.
                    # If the model is already cached locally,
                    # this will just be a dictionary lookup.
                    model = await self.load_model(model_id)
                    return model(request)


    Args:
        max_num_models_per_replica: the maximum number of models
            to be loaded on each replica. By default, it is 3, which
            means that each replica can cache up to 3 models. You can
            set it to a larger number if you have enough memory on
            the node resource, in opposite, you can set it to a smaller
            number if you want to save memory on the node resource.
    NzCThe `multiplexed` decorator must be used with a function or method.zK@serve.multiplexed can only be used to decorate async functions or methods.r      zp@serve.multiplexed can only be used to decorate functions or methods with at least one 'model_id: str' argument.z.max_num_models_per_replica must be an integer.z,max_num_models_per_replica must be positive.r   c                 @     t                      fd            }|S )Nc                  6  K   d}| s"t          |                    d                    t          |           }|@t          |           dk    r"t          |                    d                    }| d         }n?t          |           dk    r"t          |                    d                    |}| d         }d}t	          ||          s#t          |          }t          |||           nt          ||          }|                    |           d {V S )NzmFunctions decorated with `@serve.multiplexed` must take exactly onethe multiplexed model ID (str), but got {}zno arguments are provided.r   zmore than one arguments.r   r   __serve_multiplex_wrapper)	r   formatr$   r   rq   r4   setattrgetattr
load_model)	r`   args_check_error_msgra   multiplex_objectmodel_idmultiplex_attrmodel_multiplex_wrapperr   r   s	          rG   _multiplex_wrapperzEmultiplexed.<locals>._multiplex_decorator.<locals>._multiplex_wrapperG  sf     = !  (//0LMM   /tT::D
 |t99>>#,334NOO   $( 7 t99>>#,334NOO   $( 78N +^<< T*@$ :+ +' (.:QRRRR*12BN*S*S'0;;HEEEEEEEEErI   r   )r   r   r   s   ` rG   _multiplex_decoratorz)multiplexed.<locals>._multiplex_decoratorF  sA    	t'	F '	F '	F '	F '	F 
'	FR "!rI   )r   r   rr   rs   	signaturer   
parametersrz   intrv   r   )r   r   r   r   s    `  rG   multiplexedr     sD   @ ~~ 	U   *400 	(   %d++	y#$$))S1E-F-F-J-J>  
 0#66 JHIII!R'',F!,K,KGHHH+"8 +" +" +" +" +" +"Z *2$Q%%%=QQrI   c                  V    t           j        j                                        } | j        S )a  Get the multiplexed model ID for the current request.

    This is used with a function decorated with `@serve.multiplexed`
    to retrieve the model ID for the current request.

    .. code-block:: python

            import ray
            from ray import serve
            import requests

            # Set the multiplexed model id with the key
            # "ray_serve_multiplexed_model_id" in the request
            # headers when sending requests to the http proxy.
            requests.get("http://localhost:8000",
                headers={"ray_serve_multiplexed_model_id": "model_1"})

            # This can also be set when using `DeploymentHandle`.
            handle.options(multiplexed_model_id="model_1").remote("blablabla")

            # In your deployment code, you can retrieve the model id from
            # `get_multiplexed_model_id()`.
            @serve.deployment
            def my_deployment_function(request):
                assert serve.get_multiplexed_model_id() == "model_1"
    )r   servecontext_get_serve_request_contextmultiplexed_model_id)_request_contexts    rG   get_multiplexed_model_idr  v  s$    8 y(CCEE00rI   c                      t          d          } | t                      S t          j                            d           t          di |                                 }|                                S )a  Get the status of Serve on the cluster.

    Includes status of all HTTP Proxies, all active applications, and
    their deployments.

    .. code-block:: python

            @serve.deployment(num_replicas=2)
            class MyDeployment:
                pass

            serve.run(MyDeployment.bind())
            status = serve.status()
            assert status.applications["default"].status == "RUNNING"
    F)raise_if_no_controller_runningNrk   rC   )r-   r7   r   SERVE_STATUS_API_USEDrm   r6   get_serve_details_get_status)rP   detailss     rG   statusr    sk    $  uEEEF~}}'..s333"@@V%=%=%?%?@@G   rI   c                    t                      }t          j        |j        j                            |                     }|t          d|  d          t          j        	                    d           |
                    || d          S )aN  Get a handle to the application's ingress deployment by name.

    Args:
        name: Name of application to get a handle to.

    Raises:
        RayServeException: If no Serve controller is running, or if the
            application does not exist.

    .. code-block:: python

            import ray
            from ray import serve

            @serve.deployment
            def f(val: int) -> int:
                return val * 2

            serve.run(f.bind(), name="my_app")
            handle = serve.get_app_handle("my_app")
            assert handle.remote(3).result() == 6
    NzApplication 'z' does not exist.rk   Fcheck_exists)r-   r   get_controllerget_ingress_deployment_nameremoter2   r   SERVE_GET_APP_HANDLE_API_USEDrm   
get_handle)r   rP   r~   s      rG   get_app_handler    s    2  !!Fgf(DKKDQQRRG G G G GHHH/66s;;; Wd???rI   deployment_nameapp_name_check_exists_record_telemetryc                 *   t                      }t                      }||t          d          |j        }|rt          j                            d           |                    | ||          }|!|j        |                    |j	                   |S )a	  Get a handle to a deployment by name.

    This is a developer API and is for advanced Ray users and library developers.

    Args:
        deployment_name: Name of deployment to get a handle to.
        app_name: Application in which deployment resides. If calling
            from inside a Serve application and `app_name` is not
            specified, this will default to the application from which
            this API is called.

    Raises:
        RayServeException: If no Serve controller is running, or if
            calling from outside a Serve application and no application
            name is specified.

    The following example gets the handle to the ingress deployment of
    an application, which is equivalent to using `serve.get_app_handle`.

    .. testcode::

            import ray
            from ray import serve

            @serve.deployment
            def f(val: int) -> int:
                return val * 2

            serve.run(f.bind(), name="my_app")
            handle = serve.get_deployment_handle("f", app_name="my_app")
            assert handle.remote(3).result() == 6

            serve.shutdown()

    The following example demonstrates how you can use this API to get
    the handle to a non-ingress deployment in an application.

    .. testcode::

            import ray
            from ray import serve
            from ray.serve.handle import DeploymentHandle

            @serve.deployment
            class Multiplier:
                def __init__(self, multiple: int):
                    self._multiple = multiple

                def __call__(self, val: int) -> int:
                    return val * self._multiple

            @serve.deployment
            class Adder:
                def __init__(self, handle: DeploymentHandle, increment: int):
                    self._handle = handle
                    self._increment = increment

                async def __call__(self, val: int) -> int:
                    return await self._handle.remote(val) + self._increment


            # The app calculates 2 * x + 3
            serve.run(Adder.bind(Multiplier.bind(2), 3), name="math_app")
            handle = serve.get_app_handle("math_app")
            assert handle.remote(5).result() == 13

            # Get handle to Multiplier only
            handle = serve.get_deployment_handle("Multiplier", app_name="math_app")
            assert handle.remote(5).result() == 10

            serve.shutdown()
    NzcPlease specify an application name when getting a deployment handle outside of a Serve application.rk   r
  )
r-   r.   r2   r  r   $SERVE_GET_DEPLOYMENT_HANDLE_API_USEDrm   r  _handle_registration_callbackdeployment_id)r  r  r  r  rP   rV   r   s          rG   get_deployment_handler    s    `  !!F<>>#+#2  
 08H G:AA#FFF%00  1    F 	!,$BN >>v?STTTMrI   )NNNN)TTF)FTTF)T)Nr   )NTT)lrx   rr   logging	functoolsr   typingr   r   r   r   r	   r
   r   r   attrr   fastapir   r   starlette.typesr   r   r   ray._common.serializationr   ray.serve._private.build_appr   ray.serve._private.configr   r   r   r   ray.serve._private.constantsr   r   r   ray.serve._private.http_utilr   r   %ray.serve._private.local_testing_moder    ray.serve._private.logging_utilsr   ray.serve._private.usager   ray.serve._private.utilsr    r!   r"   r#   r$   r%   r&   ray.serve.configr'   r(   r)   r*   r+   ray.serve.contextr,   r-   r.   r/   ray.serve.deploymentr0   r1   ray.serve.exceptionsr2   ray.serve.handler3   ray.serve.multiplexr4   ray.serve.schemar5   r6   r7   ray.util.annotationsr8   r9   ray.serve._privater:   rD   	getLoggerrL   r   r   rH   rN   rS   rW   r~   r   r   floatr   r   r   r   r   r   r   r   r   r  r  r  r  rC   rI   rG   <module>r6     sB               M M M M M M M M M M M M M M M M M M M M       & & & & & & & & # # # # # # 



       2 2 2 2 2 2 2 2 2 2 2 2                    
        O N N N N N G G G G G G 2 2 2 2 2 2                                          9 8 8 8 8 8 8 8 2 2 2 2 2 2 - - - - - - 6 6 6 6 6 6 M M M M M M M M M M 8 8 8 8 8 8 8 8 2 2 2 2 2 2 
	,	-	- X6:37377;	% %$]23%dK/0% dK/0% $m34	% % % %P X  & W  & $^ $ $ $ $> XGw() Gh G G G GT X)- #M7>}.5m'.}?F}-4]*1-*1-)0(/HO4;M29-,3M-4]@G 	07-BP BPX&BP
#,BP S\BP (5c?34	BP
 %T	*+BP t}BP %T$sEz*:%;<BP &clBP #3<BP #'BP "#,BP !BP  d,=t&C DEBP $+5>BP ")BP  #5>!BP" $EN#BP$ E$t";<=%BP& #d'-.'BP, ")-BP. xj*$%/BP BP BP BPJ 
$* * * * * * *  *  26*. %	O Oi O*.O $(O 	O
 

O O O Od X &"%;? %$)	 	 		 	 		
 3-	 U4#678	 	 "	 	 	 	 	<  15*. %% %i %% +/% $(	%
 % 

% % % %P X &"%;? %$)1 111 1 3-	1
 U4#6781 1 "1 1 1 1 1h X3 3 3 3 3 3 3 VQRFR FR
8CH%
&FRKNFR FR FR FRR V1# 1 1 1 1> W! ! ! ! !6 W @  @!1  @  @  @  @F  #"	i iismi i 	i
 i i i i i irI   