
    &`iH.              	          d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZ d dlZd dlmZ erVd dlmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z. ed         Z/	 ee/e0e1f         Z2	 ee	e3df         ee3         f         Z4ed         Z5	 ee6ddf         Z7	 ed         Z8	 ee6ef         Z9	 e0Z:	 e0Z;	 e0Z<	 ee	e3ee3e	e3e3f         f         ee3e	e3e3f         f         f                  Z=	 eee6eee>e6f         f         e>e6f         Z?	 e0Z@	 ee3e6f         ZA	 eeejB        f         ZC	 edgeeC         f         ZD	 eZE	 e6ZF	 e6ZG	 eeFdf         ZH	 ed         ZI	 eeFed         geJf         ZK	 eeEeIgeGf         ZL	 eeeG         eeGed         geJf         f         ZM	 ee6e2f         ZN	 e
ed                  ZO	 ee3e6f         ZP	 e3ZQ	 eeEef         ZR	 eeAeRf         ZS	 eZT	 eZU	 e0ZV	 eZW	 ee6df         ZX	 eZY	 ed         ZZ	 eZZ[	 ed         Z\	 eZ]	 ee]e\f         Z^	 ee\         Z_	 ee6e\f         Z`	 eeaeeee3eaf                           ee	e3ee3eaf         f                  f         Zb	 e0Zc	 e0Zd	 eee	e/e/f                  ee/         f         Ze	 e0Zf	 ee6e/f         Zg	 eddee6ef         f         Zh	 eeji        jj        ee6eji        jj        f         e	eji        jj        df         f         Zk	 eee                  Zl	 e	e2elef         Zm	 e G d d                      Zne G d  d!                      Zoe G d" d#                      Zp ed$          ZqdS )%    )TYPE_CHECKINGAnyCallableDictHashableListOptionalSequenceTupleTypeTypeVarUnionN)OldAPIStack)NDArray)MultiRLModuleSpec)RLModuleSpec)
EnvContext)MultiAgentEpisode)SingleAgentEpisode)DynamicTFPolicyV2)EagerTFPolicyV2)
PolicySpec)MultiAgentBatchSampleBatch)ViewRequirement)zNDArray[Any]zjnp.ndarrayz	tf.Tensortorch.Tensor.)ztorch.nn.Modulezkeras.Modelztorch.deviceint)r   r   r   r   )r   r   r   )r   r   r   )ztorch.optim.Optimizerzkeras.optimizers.Optimizer)r   ztf.Variabler   c                   &    e Zd ZdZdededefdZdS )AgentConnectorDataTypea  Data type that is fed into and yielded from agent connectors.

    Args:
        env_id: ID of the environment.
        agent_id: ID to help identify the agent from which the data is received.
        data: A payload (``data``). With RLlib's default sampler, the payload
            is a dictionary of arbitrary data columns (obs, rewards, terminateds,
            truncateds, etc).
    env_idagent_iddatac                 0    || _         || _        || _        d S N)r    r!   r"   )selfr    r!   r"   s       j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/typing.py__init__zAgentConnectorDataType.__init__5  s     			    N)__name__
__module____qualname____doc__strr   r'    r(   r&   r   r   )  sG         s c       r(   r   c                   *    e Zd ZdZdedededefdZdS )ActionConnectorDataTypea?  Data type that is fed into and yielded from agent connectors.

    Args:
        env_id: ID of the environment.
        agent_id: ID to help identify the agent from which the data is received.
        input_dict: Input data that was passed into the policy.
            Sometimes output must be adapted based on the input, for example
            action masking. So the entire input data structure is provided here.
        output: An object of PolicyOutputType. It is is composed of the
            action output, the internal state output, and additional data fetches.

    r    r!   
input_dictoutputc                 >    || _         || _        || _        || _        d S r$   )r    r!   r1   r2   )r%   r    r!   r1   r2   s        r&   r'   z ActionConnectorDataType.__init__N  s$      $r(   N)r)   r*   r+   r,   r-   TensorStructTypePolicyOutputTyper'   r.   r(   r&   r0   r0   ?  sX         

 
 %	

 !
 
 
 
 
 
r(   r0   c                   2    e Zd ZdZdeeef         ddfdZdS )AgentConnectorsOutputa=  Final output data type of agent connectors.

    Args are populated depending on the AgentConnector settings.
    The branching happens in ViewRequirementAgentConnector.

    Args:
        raw_dict: The raw input dictionary that sampler can use to
            build episodes and training batches.
            This raw dict also gets passed into ActionConnectors in case
            it contains data useful for action adaptation (e.g. action masks).
        sample_batch: The SampleBatch that can be immediately used for
            querying the policy for next action.
    raw_dictsample_batchr   c                 "    || _         || _        d S r$   )r8   r9   )r%   r8   r9   s      r&   r'   zAgentConnectorsOutput.__init__o  s     !(r(   N)r)   r*   r+   r,   r   r-   r4   r'   r.   r(   r&   r7   r7   _  sM         )S"223)CP) ) ) ) ) )r(   r7   T)rtypingr   r   r   r   r   r   r	   r
   r   r   r   r   	gymnasiumgymray.rllib.utils.annotationsr   	jax.numpynumpyjnpkeras
tensorflowtftorchnumpy.typingr   (ray.rllib.core.rl_module.multi_rl_moduler   "ray.rllib.core.rl_module.rl_moduler   ray.rllib.env.env_contextr   !ray.rllib.env.multi_agent_episoder   "ray.rllib.env.single_agent_episoder   %ray.rllib.policy.dynamic_tf_policy_v2r   #ray.rllib.policy.eager_tf_policy_v2r   ray.rllib.policy.policyr   ray.rllib.policy.sample_batchr   r   !ray.rllib.policy.view_requirementr   
TensorTypedicttupler4   r   TensorShapeNetworkTyper-   
DeviceTypeRLModuleSpecType	StateDictAlgorithmConfigDictPartialAlgorithmConfigDictModelConfigDictConvFilterSpectypeFromConfigSpecEnvConfigDictEnvIDEnvEnvType
EnvCreatorAgentIDPolicyIDModuleIDMultiAgentPolicyConfigDictEpisodeTypeboolIsPolicyToTrainAgentToModuleMappingFnShouldModuleBeUpdatedFnPolicyStateTFPolicyV2Type	EpisodeIDUnrollIDMultiAgentDictMultiEnvDict
EnvObsTypeEnvActionTypeEnvInfoDictFileTypeViewRequirementsDict
ResultDictLocalOptimizer	OptimizerParamParamRef	ParamDict	ParamListNamedParamDictfloatLearningRateOrScheduleGradInfoDictLearnerStatsDictModelGradientsModelWeightsModelInputDictSampleBatchTypespacesSpaceSpaceStructStateBatchesr5   r   r0   r7   r;   r.   r(   r&   <module>r      s                                  3 3 3 3 3 3 BLLLLLL$$$$$$JJJJJJ??????444444CCCCCCEEEEEEGGGGGGCCCCCC222222JJJJJJJJAAAAAA MN

 T501  N E#s(OT#Y./45 3-.

 <=  5cN	 M  " 
 
 	#uS%S/)*E#uS#X2F,G
GH tCsD#~!667sBC
 
 	c3h SW

 |nhw&778
  D B D!(L"89  @=> C Hh/@&ABDHI "!7K"8("BC  5Xh!234d:;= 
 3(()
 eBCD 8#s(O	 7 Ogsl# 9E>)*
 
 B 9
  +C!223 
 
 LM +	 %+, 8 25!	 >K	 c5j! 9	eCJ	 !sE#u*%%	&') 

    tE*j"89:D<LLM
  Hc:o& 2'8$sCx.HI  Jd3
 00159I39N3OO
 DI )<=>  5
        *        > ) ) ) ) ) ) ) )4 GCLLr(   