
    &`i+                     
   d dl Z d dlmZmZmZmZmZmZmZm	Z	 d dl
Zd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
l m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z) erd dlm*Z*  e j+        e,          Z- e            \  Z.Z/Z0e	 	 	 d-de1ded         de$dej2        dej2        de3deeg df                  dee3         fd            Z4ede1dee$ee1ef         ee1e%f         f         fd            Z5e	 	 	 	 	 	 d.ddde1de1de&dee6         d ee7         d!ee7         d"ee         d#e7d$ee3         de&fd%            Z8eddd&e	eee1e&f         f         fd'            Z9 ed(d)*          d/d+            Z:d, Z;eZ<dS )0    N)TYPE_CHECKINGCallableDictListOptionalTupleTypeUnion)
Deprecated)validate_module_id)ATARI_OBS_SHAPE)
PolicySpec)SampleBatch)OldAPIStack)try_import_tf)ActionConnectorDataTypeAgentConnectorDataTypeAgentConnectorsOutputPartialAlgorithmConfigDictPolicyStateTensorStructType
TensorType)log_once)Policy	policy_idpolicy_classr   merged_configobservation_spaceaction_spaceworker_indexsession_creatorztf1.Sessionseedc           	         ddl m} t          ||          r|                                }| |d<   |                    dd          }	|	dv rx| |rd| ndz   }
|	dk    r$t
                                                                          5  |r |            }n0t
                              t          j	        di |d	         
          }|                                5  |t
          
                    |           t
                              |
          5   ||||          cddd           cddd           cddd           S # 1 swxY w Y   	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   dS dS t
                              |
          5   ||||          cddd           S # 1 swxY w Y   dS  ||||          S )a  Framework-specific policy creation logics.

    Args:
        policy_id: Policy ID.
        policy_class: Policy class type.
        merged_config: Complete policy config.
        observation_space: Observation space of env.
        action_space: Action space of env.
        worker_index: Index of worker holding this policy. Default is 0.
        session_creator: An optional tf1.Session creation callable.
        seed: Optional random seed.
    r   )AlgorithmConfig__policy_id	frameworktf)tf2r'   _wk tf_session_args)configN )%ray.rllib.algorithms.algorithm_configr$   
isinstanceto_dictgettf1Graph
as_defaultSessionConfigProtoset_random_seedvariable_scope)r   r   r   r   r   r    r!   r"   r$   r&   	var_scopesesss               j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/policy.pycreate_policy_for_frameworkr<   ,   sf   . FEEEEE-11 0%--// $-M- !!+t44IM!!!M!5|!5!5!52N	 ''))   # *?,,DD ;;"RR?P1QRR '  D __&&  '++D111++I66  +|-|]         	                      	                                , ##I.. T T#|$5|]SST T T T T T T T T T T T T T T T T T |-|]KKKss   	AE37EEE+E3EEEEE3E 	 E3#E 	$E33E7:E7F66F:=F:pathreturnc                 L   t          | d          5 }t          j        |          }ddd           n# 1 swxY w Y   t          j        |d                   }|d         }|                    d|d                   }|d         }d |                                D             }|||fS )	zRead and parse policy specifications from a checkpoint file.

    Args:
        path: Path to a policy checkpoint.

    Returns:
        A tuple of: base policy config, dictionary of policy specs, and
        dictionary of policy states.
    rbNworkerpolicy_configpolicy_statesstatepolicy_specsc                 >    i | ]\  }}|t          j        |          S r-   )r   deserialize).0idspecs      r;   
<dictcomp>z6parse_policy_specs_from_checkpoint.<locals>.<dictcomp>   s6       -5RJ"4((      )openpickleloadloadsr1   items)r=   fcheckpoint_dictwrB   rC   serialized_policy_specsrE   s           r;   "parse_policy_specs_from_checkpointrV   o   s     
dD		 )Q +a..) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 	_X.//Ao&MEE/1W:66M/ 9P9V9V9X9X  L ,55s   266policyenv_idagent_idobsreward
terminated	truncatedinfoexploretimestepc
                    | j         s
J d            t          |           | j                                          | j                                         t          j        |i}
|||
t          j        <   |||
t          j        <   |||
t          j        <   |||
t          j	        <   t          |||
          g}|                      |          }g }|D ]}|                     |j        j        ||	          }t          j        d |          }t!          |||j        j        |          }| j        r|                     |          }|j        }n|d         }|                    |           | j                             |           |S )a  Run a connector enabled policy using environment observation.

    policy_inference manages policy and agent/action connectors,
    so the user does not have to care about RNN state buffering or
    extra fetch dictionaries.
    Note that connectors are intentionally run separately from
    compute_actions_from_input_dict(), so we can have the option
    of running per-user connectors on the client side in a
    server-client deployment.

    Args:
        policy: Policy object used in inference.
        env_id: Environment ID. RLlib builds environments' trajectories internally with
            connectors based on this, i.e. one trajectory per (env_id, agent_id) tuple.
        agent_id: Agent ID. RLlib builds agents' trajectories internally with connectors
            based on this, i.e. one trajectory per (env_id, agent_id) tuple.
        obs: Environment observation to base the action on.
        reward: Reward that is potentially used during inference. If not required,
            may be left empty. Some policies have ViewRequirements that require this.
            This can be set to zero at the first inference step - for example after
            calling gmy.Env.reset.
        terminated: `Terminated` flag that is potentially used during inference. If not
            required, may be left None. Some policies have ViewRequirements that
            require this extra information.
        truncated: `Truncated` flag that is potentially used during inference. If not
            required, may be left None. Some policies have ViewRequirements that
            require this extra information.
        info: Info that is potentially used durin inference. If not required,
            may be left empty. Some policies have ViewRequirements that require this.
        explore: Whether to pick an exploitation or exploration action
            (default: None -> use self.config["explore"]).
        timestep: The current (sampling) time step.

    Returns:
        List of outputs from policy forward pass.
    z<policy_inference only works with connector enabled policies.N)r_   r`   c                     | d         S )Nr   r-   )xs    r;   <lambda>z(local_policy_inference.<locals>.<lambda>   s
    QqT rL   r   )agent_connectors__check_atari_obs_spacein_evalaction_connectorsr   NEXT_OBSREWARDSTERMINATEDS
TRUNCATEDSINFOSr   compute_actions_from_input_dictdatasample_batchtreemap_structurer   raw_dictoutputappendon_policy_output)rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   
input_dictacd_list
ac_outputsoutputsacpolicy_outputaction_connector_dataacdactionss                      r;   local_policy_inferencer      s   d 	F FEF F C    ##%%%
$$&&& &,J*0
;&'.8
;*+-6
;)*(,
;$% 	vx<<.H /5.E.Eh.O.OJG H H>>G  ? 
 
 *>>=II 7Hbg.!
 !
 # 	'**+@AACjGG#A&Gw 	001FGGGGNrL   batchc                    d}                                 D ]}|                    d          r|dz  }d t          |          D             }|                     t          j                 t          j                 fd|D                                 t          j                                      t          j	                  | j
                            dd                    }|S )	a  Returns log likelihood for actions in given batch for policy.

    Computes likelihoods by passing the observations through the current
    policy's `compute_log_likelihoods()` method

    Args:
        batch: The SampleBatch or MultiAgentBatch to calculate action
            log likelihoods from. This batch/batches must contain OBS
            and ACTIONS keys.

    Returns:
        The probabilities of the actions in the batch, given the
        observations and the policy.
    r   	state_in_   c                 8    g | ]}d                      |          S )zstate_in_{})format)rH   is     r;   
<listcomp>z;compute_log_likelihoods_from_input_dict.<locals>.<listcomp>  s&    KKKa-&&q))KKKrL   c                      g | ]
}|         S r-   r-   )rH   kr   s     r;   r   z;compute_log_likelihoods_from_input_dict.<locals>.<listcomp>  s    444AuQx444rL   actions_in_input_normalizedF)r   	obs_batchstate_batchesprev_action_batchprev_reward_batchactions_normalized)keys
startswithrangecompute_log_likelihoodsr   ACTIONSOBSr1   PREV_ACTIONSPREV_REWARDSr,   )rW   r   num_state_inputsr   
state_keyslog_likelihoodss    `    r;   'compute_log_likelihoods_from_input_dictr      s    $ ZZ\\ " "<<$$ 	"!KK59I3J3JKKKJ"("@"@k)*(4444444))K$<==))K$<==!=,,-JERR #A # #O rL   z8Policy.from_checkpoint([checkpoint path], [policy IDs]?)T)newerrorc                     d S Nr-   )r=   
policy_idss     r;   load_policies_from_checkpointr     s    DrL   c                     t          d t          j        |           D                       r+t          d          rt                              d           d S d S d S )Nc              3   n   K   | ]0}t          |t          j                  r|j        t          k    nd V  1dS )FN)r/   npndarrayshaper   )rH   os     r;   	<genexpr>z*__check_atari_obs_space.<locals>.<genexpr>  sS         '1BJ&?&?J?""U     rL   )warn_about_possibly_non_wrapped_atari_enva(  The observation you fed into local_policy_inference() has dimensions (210, 160, 3), which is the standard for atari environments. If RLlib raises an error including a related dimensionality mismatch, you may need to use ray.rllib.env.wrappers.atari_wrappers.wrap_deepmind to wrap you environment.)anyrq   flattenr   loggerwarning)rZ   s    r;   rf   rf     s       c""      ?@@ 	NN#     	 	rL   )r   NN)NNNNNNr   )=loggingtypingr   r   r   r   r   r   r	   r
   	gymnasiumgymnumpyr   rq   ray.cloudpicklecloudpicklerN   ray._common.deprecationr   ray.rllib.core.rl_moduler   ray.rllib.models.preprocessorsr   ray.rllib.policy.policyr   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.frameworkr   ray.rllib.utils.typingr   r   r   r   r   r   r   ray.utilr   r   	getLogger__name__r   r2   r'   tfvstrSpaceintr<   rV   floatboolr   r   r   rf   validate_policy_idr-   rL   r;   <module>r      s#   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                      . . . . . . 7 7 7 7 7 7 : : : : : : . . . . . . 5 5 5 5 5 5 3 3 3 3 3 3 3 3 3 3 3 3                        /......		8	$	$}R  =A?L ?L?Lx.?L .?L y	?L
 )?L ?L hr='89:?L 3-?L ?L ?L ?LD 6
6
%tCO'<d3CS>TTU6 6 6 68  #!% $"e eee e 
	e
 UOe e ~e 4.e e sme e e e eP ";S:J5J0K#KL   B JRVWWW	 	 	 XW	  & (   rL   