
    &`i                     @    d dl Zd dlmZ ededefd            Zd ZdS )    N)DeveloperAPIgammalambda_c                    | d|z
  z  x}}t          j        |d          }||d|z
  z  |dd         z  z   }d|z
  }	g }
|d         }t          t          |j        d                             D ]A}||         |	|         |z  |z  |z  z   }|
                    |           ||         r||         }Bt          j        t          t          |
                    d          }|                    t           j                  S )zComputes value function (vf) targets given vf predictions and rewards.

    Note that advantages can then easily be computed via the formula:
    advantages = targets - vf_predictions
    g      ?g           Nr   )axis)	npappendreversedrangeshapestacklistastypefloat32)valuesrewardsterminateds
truncatedsr   r   orig_valuesflat_valuesintermediates	continuesRslasttvalue_targetss                 /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/postprocessing/value_predictions.pycompute_value_targetsr       s	    !'#*; <<K+)K--Keq7{3k!""oEEMk!I	Br?DeM/23344 " "Q)A,"6"@4"GG
		$a= 	"q>D HT(2,,//a888M
+++    c                 D   g }t          |          |z  dk    r#t          dt          |           d| d          |dd         }d}|t          |          dz
  k     r|dz  }||         }||k     r;|                    | |                    | |d         } ||xx         |z  cc<   |dz  }nL||k    r)|                    | |                    | |dz   d         } n| dd         } ||dz   xx         |z  cc<   |t          |          dz
  k     t	          j        |          S )a  Returns a bootstrapped value batch given value predictions.

    Note that the incoming value predictions must have happened over (artificially)
    elongated episodes (by 1 timestep at the end). This way, we can either extract the
    `vf_preds` at these extra timesteps (as "bootstrap values") or skip over them
    entirely if they lie in the middle of the T-slices.

    For example, given an episodes structure like this:
    01234a 0123456b 01c 012- 0123e 012-
    where each episode is separated by a space and goes from 0 to n and ends in an
    artificially elongated timestep (denoted by 'a', 'b', 'c', '-', or 'e'), where '-'
    means that the episode was terminated and the bootstrap value at the end should be
    zero and 'a', 'b', 'c', etc.. represent truncated episode ends with computed vf
    estimates.
    The output for the above sequence (and T=4) should then be:
    4 3 b 2 3 -

    Args:
        vf_preds: The computed value function predictions over the artificially
            elongated episodes (by one timestep at the end).
        episode_lengths: The original (correct) episode lengths, NOT counting the
            artificially added timestep at the end.
        T: The size of the time dimension by which to slice the data. Note that the
            sum of all episode lengths (`sum(episode_lengths)`) must be dividable by T.

    Returns:
        The batch of bootstrapped values.
    r   zDCan only extract bootstrapped values if the sum of episode lengths (z) is dividable by the given T (z)!Nr   r   )sum
ValueErrorlenr   r
   array)vf_predsepisode_lengthsTbootstrapped_valuesieps_lens         r   extract_bootstrapped_valuesr-   )   s   : 
?a1$$KO$$K KEFK K K
 
 	
 &aaa(O
A
c/""Q&
&
&	Q!!$ w;;&&x{333|HA!#FAA '\\&&x{333A(HH
  |HAE"""g-"""3 c/""Q&
&
&6 8'(((r!   )numpyr
   ray.util.annotationsr   floatr    r-    r!   r   <module>r2      sv        - - - - - - ,
 , , , , ,DC) C) C) C) C)r!   