
    &`i                         d Z ddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZ  ej        e          Ze	dej        fd            Ze	dd	ed
edefd            Ze	d             ZdS )z7Utils for minibatch SGD across multiple RLlib policies.    N)MultiAgentBatchSampleBatch)OldAPIStack)LearnerInfoBuilderarrayc                 v    | |                                  z
  t          d|                                           z  S )zNormalize the values in an array.

    Args:
        array (np.ndarray): Array of values to normalize.

    Returns:
        array with zero mean and unit standard deviation.
    g-C6?)meanmaxstd)r   s    g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/sgd.pystandardizedr      s.     EJJLL Ceiikk$:$:::    Tsamplessgd_minibatch_sizeshufflec              #     K   |s| V  dS t          | t                    rt          d          d| vrd| vr|                                  |                     |          }|\  }}t          |          dk    r,|rt          j        |           |D ]\  }}| ||         V  dS t          t          ||                    }|rt          j        |           |D ]%\  \  }}\  }}	| 	                    ||||	          V  &dS )a  Return a generator yielding minibatches from a sample batch.

    Args:
        samples: SampleBatch to split up.
        sgd_minibatch_size: Size of minibatches to return.
        shuffle: Whether to shuffle the order of the generated minibatches.
            Note that in case of a non-recurrent policy, the incoming batch
            is globally shuffled first regardless of this setting, before
            the minibatches are generated from it!

    Yields:
        SampleBatch: Each of size `sgd_minibatch_size`.
    Nz;Minibatching not implemented for multi-agent in simple mode
state_in_0state_out_0r   )

isinstancer   NotImplementedErrorr   _get_slice_indiceslenrandomlistzipslice)
r   r   r   
all_slicesdata_slicesstate_slicesijsisjs
             r   minibatchesr$      sa       '?++ 
!I
 
 	
 7""}G'C'C++,>??J *K
<A 	(N;''' 	 	DAq!A#,	 	 #k<8899
 	'N:&&& * 	. 	.FQHR--1b"------	. 	.r   c           	         |                                  } t          d          }|                                D ]\  }}|| j        vr| j        |         }	|D ]}
t	          |	|
                   |	|
<   |                                rK|j        d         d         |k    r4t          d                    ||j        d         d                             t          |          D ][}t          |	|          D ]H}|                    t          ||i|j                            |         }|                    ||           I\|                                }|S )a	  Execute minibatch SGD.

    Args:
        samples: Batch of samples to optimize.
        policies: Dictionary of policies to optimize.
        local_worker: Master rollout worker instance.
        num_sgd_iter: Number of epochs of optimization to take.
        sgd_minibatch_size: Size of minibatches to use for optimization.
        standardize_fields: List of sample field names that should be
            normalized prior to optimization.

    Returns:
        averaged info fetches over the last SGD epoch taken.
       )num_devicesmodelmax_seq_lenzC`sgd_minibatch_size` ({}) cannot be smaller than`max_seq_len` ({}).)as_multi_agentr   itemspolicy_batchesr   is_recurrentconfig
ValueErrorformatranger$   learn_on_batchr   countadd_learn_on_batch_resultsfinalize)r   policieslocal_workernum_sgd_iterr   standardize_fieldslearner_info_builder	policy_idpolicybatchfieldr    	minibatchresultslearner_infos                  r   do_minibatch_sgdrB   H   s   2 $$&&G .!<<<%^^-- T T	6G222&y1' 	6 	6E'e55E%LL !!		g&}58JJJ&&,f&g(>}(M' '   |$$ 	T 	TA(0BCC T T	 //'I(>	PP  	
 %??SSSST	T (0022Lr   )T)__doc__loggingr   numpynpray.rllib.policy.sample_batchr   r   ray.rllib.utils.annotationsr   $ray.rllib.utils.metrics.learner_infor   	getLogger__name__loggerndarrayr   intboolr$   rB    r   r   <module>rQ      s   = =       F F F F F F F F 3 3 3 3 3 3 C C C C C C		8	$	$ 	;
 	; 	; 	; 	; (. (. (.# (. (. (. (. (.V @ @ @ @ @r   