加入提前停止机制，并减少环境步中间步骤向 CPU 的传递
This commit is contained in:
@@ -984,17 +984,32 @@ class World:
|
||||
)
|
||||
|
||||
# run normal evaluation for eval_budget and record video
|
||||
active_mask = np.ones(self.num_envs, dtype=bool)
|
||||
last_eval_step = 0
|
||||
for i in range(eval_budget):
|
||||
video_frames[:, i] = self.infos['pixels'][:, -1]
|
||||
last_eval_step = i
|
||||
self.infos.update(goal_step)
|
||||
self.step()
|
||||
actions = self.policy.get_action(self.infos, active_mask=active_mask)
|
||||
(
|
||||
self.states,
|
||||
self.rewards,
|
||||
self.terminateds,
|
||||
self.truncateds,
|
||||
self.infos,
|
||||
) = self.envs.step(actions)
|
||||
results['episode_successes'] = np.logical_or(
|
||||
results['episode_successes'], self.terminateds
|
||||
)
|
||||
active_mask = np.logical_not(results['episode_successes'])
|
||||
if not np.any(active_mask):
|
||||
break
|
||||
# for auto-reset
|
||||
self.envs.unwrapped._autoreset_envs = np.zeros((self.num_envs,))
|
||||
|
||||
video_frames[:, -1] = self.infos['pixels'][:, -1]
|
||||
video_frames[:, last_eval_step] = self.infos['pixels'][:, -1]
|
||||
if last_eval_step + 1 < eval_budget:
|
||||
video_frames[:, last_eval_step + 1 :] = video_frames[:, last_eval_step : last_eval_step + 1]
|
||||
|
||||
n_episodes = len(episodes_idx)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user