加入提前停止机制，并减少环境步进过程中的中间数据向 CPU 的传输

This commit is contained in:
qihuanye
2026-05-18 00:48:59 +08:00
parent 113e591899
commit 28f2fba0e8
5 changed files with 138 additions and 16 deletions

View File

@@ -984,17 +984,32 @@ class World:
)
# run normal evaluation for up to eval_budget steps and record video
active_mask = np.ones(self.num_envs, dtype=bool)
last_eval_step = 0
for i in range(eval_budget):
video_frames[:, i] = self.infos['pixels'][:, -1]
last_eval_step = i
self.infos.update(goal_step)
self.step()
actions = self.policy.get_action(self.infos, active_mask=active_mask)
(
self.states,
self.rewards,
self.terminateds,
self.truncateds,
self.infos,
) = self.envs.step(actions)
results['episode_successes'] = np.logical_or(
results['episode_successes'], self.terminateds
)
active_mask = np.logical_not(results['episode_successes'])
if not np.any(active_mask):
break
# for auto-reset
self.envs.unwrapped._autoreset_envs = np.zeros((self.num_envs,))
video_frames[:, -1] = self.infos['pixels'][:, -1]
video_frames[:, last_eval_step] = self.infos['pixels'][:, -1]
if last_eval_step + 1 < eval_budget:
video_frames[:, last_eval_step + 1 :] = video_frames[:, last_eval_step : last_eval_step + 1]
n_episodes = len(episodes_idx)