Skip to content
Snippets Groups Projects
Commit 93eb47d5 authored by Christof Kaufmann's avatar Christof Kaufmann
Browse files

Update gymnasium code to v1.0.0

parent 91c1fa0f
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:cb5980cf-bb11-477c-975d-c555dbd61e22 tags: %% Cell type:markdown id:cb5980cf-bb11-477c-975d-c555dbd61e22 tags:
# Graphic Output of Gymnasium Environments # Graphic Output of Gymnasium Environments
Although values are probably better to monitor once you have some experience, in the beginning, for debugging, and for presentations you will want to watch your agent play the game. Depending on the IDE, there are somewhat different ways to do this. Although values are probably better to monitor once you have some experience, in the beginning, for debugging, and for presentations you will want to watch your agent play the game. Depending on the IDE, there are somewhat different ways to do this.
## Recording Videos ## Recording Videos
The best solution is probably to record videos periodically. This way, you can see the agent's capabilities at different stages of learning and rewatch them later. In JupyterLab, right-click a video and select *Open in New Browser Tab*. The best solution is probably to record videos periodically. This way, you can see the agent's capabilities at different stages of learning and rewatch them later. In JupyterLab, right-click a video and select *Open in New Browser Tab*.
%% Cell type:code id:b425cecc-4212-434f-b57c-a9292c5a08d1 tags: %% Cell type:code id:b425cecc-4212-434f-b57c-a9292c5a08d1 tags:
``` python ``` python
import gymnasium as gym import gymnasium as gym
import ale_py # noqa # pylint: disable=unused-import
# env = gym.make("Pong-v4", render_mode="rgb_array") # env = gym.make("ALE/Pong-v5", render_mode="rgb_array")
# env.metadata['render_fps'] = 30 # env.metadata['render_fps'] = 30
env = gym.make("LunarLander-v2", render_mode="rgb_array") env = gym.make("LunarLander-v3", render_mode="rgb_array")
env = gym.wrappers.RecordVideo(env, './video/', disable_logger=True, env = gym.wrappers.RecordVideo(env, './video/', disable_logger=True,
episode_trigger=lambda episode_id: episode_id % 5 == 0) # every 5th episode episode_trigger=lambda episode_id: episode_id % 5 == 0) # every 5th episode
observation, info = env.reset() observation, info = env.reset()
for _ in range(1000): for _ in range(1000):
action = env.action_space.sample() action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action) observation, reward, terminated, truncated, info = env.step(action)
if terminated or truncated: if terminated or truncated:
observation, info = env.reset() observation, info = env.reset()
env.close() env.close()
``` ```
%% Cell type:markdown id:f95a4861 tags: %% Cell type:markdown id:f95a4861 tags:
## Making GIFs ## Making GIFs
GIFs are fun and easy to make and look at. Here is a way to make them after every episode. GIFs are fun and easy to make and look at. Here is a way to make them after every episode.
%% Cell type:code id:4e2994b5 tags: %% Cell type:code id:4e2994b5 tags:
``` python ``` python
import gymnasium as gym import gymnasium as gym
import ale_py # noqa # pylint: disable=unused-import
import PIL.Image import PIL.Image
import os import os
# env = gym.make("Pong-v4", render_mode="rgb_array") # env = gym.make("ALE/Pong-v5", render_mode="rgb_array_list")
# env.metadata['render_fps'] = 30 # env.metadata['render_fps'] = 30
env = gym.make("CartPole-v1", render_mode="rgb_array_list") env = gym.make("CartPole-v1", render_mode="rgb_array_list")
def save_gif(frames, filename='gym.gif'): def save_gif(frames, filename='gym.gif'):
directory = os.path.dirname(filename) directory = os.path.dirname(filename)
if not os.path.exists(directory): os.makedirs(directory, exist_ok=True)
os.makedirs(directory)
images = [PIL.Image.fromarray(frame) for frame in frames] images = [PIL.Image.fromarray(frame) for frame in frames]
images[0].save(filename, format='GIF', append_images=images[1:], images[0].save(filename, format='GIF', append_images=images[1:],
save_all=True, duration=30, loop=0) save_all=True, duration=30, loop=0)
observation, info = env.reset() observation, info = env.reset()
gif_count = 0 gif_count = 0
for _ in range(1000): for _ in range(1000):
action = env.action_space.sample() action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action) observation, reward, terminated, truncated, info = env.step(action)
if terminated or truncated: if terminated or truncated:
# save gif for every episode # save gif for every episode
save_gif(env.render(), f'gifs/gym-{gif_count:04}.gif') save_gif(env.render(), f'gifs/gym-{gif_count:04}.gif')
gif_count += 1 gif_count += 1
observation, info = env.reset() observation, info = env.reset()
env.close() env.close()
``` ```
%% Cell type:markdown id:9f7a4b36-5204-4783-9b6e-55eb3568b591 tags: %% Cell type:markdown id:9f7a4b36-5204-4783-9b6e-55eb3568b591 tags:
## Live View using Matplotlib ## Live View using Matplotlib
You can also watch the game live during training, for example every few episodes. To do so, choose `render_mode="rgb_array"` and create an initial `imshow` plot. *Very important: The cell has to end after creating the plot, because the plot only becomes visible when the cell finishes.* The plot is then updated in the second cell. You can also watch the game live during training, for example every few episodes. To do so, choose `render_mode="rgb_array"` and create an initial `imshow` plot. *Very important: The cell has to end after creating the plot, because the plot only becomes visible when the cell finishes.* The plot is then updated in the second cell.
%% Cell type:code id:17be9d89-1246-4cd7-9258-49cff9ee6da9 tags: %% Cell type:code id:17be9d89-1246-4cd7-9258-49cff9ee6da9 tags:
``` python ``` python
import gymnasium as gym import gymnasium as gym
import ale_py # noqa # pylint: disable=unused-import
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
# you may have to restart your kernel if you changed the matplotlib backend before # you may have to restart your kernel if you changed the matplotlib backend before
%matplotlib widget %matplotlib widget
env = gym.make("LunarLander-v2", render_mode="rgb_array") env = gym.make("LunarLander-v3", render_mode="rgb_array")
observation, info = env.reset() # is required once before calling env.render() observation, info = env.reset() # is required once before calling env.render()
img = plt.imshow(env.render()) # only call this once to bring up the plot img = plt.imshow(env.render()) # only call this once to bring up the plot
plt.show() plt.show()
# IMPORTANT: Split your code at this point into two cells, so the plot can be drawn # IMPORTANT: Split your code at this point into two cells, so the plot can be drawn
``` ```
%% Cell type:code id:fe77c845-f5b0-4462-a4e6-c2b5c7fd71cf tags: %% Cell type:code id:fe77c845-f5b0-4462-a4e6-c2b5c7fd71cf tags:
``` python ``` python
observation, info = env.reset() observation, info = env.reset()
for _ in range(150): for _ in range(150):
action = env.action_space.sample() action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action) observation, reward, terminated, truncated, info = env.step(action)
if terminated or truncated: if terminated or truncated:
observation, info = env.reset() observation, info = env.reset()
# update plot # update plot
img.set_data(env.render()) img.set_data(env.render())
plt.gcf().canvas.draw() plt.gcf().canvas.draw()
env.close() env.close()
print('done.') print('done.')
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment