
Commit 868960e

Merge pull request #17 from automl/master
Update to current master
2 parents 3020c04 + bfe2ea3 commit 868960e

34 files changed (+14884 / -169 lines)

.coveragerc

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+[run]
+omit =
+    example.py
+    mdp_playground/analysis/mdpp_to_cave.py

codecov.yml

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+coverage:
+  range: 68..100
+  round: down
+  precision: 2
+  status:
+    project:
+      default:
+        # basic
+        target: 68%
+        threshold: 5%
+        base: auto

example.py

Lines changed: 124 additions & 38 deletions
@@ -8,6 +8,8 @@
 one for grid environments with image representations
 one for wrapping Atari env qbert
 one for wrapping Mujoco env HalfCheetah
+one for wrapping MiniGrid env
+one for wrapping ProcGen env
 two examples at the end showing how to create toy envs using gym.make()

 Many further examples can be found in test_mdp_playground.py.
@@ -21,6 +23,17 @@
 import numpy as np


+def display_image(obs, mode="RGB"):
+    # Display the image observation associated with the next state
+    from PIL import Image
+
+    # Because numpy is row-major and Image is column major, need to transpose
+    obs = obs.transpose(1, 0, 2)
+    img1 = Image.fromarray(np.squeeze(obs), mode)  # squeeze() is
+    # used because the image is 3-D because frameworks like Ray expect the image
+    # to be 3-D.
+    img1.show()
+
 def discrete_environment_example():

     config = {}
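The display_image helper added above is what the image-representation examples further down in this diff now call instead of duplicating the PIL code. A minimal, self-contained sketch of invoking it directly (the random array and the import from example.py are illustrative assumptions, not part of the commit):

    import numpy as np
    from example import display_image  # helper defined in the hunk above

    # Stand-in for a grayscale image observation of shape (height, width, channels)
    dummy_obs = np.random.randint(0, 256, size=(100, 100, 1), dtype=np.uint8)
    display_image(dummy_obs, mode="L")  # "L" for grayscale, "RGB" for 3-channel observations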
@@ -100,18 +113,10 @@ def discrete_environment_image_representations_example():
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)

-    # Display the image observation associated with the next state
-    from PIL import Image
-
-    # Because numpy is row-major and Image is column major, need to transpose
-    next_state_image = next_state_image.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_state_image), "L")  # 'L' is used for
-    # black and white. squeeze() is used because the image is 3-D because
-    # frameworks like Ray expect the image to be 3-D.
-    img1.show()
-
     env.close()

+    display_image(next_state_image, mode="L")
+

 def continuous_environment_example_move_along_a_line():
@@ -235,15 +240,8 @@ def grid_environment_image_representations_example():
     env.reset()
     env.close()

-    # Display the image observation associated with the next state
-    from PIL import Image
+    display_image(next_obs)

-    # Because numpy is row-major and Image is column major, need to transpose
-    next_obs = next_obs.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_obs), "RGB")  # squeeze() is
-    # used because the image is 3-D because frameworks like Ray expect the image
-    # to be 3-D.
-    img1.show()


 def atari_wrapper_example():
@@ -256,29 +254,32 @@ def atari_wrapper_example():
         "state_space_type": "discrete",
     }

-    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs import GymEnvWrapper
     import gym

     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
     state = env.reset()

     print(
-        "Taking a step in the environment with a random action and printing the transition:"
-    )
-    action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print(
-        "s.shape ar s'.shape, done =",
-        state.shape,
-        action,
-        reward,
-        next_state.shape,
-        done,
+        "Taking 10 steps in the environment with a random action and printing the transition:"
     )
+    for i in range(10):
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print(
+            "s.shape ar s'.shape, done =",
+            state.shape,
+            action,
+            reward,
+            next_state.shape,
+            done,
+        )

     env.close()

+    display_image(next_state)


 def mujoco_wrapper_example():
@@ -298,23 +299,103 @@ def mujoco_wrapper_example():
     # This actually makes a subclass and not a wrapper. Because, some
     # frameworks might need an instance of this class to also be an instance
     # of the Mujoco base_class.
-    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
-    from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    try:
+        from mdp_playground.envs import get_mujoco_wrapper
+        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
+
+        env = wrapped_mujoco_env(**config)
+        state = env.reset()
+
+        print(
+            "Taking a step in the environment with a random action and printing the transition:"
+        )
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print("sars', done =", state, action, reward, next_state, done)

-    wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
+        env.close()

-    env = wrapped_mujoco_env(**config)
-    state = env.reset()
+    except ImportError as e:
+        print("Exception:", type(e), e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
+        return
+
+
+def minigrid_wrapper_example():
+
+    config = {
+        "seed": 0,
+        "delay": 1,
+        "transition_noise": 0.25,
+        "reward_noise": lambda a: a.normal(0, 0.1),
+        "state_space_type": "discrete",
+    }
+
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    import gym
+
+    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    env = gym.make('MiniGrid-Empty-8x8-v0')
+    env = RGBImgPartialObsWrapper(env)  # Get pixel observations
+    env = ImgObsWrapper(env)  # Get rid of the 'mission' field
+
+    env = GymEnvWrapper(env, **config)
+    obs = env.reset()  # This now produces an RGB tensor only

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print("sars', done =", state, action, reward, next_state, done)
+    next_obs, reward, done, info = env.step(action)
+    print(
+        "s.shape ar s'.shape, done =",
+        obs.shape,
+        action,
+        reward,
+        next_obs.shape,
+        done,
+    )
+
+    env.close()
+
+    display_image(next_obs)
+
+
+def procgen_wrapper_example():
+
+    config = {
+        "seed": 0,
+        "delay": 1,
+        "transition_noise": 0.25,
+        "reward_noise": lambda a: a.normal(0, 0.1),
+        "state_space_type": "discrete",
+    }
+
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    import gym
+
+    env = gym.make("procgen:procgen-coinrun-v0")
+    env = GymEnvWrapper(env, **config)
+    obs = env.reset()
+
+    print(
+        "Taking a step in the environment with a random action and printing the transition:"
+    )
+    action = env.action_space.sample()
+    next_obs, reward, done, info = env.step(action)
+    print(
+        "s.shape ar s'.shape, done =",
+        obs.shape,
+        action,
+        reward,
+        next_obs.shape,
+        done,
+    )

     env.close()

+    display_image(next_obs)
+

 if __name__ == "__main__":

@@ -358,6 +439,12 @@ def mujoco_wrapper_example():
     print(set_ansi_escape + "\nRunning Mujoco wrapper example:\n" + reset_ansi_escape)
     mujoco_wrapper_example()

+    print(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
+    minigrid_wrapper_example()
+
+    # print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
+    # procgen_wrapper_example()
+
     # Using gym.make() example 1
     import mdp_playground
     import gym
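procgen_wrapper_example() is left commented out in the __main__ block above and, unlike the MuJoCo example, has no ImportError guard, so it would fail when the procgen package is not installed. A hypothetical sketch of wrapping it with the same try/except pattern used in mujoco_wrapper_example() (the function name and message text are illustrative, not part of this commit):

    def procgen_wrapper_example_guarded():
        # Hypothetical variant: same ImportError guard as mujoco_wrapper_example()
        try:
            import procgen  # noqa: F401  # fail early if the procgen extra is missing
        except ImportError as e:
            print("Exception:", type(e), e, "caught. You may need to install procgen. NOT running procgen_wrapper_example.")
            return
        procgen_wrapper_example()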
@@ -371,7 +458,6 @@ def mujoco_wrapper_example():
             "action_space_size": 8,
             "state_space_type": "discrete",
             "action_space_type": "discrete",
-            "terminal_state_density": 0.25,
             "maximally_connected": True,
         }
     )
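The hunk above shows only the tail of the first gym.make() example; a rough sketch of what the full call plausibly looks like after this commit (the registered ID "RLToy-v0", the "state_space_size" entry, and the exact surrounding lines are assumptions based on MDP Playground's usual registration, not shown in this diff):

    import gym
    import mdp_playground  # importing the package registers the toy envs with gym

    env = gym.make(
        "RLToy-v0",  # assumed env ID
        **{
            "state_space_size": 8,  # assumed; not visible in the hunk above
            "action_space_size": 8,
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "maximally_connected": True,
        }
    )
    state = env.reset()
    env.close()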
Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+import itertools
+from ray import tune
+from collections import OrderedDict
+num_seeds = 5
+timesteps_total = 10_000_000
+
+var_env_configs = OrderedDict(
+    {
+        "image_transforms": [
+            "none",
+            "shift",
+            # "scale",
+            # "flip",
+            # "rotate",
+            # "shift,scale,rotate,flip",
+        ],  # image_transforms,
+        "dummy_seed": [i for i in range(num_seeds)],
+    }
+)
+
+var_configs = OrderedDict({"env": var_env_configs})
+
+env_config = {
+    "env": "GymEnvWrapper-Atari",
+    "env_config": {
+        "AtariEnv": {
+            "game": "beam_rider",
+            "obs_type": "image",
+            "frameskip": 1,
+        },
+        # "GymEnvWrapper": {
+        "atari_preprocessing": True,
+        "frame_skip": 4,
+        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
+        "image_width": 40,
+        "image_padding": 30,
+        "state_space_type": "discrete",
+        "action_space_type": "discrete",
+        "seed": 0,
+        # },
+        # 'seed': 0, #seed
+    },
+}
+
+algorithm = "A3C"
+agent_config = {  # Taken from Ray tuned_examples
+    "clip_rewards": True,
+    "lr": 1e-4,
+    # Value Function Loss coefficient
+    "vf_loss_coeff": 2.5,
+    # Entropy coefficient
+    "entropy_coeff": 0.01,
+    "min_iter_time_s": 0,
+    "num_envs_per_worker": 5,
+    "num_gpus": 0,
+    "num_workers": 3,
+    "rollout_fragment_length": 10,
+    "timesteps_per_iteration": 10000,
+    "tf_session_args": {
+        # note: overriden by `local_tf_session_args`
+        "intra_op_parallelism_threads": 4,
+        "inter_op_parallelism_threads": 4,
+        # "gpu_options": {
+        #     "allow_growth": True,
+        # },
+        # "log_device_placement": False,
+        "device_count": {
+            "CPU": 2,
+            # "GPU": 0,
+        },
+        # "allow_soft_placement": True,  # required by PPO multi-gpu
+    },
+    # Override the following tf session args on the local worker
+    "local_tf_session_args": {
+        "intra_op_parallelism_threads": 4,
+        "inter_op_parallelism_threads": 4,
+    },
+}
+
+
+filters_100x100 = [
+    [
+        16,
+        [8, 8],
+        4,
+    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
+    [32, [4, 4], 2],
+    [
+        128,
+        [13, 13],
+        1,
+    ],
+]
+
+
+model_config = {
+    "model": {
+        "fcnet_hiddens": [256, 256],
+        # "custom_preprocessor": "ohe",
+        "custom_options": {},  # extra options to pass to your preprocessor
+        "conv_activation": "relu",
+        "conv_filters": filters_100x100,
+        # "fcnet_activation": "tanh",
+        "use_lstm": False,
+        "max_seq_len": 20,
+        "lstm_cell_size": 256,
+        "lstm_use_prev_action_reward": False,
+    },
+}
+
+
+eval_config = {
+    "evaluation_interval": None,  # I think this means every x training_iterations
+    "evaluation_config": {
+        "explore": False,
+        "exploration_fraction": 0,
+        "exploration_final_eps": 0,
+        "evaluation_num_episodes": 10,
+        # "horizon": 100,
+        "env_config": {
+            "dummy_eval": True,  # hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
+            "transition_noise": 0
+            if "state_space_type" in env_config["env_config"]
+            and env_config["env_config"]["state_space_type"] == "discrete"
+            else tune.function(lambda a: a.normal(0, 0)),
+            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
+            "action_loss_weight": 0.0,
+        },
+    },
+}
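The new file above appears to follow the layout of MDP Playground's Ray experiment configs: var_env_configs lists the values each varied dimension can take, and the cartesian product of those lists defines the individual runs (here 2 image_transforms x 5 dummy_seed values = 10 configurations). A minimal sketch of expanding such a grid; the expand_grid helper below is illustrative, and the repository's own run script may handle this differently:

    import itertools
    from collections import OrderedDict

    def expand_grid(grid):
        # Yield one OrderedDict per point in the cartesian product of the value lists.
        keys = list(grid.keys())
        for values in itertools.product(*(grid[k] for k in keys)):
            yield OrderedDict(zip(keys, values))

    # With var_env_configs as defined above: 2 image_transforms x 5 dummy_seeds = 10 configs
    # for cfg in expand_grid(var_env_configs):
    #     print(cfg)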
