Commit 5e5dcb6

Added examples to example.py + minor changes, including to test_image_continuous.py and improved documentation
1 parent c89192a · commit 5e5dcb6

File tree

4 files changed: +161 −24 lines changed


example.py

Lines changed: 133 additions & 12 deletions
@@ -3,7 +3,10 @@
 Calling this file as a script, invokes the following examples:
 one for basic discrete environments
 one for discrete environments with image representations
+one for discrete environments with a diameter > 1 and image representations
 one for continuous environments with reward function move to a target point
+one for continuous environments with reward function move to a target point with irrelevant features and image representations
+one for continuous environments with reward function move along a line
 one for basic grid environments
 one for grid environments with image representations
 one for wrapping Atari env qbert
@@ -33,6 +36,7 @@ def display_image(obs, mode="RGB"):
     # used because the image is 3-D because frameworks like Ray expect the image
     # to be 3-D.
     img1.show()
+    return img1
 
 
 def discrete_environment_example():
@@ -110,6 +114,7 @@ def discrete_environment_image_representations_example():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)
@@ -119,28 +124,75 @@ def discrete_environment_image_representations_example():
     display_image(next_state_image, mode="L")
 
 
-def continuous_environment_example_move_along_a_line():
+def discrete_environment_diameter_image_representations_example():
 
+    config = {}
+    config["seed"] = 3
+
+    config["state_space_type"] = "discrete"
+    config["action_space_size"] = 4
+    config["image_representations"] = True
+    config["delay"] = 1
+    config["diameter"] = 2
+    config["sequence_length"] = 3
+    config["reward_scale"] = 2.5
+    config["reward_shift"] = -1.75
+    config["reward_noise"] = 0.5  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1
+    config["reward_density"] = 0.25
+    config["make_denser"] = False
+    config["terminal_state_density"] = 0.25
+    config["maximally_connected"] = True
+    config["repeats_in_sequences"] = False
+
+    config["generate_random_mdp"] = True
+    env = RLToyEnv(**config)
+
+    # The environment maintains an augmented state which contains the underlying
+    # state used by the MDP to perform transitions and hand out rewards. We can
+    # fetch a dict containing the augmented state and current state like this:
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"]
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
+    # the current discrete state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    display_image(next_state_image, mode="L")
+
+
+def continuous_environment_example_move_to_a_point():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 4
+    config["state_space_dim"] = 2
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
     config["time_unit"] = 1  # Discretization of time domain and the time
     # duration over which action is applied
 
-    config["delay"] = 0
-    config["sequence_length"] = 10
-    config["reward_scale"] = 1.0
-    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["reward_function"] = "move_along_a_line"
+    config["make_denser"] = True
+    config["target_point"] = [0, 0]
+    config["target_radius"] = 0.05
+    config["state_space_max"] = 10
+    config["action_space_max"] = 1
+    config["action_loss_weight"] = 0.0
+
+    config["reward_function"] = "move_to_a_point"
 
     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -153,12 +205,12 @@ def continuous_environment_example_move_along_a_line():
     env.close()
 
 
-def continuous_environment_example_move_to_a_point():
+def continuous_environment_example_move_to_a_point_irrelevant_image():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 2
+    config["state_space_dim"] = 4
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
@@ -174,8 +226,55 @@ def continuous_environment_example_move_to_a_point():
 
     config["reward_function"] = "move_to_a_point"
 
+    config["image_representations"] = True
+    config["irrelevant_features"] = True
+    config["relevant_indices"] = [0, 1]
+
     env = RLToyEnv(**config)
     state = env.reset()
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    img1 = display_image(next_state_image, mode="RGB")
+    img1.save("cont_env_irrelevant_image.pdf")
+
+
+def continuous_environment_example_move_along_a_line():
+
+    config = {}
+    config["seed"] = 0
+
+    config["state_space_type"] = "continuous"
+    config["state_space_dim"] = 4
+    config["transition_dynamics_order"] = 1
+    config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
+    # inertia.
+    config["time_unit"] = 1  # Discretization of time domain and the time
+    # duration over which action is applied
+
+    config["delay"] = 0
+    config["sequence_length"] = 10
+    config["reward_scale"] = 1.0
+    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["reward_function"] = "move_along_a_line"
+
+    env = RLToyEnv(**config)
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -270,13 +369,14 @@ def atari_wrapper_example():
         action = env.action_space.sample()
         next_state, reward, done, info = env.step(action)
         print(
-            "s.shape ar s'.shape, done =",
+            "s.shape a r s'.shape, done =",
             state.shape,
             action,
             reward,
             next_state.shape,
             done,
         )
+        state = next_state
 
     env.close()
 
@@ -422,13 +522,34 @@ def procgen_wrapper_example():
     )
     discrete_environment_image_representations_example()
 
+    print(
+        set_ansi_escape
+        + "\nRunning discrete environment with diameter and image representations\n"
+        + reset_ansi_escape
+    )
+    discrete_environment_diameter_image_representations_example()
+
     print(
         set_ansi_escape
         + "\nRunning continuous environment: move_to_a_point\n"
         + reset_ansi_escape
    )
     continuous_environment_example_move_to_a_point()
 
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_to_a_point with irrelevant features and image representations\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_to_a_point_irrelevant_image()
+
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_along_a_line\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_along_a_line()
+
     print(
         set_ansi_escape
         + "\nRunning grid environment: move_to_a_point\n"
mdp_playground/envs/rl_toy_env.py

Lines changed: 16 additions & 6 deletions
@@ -1298,7 +1298,7 @@ def get_sequences(maximum, length, fraction, repeats=False, diameter=1):
             )
             permutations = []
             for i in range(length):
-                permutations.append(maximum - i // diameter)
+                permutations.append(maximum - (i // diameter))
             # permutations = list(range(maximum + 1 - length, maximum + 1))
             self.logger.info(
                 "No. of choices for each element in a"
@@ -1365,18 +1365,28 @@ def get_sequences(maximum, length, fraction, repeats=False, diameter=1):
                     list(range(maximum))
                 )  # # has to contain every number up to n so
                 # that any one of them can be picked as part
-                # of the sequence below
+                # of the sequence below. An example of curr_rem_digits
+                # is [[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]] for
+                # diameter = 3 and maximum = 6 (when number of non-terminal
+                # states = 6). e.g. permutations = [6, 6, 6, 5, 5].
                 for enum, j in enumerate(permutations):  # Goes
                     # from largest to smallest number among the factors of nPk
                     rem_ = curr_permutation % j
                     # rem_ = (enum // maximum) * maximum + rem_
                     seq_.append(
                         curr_rem_digits[(enum + i_s) % diameter][rem_]
-                        + ((enum + i_s) % diameter)
-                        * self.action_space_size[0]
-                    )  # Use (enum + i_s)
+                        + (((enum + i_s) % diameter)
+                        * self.action_space_size[0])
+                    )  # enum iterates over the current independent set
+                    # for the sequence being constructed by getting the
+                    # remainder with the diameter because each position in
+                    # the sequence belongs to a different independent set.
+                    # Adding i_s, i.e., the current independent set, to it
+                    # just offsets every state in the sequence
                     # to allow other independent sets to have
-                    # states beginning a rewardable sequence
+                    # states beginning a rewardable sequence. The multiplication
+                    # by self.action_space_size[0] is to get the correct state
+                    # number for the selected independent set.
                     del curr_rem_digits[(enum + i_s) % diameter][rem_]
                     # print("curr_rem_digits", curr_rem_digits)
                     curr_permutation = curr_permutation // j
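To make the new comments concrete, here is a toy illustration (not the library's code path) of the state-number arithmetic, using the example values from the comments: diameter = 3 and 6 non-terminal states per independent set, with a plain `action_space_size` variable standing in for `self.action_space_size[0]`.

# Toy illustration (not library code) of mapping a digit picked from an
# independent set to a global state number, per the comments above.
diameter = 3
action_space_size = 6  # stands in for self.action_space_size[0]
curr_rem_digits = [list(range(6)) for _ in range(diameter)]

i_s = 1   # independent set in which the rewardable sequence starts
enum = 2  # position within the sequence
rem_ = 4  # digit index chosen from the remaining digits

set_idx = (enum + i_s) % diameter  # each position uses the next set, cyclically
state = curr_rem_digits[set_idx][rem_] + set_idx * action_space_size
print(set_idx, state)  # 0 4 -> digit 4 of set 0, offset by 0 * 6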

mdp_playground/spaces/image_continuous.py

Lines changed: 5 additions & 2 deletions
@@ -83,9 +83,12 @@ def __init__(
 
         self.goal_colour = (0, 255, 0)
         self.agent_colour = (0, 0, 255)
-        self.term_colour = (255, 0, 0)
-        self.bg_colour = (0, 0, 0)
+        self.term_colour = (0, 0, 0)
+        self.bg_colour = (208, 208, 208)
         self.line_colour = (255, 255, 255)
+        # Alternate scheme
+        # self.term_colour = (255, 0, 0)
+        # self.bg_colour = (0, 0, 0)
 
         assert len(feature_space.shape) == 1
         relevant_dims = len(relevant_indices)

mdp_playground/spaces/test_image_continuous.py

Lines changed: 7 additions & 4 deletions
@@ -11,6 +11,8 @@
 
 class TestImageContinuous(unittest.TestCase):
     def test_image_continuous(self):
+        # For images in the paper width and height were 100
+        # circle_radius was 5
         render = False
 
         lows = 0.0
@@ -38,18 +40,19 @@ def test_image_continuous(self):
             img1.show()
         # img1.save("cont_state_no_target.pdf")
 
+        pos = np.array([10.0, 10.0])
         target = np.array([10, 10])
         imc = ImageContinuous(
             cs2,
-            circle_radius=10,
+            circle_radius=5,
             target_point=target,
-            width=400,
-            height=400,
+            width=100,
+            height=100,
         )
         img1 = Image.fromarray(np.squeeze(imc.generate_image(pos)), "RGB")
         if render:
             img1.show()
-        # img1.save("cont_state_target.pdf")
+        img1.save("cont_state_target.pdf")
 
         # Terminal sub-spaces
         lows = np.array([2.0, 4.0])
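As a usage note, the paper-figure settings restored above (width = 100, height = 100, circle_radius = 5) can be reproduced outside the test harness with a sketch like the following. It assumes ImageContinuous is importable from mdp_playground.spaces and that a 1-D gym Box serves as the feature space, as in this test file.

# Minimal sketch, assuming ImageContinuous is importable from
# mdp_playground.spaces; settings mirror the paper values in the test above.
import gym.spaces
import numpy as np
from PIL import Image
from mdp_playground.spaces import ImageContinuous

cs = gym.spaces.Box(low=0.0, high=20.0, shape=(2,))  # 1-D feature space
imc = ImageContinuous(
    cs,
    circle_radius=5,
    target_point=np.array([10, 10]),
    width=100,
    height=100,
)
img = Image.fromarray(np.squeeze(imc.generate_image(np.array([10.0, 10.0]))), "RGB")
img.save("cont_state_target.pdf")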
