Skip to content

Commit 49551d2

Browse files
reward_every_n_steps updated to work with continuous and grid envs (added an example but not test case for these; fixed existing test cases for discrete envs); simplified reward buffer use. Improved example.py: better help message and made it possible to call individual example functions.
1 parent 2b9f7d5 commit 49551d2

File tree

5 files changed

+130
-2092
lines changed

5 files changed

+130
-2092
lines changed

‎.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ __pycache__/
66
MUJOCO_LOG.TXT
77
*.pdf
88

9+
# Debugging stuff
10+
test_run*.txt
911
log*
12+
temp.txt
1013

1114
*.swp
1215
*.csv

‎example.py

Lines changed: 83 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
"""We collect here some examples of basic usage for MDP Playground.
2-
Example call: python example.py --do_not_display_images --log_level INFO
1+
"""We collect some examples of basic usage for MDP Playground in this script.
2+
Example calls:
3+
python example.py --do_not_display_images --log_level INFO
4+
python example.py --do_not_display_images --func_list discrete_environment_example
5+
Equivalent calls with short flags:
6+
python example.py -n -ll INFO
7+
python example.py -n -f discrete_environment_example
38
49
Calling this file as a script invokes the following examples:
510
one for basic discrete environments
@@ -9,6 +14,7 @@
914
one for continuous environments with reward function move to a target point with irrelevant features and image representations
1015
one for continuous environments with reward function move along a line
1116
one for basic grid environments
17+
one for grid environments with reward_every_n_steps
1218
one for grid environments with image representations
1319
one for wrapping Atari env qbert
1420
one for wrapping Mujoco envs HalfCheetah, Pusher, Reacher
@@ -42,6 +48,7 @@ def display_image(obs, mode="RGB"):
4248

4349

4450
def discrete_environment_example():
51+
"""discrete environment example"""
4552

4653
config = {}
4754
config["seed"] = 0
@@ -82,6 +89,7 @@ def discrete_environment_example():
8289

8390

8491
def discrete_environment_image_representations_example():
92+
'''discrete environment with image representations example'''
8593

8694
config = {}
8795
config["seed"] = 0
@@ -128,6 +136,7 @@ def discrete_environment_image_representations_example():
128136

129137

130138
def discrete_environment_diameter_image_representations_example():
139+
'''discrete environment with diameter > 1 and image representations example'''
131140

132141
config = {}
133142
config["seed"] = 3
@@ -175,6 +184,8 @@ def discrete_environment_diameter_image_representations_example():
175184

176185

177186
def continuous_environment_example_move_to_a_point():
187+
'''continuous environment example: move to a point'''
188+
178189
config = {}
179190
config["seed"] = 0
180191

@@ -210,6 +221,8 @@ def continuous_environment_example_move_to_a_point():
210221

211222

212223
def continuous_environment_example_move_to_a_point_irrelevant_image():
224+
'''continuous environment example: move to a point with irrelevant features and image representations'''
225+
213226
config = {}
214227
config["seed"] = 0
215228

@@ -259,6 +272,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
259272

260273

261274
def continuous_environment_example_move_along_a_line():
275+
'''continuous environment example: move along a line'''
262276

263277
config = {}
264278
config["seed"] = 0
@@ -293,6 +307,36 @@ def continuous_environment_example_move_along_a_line():
293307

294308

295309
def grid_environment_example():
310+
'''grid environment example: move towards a goal point'''
311+
312+
config = {}
313+
config["seed"] = 0
314+
315+
config["state_space_type"] = "grid"
316+
config["grid_shape"] = (8, 8)
317+
318+
config["reward_function"] = "move_to_a_point"
319+
config["make_denser"] = True
320+
config["target_point"] = [5, 5]
321+
322+
env = RLToyEnv(**config)
323+
324+
state = env.get_augmented_state()["augmented_state"][-1]
325+
actions = [[0, 1], [-1, 0], [-1, 0], [1, 0], [0.5, -0.5], [1, 2], [1, 1], [0, 1]]
326+
327+
for i in range(len(actions)):
328+
action = actions[i]
329+
next_obs, reward, done, trunc, info = env.step(action)
330+
next_state = env.get_augmented_state()["augmented_state"][-1]
331+
print("sars', done =", state, action, reward, next_state, done)
332+
state = next_state
333+
334+
env.reset()[0]
335+
env.close()
336+
337+
def grid_environment_example_reward_every_n_steps():
338+
'''grid environment example: move towards a goal point but with sparser rewards using the reward_every_n_steps config'''
339+
296340
config = {}
297341
config["seed"] = 0
298342

@@ -301,6 +345,7 @@ def grid_environment_example():
301345

302346
config["reward_function"] = "move_to_a_point"
303347
config["make_denser"] = True
348+
config["reward_every_n_steps"] = 3
304349
config["target_point"] = [5, 5]
305350

306351
env = RLToyEnv(**config)
@@ -320,6 +365,8 @@ def grid_environment_example():
320365

321366

322367
def grid_environment_image_representations_example():
368+
'''grid environment example: move towards a goal point with image representations'''
369+
323370
config = {}
324371
config["seed"] = 0
325372

@@ -352,6 +399,7 @@ def grid_environment_image_representations_example():
352399

353400

354401
def atari_wrapper_example():
402+
'''wrapping Atari env qbert example'''
355403

356404
config = {
357405
"seed": 0,
@@ -391,6 +439,7 @@ def atari_wrapper_example():
391439

392440

393441
def mujoco_wrapper_examples():
442+
'''wrapping Mujoco envs HalfCheetah, Pusher, Reacher examples'''
394443

395444
# For Mujoco envs, a few specific dimensions need to be changed by fiddling with
396445
# attributes of the MujocoEnv class. This is achieved through a Mujoco
@@ -509,6 +558,7 @@ def mujoco_wrapper_examples():
509558

510559

511560
def minigrid_wrapper_example():
561+
'''wrapping MiniGrid env example'''
512562

513563
config = {
514564
"seed": 0,
@@ -551,6 +601,7 @@ def minigrid_wrapper_example():
551601

552602

553603
def procgen_wrapper_example():
604+
'''wrapping ProcGen env example'''
554605

555606
config = {
556607
"seed": 0,
@@ -592,12 +643,14 @@ def procgen_wrapper_example():
592643
# Use argparse to set display_images to False if you don't want to display images
593644
# and to set log level.
594645
import argparse
595-
parser = argparse.ArgumentParser()
646+
parser = argparse.ArgumentParser(epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
596647
parser.add_argument("--display_images", "-di", help="Display image observations (available for some examples)", action="store_true")
597648
parser.add_argument("--do_not_display_images", "-n", help="Do not display image observations (available for some examples)", action="store_false", dest="display_images")
598-
parser.add_argument("--log_level", type=str, default="DEBUG", help="Set the log level")
649+
parser.add_argument("--log_level", "-ll", type=str, default="DEBUG", help="Set the log level")
650+
parser.add_argument("--func_list", "-f", type=str, nargs="+", help="Set the list of examples to run. Set it to the names of the functions corresponding to the examples inside this script.")
599651
parser.set_defaults(display_images=True)
600652
args = parser.parse_args()
653+
# print("Args:", args)
601654
display_images = args.display_images
602655

603656
# Set up logging globally for the MDP Playground library:
@@ -622,73 +675,41 @@ def procgen_wrapper_example():
622675
set_ansi_escape = "\033[33;1m" # Yellow, bold
623676
reset_ansi_escape = "\033[0m"
624677

625-
logger.info(set_ansi_escape + "Running discrete environment\n" + reset_ansi_escape)
626-
discrete_environment_example()
627-
628-
logger.info(
629-
set_ansi_escape
630-
+ "\nRunning discrete environment with image representations\n"
631-
+ reset_ansi_escape
632-
)
633-
discrete_environment_image_representations_example()
634-
635-
logger.info(
636-
set_ansi_escape
637-
+ "\nRunning discrete environment with diameter and image representations\n"
638-
+ reset_ansi_escape
639-
)
640-
discrete_environment_diameter_image_representations_example()
678+
# Run the examples called in the function list:
679+
if args.func_list:
680+
for func_name in args.func_list:
681+
logger.info(set_ansi_escape + "Running " + globals()[func_name].__doc__ + reset_ansi_escape)
682+
globals()[func_name]()
683+
exit()
641684

642-
logger.info(
643-
set_ansi_escape
644-
+ "\nRunning continuous environment: move_to_a_point\n"
645-
+ reset_ansi_escape
646-
)
647-
continuous_environment_example_move_to_a_point()
685+
# Otherwise, run all examples except the ones that are currently disabled:
648686

649-
logger.info(
650-
set_ansi_escape
651-
+ "\nRunning continuous environment: move_to_a_point with irrelevant features and image representations\n"
652-
+ reset_ansi_escape
653-
)
654-
continuous_environment_example_move_to_a_point_irrelevant_image()
687+
# List all function names defined in the current script
688+
functions = [name for name, obj in globals().items() if callable(obj) and obj.__module__ == "__main__"]
689+
print("Available functions:", functions)
655690

656-
logger.info(
657-
set_ansi_escape
658-
+ "\nRunning continuous environment: move_along_a_line\n"
659-
+ reset_ansi_escape
660-
)
661-
continuous_environment_example_move_along_a_line()
691+
# Disabled examples:
692+
functions_to_ignore = ["display_image", "minigrid_wrapper_example", "procgen_wrapper_example"]
662693

663-
logger.info(
664-
set_ansi_escape
665-
+ "\nRunning grid environment: move_to_a_point\n"
666-
+ reset_ansi_escape
667-
)
668-
grid_environment_example()
694+
# Run all functions except the ones in functions_to_ignore:
695+
for func_name in functions:
696+
if func_name in functions_to_ignore:
697+
continue
698+
logger.info(set_ansi_escape + "Running " + globals()[func_name].__doc__ + reset_ansi_escape)
699+
globals()[func_name]()
669700

670-
logger.info(
671-
set_ansi_escape + "\nRunning grid environment: move_to_a_point "
672-
"with image representations\n" + reset_ansi_escape
673-
)
674-
grid_environment_image_representations_example()
701+
# The commented-out loop below causes RuntimeError: dictionary changed size during iteration
702+
# global_vars = globals()
703+
# for func_name in global_vars:
704+
# if callable(global_vars[func_name]):
705+
# logger.info(func_name)
675706

676-
logger.info(set_ansi_escape + "\nRunning Atari wrapper example:\n" + reset_ansi_escape)
677-
atari_wrapper_example()
678-
679-
logger.info(set_ansi_escape + "\nRunning Mujoco wrapper example:\n" + reset_ansi_escape)
680-
mujoco_wrapper_examples()
681-
682-
# logger.info(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
683-
# minigrid_wrapper_example()
684-
685-
# logger.info(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
686-
# procgen_wrapper_example()
687-
688-
# Using gym.make() example 1
707+
# Run extra examples that show how to use gym.make():
689708
import mdp_playground
690709
import gymnasium as gym
691710

711+
logger.info(set_ansi_escape + "Running 2 extra examples to show using gym.make()" + reset_ansi_escape)
712+
692713
# The following are with seed=None:
693714
gym.make("RLToy-v0")
694715

0 commit comments

Comments
 (0)