Skip to content

Commit 2b9f7d5

Browse files
MAJOR: Allow transition and reward noises to depend on the current state and action; improve the default noise for continuous environments. example.py: set up logging; add a CLI argument to toggle the display of image observations. Improve logging in general.
1 parent 3ce485f commit 2b9f7d5

File tree

5 files changed

+2175
-109
lines changed

5 files changed

+2175
-109
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ __pycache__/
66
MUJOCO_LOG.TXT
77
*.pdf
88

9+
log*
10+
911
*.swp
1012
*.csv
1113
.directory
@@ -114,4 +116,4 @@ venv.bak/
114116

115117
#whitelist
116118
!tests/files/mdpp_12744267_SAC_target_radius/*.csv
117-
!misc/sample_recorded_data/*/*.csv
119+
!misc/sample_recorded_data/*/*.csv

‎example.py

Lines changed: 83 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""We collect here some examples of basic usage for MDP Playground.
2+
Example call: python example.py --do_not_display_images --log_level INFO
23
34
Calling this file as a script invokes the following examples:
45
one for basic discrete environments
@@ -10,7 +11,7 @@
1011
one for basic grid environments
1112
one for grid environments with image representations
1213
one for wrapping Atari env qbert
13-
one for wrapping Mujoco env HalfCheetah
14+
one for wrapping Mujoco envs HalfCheetah, Pusher, Reacher
1415
one for wrapping MiniGrid env # Currently commented out due to some errors
1516
one for wrapping ProcGen env # Currently commented out due to some errors
1617
two examples at the end showing how to create toy envs using gym.make()
@@ -25,6 +26,7 @@
2526
from mdp_playground.envs import RLToyEnv
2627
import numpy as np
2728

29+
display_images = True
2830

2931
def display_image(obs, mode="RGB"):
3032
# Display the image observation associated with the next state
@@ -121,7 +123,8 @@ def discrete_environment_image_representations_example():
121123

122124
env.close()
123125

124-
display_image(next_state_image, mode="L")
126+
if display_images:
127+
display_image(next_state_image, mode="L")
125128

126129

127130
def discrete_environment_diameter_image_representations_example():
@@ -167,7 +170,8 @@ def discrete_environment_diameter_image_representations_example():
167170

168171
env.close()
169172

170-
display_image(next_state_image, mode="L")
173+
if display_images:
174+
display_image(next_state_image, mode="L")
171175

172176

173177
def continuous_environment_example_move_to_a_point():
@@ -249,8 +253,9 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
249253

250254
env.close()
251255

252-
img1 = display_image(next_state_image, mode="RGB")
253-
img1.save("cont_env_irrelevant_image.pdf")
256+
if display_images:
257+
img1 = display_image(next_state_image, mode="RGB")
258+
# img1.save("cont_env_irrelevant_image.pdf")
254259

255260

256261
def continuous_environment_example_move_along_a_line():
@@ -342,7 +347,8 @@ def grid_environment_image_representations_example():
342347
env.reset()[0]
343348
env.close()
344349

345-
display_image(next_obs)
350+
if display_images:
351+
display_image(next_obs)
346352

347353

348354
def atari_wrapper_example():
@@ -351,7 +357,7 @@ def atari_wrapper_example():
351357
"seed": 0,
352358
"delay": 1,
353359
"transition_noise": 0.25,
354-
"reward_noise": lambda a: a.normal(0, 0.1),
360+
"reward_noise": lambda s, a, rng: rng.normal(0, 0.1),
355361
"state_space_type": "discrete",
356362
}
357363

@@ -380,7 +386,8 @@ def atari_wrapper_example():
380386

381387
env.close()
382388

383-
display_image(next_state)
389+
if display_images:
390+
display_image(next_state)
384391

385392

386393
def mujoco_wrapper_examples():
@@ -435,11 +442,13 @@ def mujoco_wrapper_examples():
435442
state = env.reset(seed=gym_wrap_config["seed"])[0]
436443

437444
print(
438-
"Taking a step in the environment with a random action and printing the transition:"
445+
"Taking steps in the HalfCheetah environment with a random action and printing the transition:"
439446
)
440-
action = env.action_space.sample()
441-
next_state, reward, done, trunc, info = env.step(action)
442-
print("sars', done =", state, action, reward, next_state, done)
447+
for i in range(3):
448+
action = env.action_space.sample()
449+
next_state, reward, done, trunc, info = env.step(action)
450+
print("sars', done =", state, action, reward, next_state, done)
451+
state = next_state
443452

444453
env.close()
445454

@@ -453,14 +462,16 @@ def mujoco_wrapper_examples():
453462
import gymnasium as gym
454463
env = GymEnvWrapper(env, **gym_wrap_config)
455464

456-
state = env.reset(seed=gym_wrap_config["seed"])[0]
465+
state = env.reset(seed=gym_wrap_config["seed"] + 1)[0]
457466

458467
print(
459-
"Taking a step in the environment with a random action and printing the transition:"
468+
"Taking steps in the Pusher environment with a random action and printing the transition:"
460469
)
461-
action = env.action_space.sample()
462-
next_state, reward, done, trunc, info = env.step(action)
463-
print("sars', done =", state, action, reward, next_state, done)
470+
for i in range(3):
471+
action = env.action_space.sample()
472+
next_state, reward, done, trunc, info = env.step(action)
473+
print("sars', done =", state, action, reward, next_state, done)
474+
state = next_state
464475

465476
env.close()
466477

@@ -474,14 +485,16 @@ def mujoco_wrapper_examples():
474485
import gymnasium as gym
475486
env = GymEnvWrapper(env, **gym_wrap_config)
476487

477-
state = env.reset(seed=gym_wrap_config["seed"])[0]
488+
state = env.reset(seed=gym_wrap_config["seed"] + 2)[0]
478489

479490
print(
480-
"Taking a step in the environment with a random action and printing the transition:"
491+
"Taking steps in the Reacher environment with a random action and printing the transition:"
481492
)
482-
action = env.action_space.sample()
483-
next_state, reward, done, trunc, info = env.step(action)
484-
print("sars', done =", state, action, reward, next_state, done)
493+
for i in range(3):
494+
action = env.action_space.sample()
495+
next_state, reward, done, trunc, info = env.step(action)
496+
print("sars', done =", state, action, reward, next_state, done)
497+
state = next_state
485498

486499
env.close()
487500

@@ -501,7 +514,7 @@ def minigrid_wrapper_example():
501514
"seed": 0,
502515
"delay": 1,
503516
"transition_noise": 0.25,
504-
"reward_noise": lambda a: a.normal(0, 0.1),
517+
"reward_noise": lambda s, a, rng: rng.normal(0, 0.1),
505518
"state_space_type": "discrete",
506519
}
507520

@@ -533,7 +546,8 @@ def minigrid_wrapper_example():
533546

534547
env.close()
535548

536-
display_image(next_obs)
549+
if display_images:
550+
display_image(next_obs)
537551

538552

539553
def procgen_wrapper_example():
@@ -542,7 +556,7 @@ def procgen_wrapper_example():
542556
"seed": 0,
543557
"delay": 1,
544558
"transition_noise": 0.25,
545-
"reward_noise": lambda a: a.normal(0, 0.1),
559+
"reward_noise": lambda s, a, rng: rng.normal(0, 0.1),
546560
"state_space_type": "discrete",
547561
}
548562

@@ -569,76 +583,106 @@ def procgen_wrapper_example():
569583

570584
env.close()
571585

572-
display_image(next_obs)
586+
if display_images:
587+
display_image(next_obs)
573588

574589

575590
if __name__ == "__main__":
576591

592+
# Use argparse to set display_images to False if you don't want to display images
593+
# and to set log level.
594+
import argparse
595+
parser = argparse.ArgumentParser()
596+
parser.add_argument("--display_images", "-di", help="Display image observations (available for some examples)", action="store_true")
597+
parser.add_argument("--do_not_display_images", "-n", help="Do not display image observations (available for some examples)", action="store_false", dest="display_images")
598+
parser.add_argument("--log_level", type=str, default="DEBUG", help="Set the log level")
599+
parser.set_defaults(display_images=True)
600+
args = parser.parse_args()
601+
display_images = args.display_images
602+
603+
# Set up logging globally for the MDP Playground library:
604+
import logging
605+
logger = logging.getLogger("mdp_playground")
606+
logger.setLevel(args.log_level)
607+
if not logger.handlers:
608+
log_filename = "log_file.txt"
609+
log_file_handler = logging.FileHandler(log_filename)
610+
log_file_handler.setFormatter(logging.Formatter('%(message)s - %(levelname)s - %(name)s - %(asctime)s', datefmt='%m.%d.%Y %I:%M:%S %p'))
611+
logger.addHandler(log_file_handler)
612+
# Add a console handler:
613+
console_handler = logging.StreamHandler()
614+
console_handler.setFormatter(logging.Formatter('%(message)s'))
615+
# Have less verbose logging to console:
616+
console_handler.setLevel(logging.INFO)
617+
logger.addHandler(console_handler)
618+
logger.info("Begin logging to: %s", log_filename)
619+
620+
577621
# Colour print
578622
set_ansi_escape = "\033[33;1m" # Yellow, bold
579623
reset_ansi_escape = "\033[0m"
580624

581-
print(set_ansi_escape + "Running discrete environment\n" + reset_ansi_escape)
625+
logger.info(set_ansi_escape + "Running discrete environment\n" + reset_ansi_escape)
582626
discrete_environment_example()
583627

584-
print(
628+
logger.info(
585629
set_ansi_escape
586630
+ "\nRunning discrete environment with image representations\n"
587631
+ reset_ansi_escape
588632
)
589633
discrete_environment_image_representations_example()
590634

591-
print(
635+
logger.info(
592636
set_ansi_escape
593637
+ "\nRunning discrete environment with diameter and image representations\n"
594638
+ reset_ansi_escape
595639
)
596640
discrete_environment_diameter_image_representations_example()
597641

598-
print(
642+
logger.info(
599643
set_ansi_escape
600644
+ "\nRunning continuous environment: move_to_a_point\n"
601645
+ reset_ansi_escape
602646
)
603647
continuous_environment_example_move_to_a_point()
604648

605-
print(
649+
logger.info(
606650
set_ansi_escape
607651
+ "\nRunning continuous environment: move_to_a_point with irrelevant features and image representations\n"
608652
+ reset_ansi_escape
609653
)
610654
continuous_environment_example_move_to_a_point_irrelevant_image()
611655

612-
print(
656+
logger.info(
613657
set_ansi_escape
614658
+ "\nRunning continuous environment: move_along_a_line\n"
615659
+ reset_ansi_escape
616660
)
617661
continuous_environment_example_move_along_a_line()
618662

619-
print(
663+
logger.info(
620664
set_ansi_escape
621665
+ "\nRunning grid environment: move_to_a_point\n"
622666
+ reset_ansi_escape
623667
)
624668
grid_environment_example()
625669

626-
print(
670+
logger.info(
627671
set_ansi_escape + "\nRunning grid environment: move_to_a_point "
628672
"with image representations\n" + reset_ansi_escape
629673
)
630674
grid_environment_image_representations_example()
631675

632-
print(set_ansi_escape + "\nRunning Atari wrapper example:\n" + reset_ansi_escape)
676+
logger.info(set_ansi_escape + "\nRunning Atari wrapper example:\n" + reset_ansi_escape)
633677
atari_wrapper_example()
634678

635-
print(set_ansi_escape + "\nRunning Mujoco wrapper example:\n" + reset_ansi_escape)
679+
logger.info(set_ansi_escape + "\nRunning Mujoco wrapper example:\n" + reset_ansi_escape)
636680
mujoco_wrapper_examples()
637681

638-
print(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
682+
# logger.info(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
639683
# minigrid_wrapper_example()
640684

641-
# print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
685+
# logger.info(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
642686
# procgen_wrapper_example()
643687

644688
# Using gym.make() example 1
@@ -660,4 +704,4 @@ def procgen_wrapper_example():
660704
)
661705
env.reset()[0]
662706
for i in range(10):
663-
print(env.step(env.action_space.sample()))
707+
logger.info(env.step(env.action_space.sample()))

0 commit comments

Comments
 (0)