1
1
"""We collect here some examples of basic usage for MDP Playground.
2
+ Example call: python example.py --do_not_display_images --log_level INFO
2
3
3
4
Calling this file as a script invokes the following examples:
4
5
one for basic discrete environments
10
11
one for basic grid environments
11
12
one for grid environments with image representations
12
13
one for wrapping Atari env qbert
13
- one for wrapping Mujoco env HalfCheetah
14
+ one for wrapping Mujoco envs HalfCheetah, Pusher, Reacher
14
15
one for wrapping MiniGrid env # Currently commented out due to some errors
15
16
one for wrapping ProcGen env # Currently commented out due to some errors
16
17
two examples at the end showing how to create toy envs using gym.make()
25
26
from mdp_playground .envs import RLToyEnv
26
27
import numpy as np
27
28
29
+ display_images = True
28
30
29
31
def display_image (obs , mode = "RGB" ):
30
32
# Display the image observation associated with the next state
@@ -121,7 +123,8 @@ def discrete_environment_image_representations_example():
121
123
122
124
env .close ()
123
125
124
- display_image (next_state_image , mode = "L" )
126
+ if display_images :
127
+ display_image (next_state_image , mode = "L" )
125
128
126
129
127
130
def discrete_environment_diameter_image_representations_example ():
@@ -167,7 +170,8 @@ def discrete_environment_diameter_image_representations_example():
167
170
168
171
env .close ()
169
172
170
- display_image (next_state_image , mode = "L" )
173
+ if display_images :
174
+ display_image (next_state_image , mode = "L" )
171
175
172
176
173
177
def continuous_environment_example_move_to_a_point ():
@@ -249,8 +253,9 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
249
253
250
254
env .close ()
251
255
252
- img1 = display_image (next_state_image , mode = "RGB" )
253
- img1 .save ("cont_env_irrelevant_image.pdf" )
256
+ if display_images :
257
+ img1 = display_image (next_state_image , mode = "RGB" )
258
+ # img1.save("cont_env_irrelevant_image.pdf")
254
259
255
260
256
261
def continuous_environment_example_move_along_a_line ():
@@ -342,7 +347,8 @@ def grid_environment_image_representations_example():
342
347
env .reset ()[0 ]
343
348
env .close ()
344
349
345
- display_image (next_obs )
350
+ if display_images :
351
+ display_image (next_obs )
346
352
347
353
348
354
def atari_wrapper_example ():
@@ -351,7 +357,7 @@ def atari_wrapper_example():
351
357
"seed" : 0 ,
352
358
"delay" : 1 ,
353
359
"transition_noise" : 0.25 ,
354
- "reward_noise" : lambda a : a .normal (0 , 0.1 ),
360
+ "reward_noise" : lambda s , a , rng : rng .normal (0 , 0.1 ),
355
361
"state_space_type" : "discrete" ,
356
362
}
357
363
@@ -380,7 +386,8 @@ def atari_wrapper_example():
380
386
381
387
env .close ()
382
388
383
- display_image (next_state )
389
+ if display_images :
390
+ display_image (next_state )
384
391
385
392
386
393
def mujoco_wrapper_examples ():
@@ -435,11 +442,13 @@ def mujoco_wrapper_examples():
435
442
state = env .reset (seed = gym_wrap_config ["seed" ])[0 ]
436
443
437
444
print (
438
- "Taking a step in the environment with a random action and printing the transition:"
445
+ "Taking steps in the HalfCheetah environment with a random action and printing the transition:"
439
446
)
440
- action = env .action_space .sample ()
441
- next_state , reward , done , trunc , info = env .step (action )
442
- print ("sars', done =" , state , action , reward , next_state , done )
447
+ for i in range (3 ):
448
+ action = env .action_space .sample ()
449
+ next_state , reward , done , trunc , info = env .step (action )
450
+ print ("sars', done =" , state , action , reward , next_state , done )
451
+ state = next_state
443
452
444
453
env .close ()
445
454
@@ -453,14 +462,16 @@ def mujoco_wrapper_examples():
453
462
import gymnasium as gym
454
463
env = GymEnvWrapper (env , ** gym_wrap_config )
455
464
456
- state = env .reset (seed = gym_wrap_config ["seed" ])[0 ]
465
+ state = env .reset (seed = gym_wrap_config ["seed" ] + 1 )[0 ]
457
466
458
467
print (
459
- "Taking a step in the environment with a random action and printing the transition:"
468
+ "Taking steps in the Pusher environment with a random action and printing the transition:"
460
469
)
461
- action = env .action_space .sample ()
462
- next_state , reward , done , trunc , info = env .step (action )
463
- print ("sars', done =" , state , action , reward , next_state , done )
470
+ for i in range (3 ):
471
+ action = env .action_space .sample ()
472
+ next_state , reward , done , trunc , info = env .step (action )
473
+ print ("sars', done =" , state , action , reward , next_state , done )
474
+ state = next_state
464
475
465
476
env .close ()
466
477
@@ -474,14 +485,16 @@ def mujoco_wrapper_examples():
474
485
import gymnasium as gym
475
486
env = GymEnvWrapper (env , ** gym_wrap_config )
476
487
477
- state = env .reset (seed = gym_wrap_config ["seed" ])[0 ]
488
+ state = env .reset (seed = gym_wrap_config ["seed" ] + 2 )[0 ]
478
489
479
490
print (
480
- "Taking a step in the environment with a random action and printing the transition:"
491
+ "Taking steps in the Reacher environment with a random action and printing the transition:"
481
492
)
482
- action = env .action_space .sample ()
483
- next_state , reward , done , trunc , info = env .step (action )
484
- print ("sars', done =" , state , action , reward , next_state , done )
493
+ for i in range (3 ):
494
+ action = env .action_space .sample ()
495
+ next_state , reward , done , trunc , info = env .step (action )
496
+ print ("sars', done =" , state , action , reward , next_state , done )
497
+ state = next_state
485
498
486
499
env .close ()
487
500
@@ -501,7 +514,7 @@ def minigrid_wrapper_example():
501
514
"seed" : 0 ,
502
515
"delay" : 1 ,
503
516
"transition_noise" : 0.25 ,
504
- "reward_noise" : lambda a : a .normal (0 , 0.1 ),
517
+ "reward_noise" : lambda s , a , rng : rng .normal (0 , 0.1 ),
505
518
"state_space_type" : "discrete" ,
506
519
}
507
520
@@ -533,7 +546,8 @@ def minigrid_wrapper_example():
533
546
534
547
env .close ()
535
548
536
- display_image (next_obs )
549
+ if display_images :
550
+ display_image (next_obs )
537
551
538
552
539
553
def procgen_wrapper_example ():
@@ -542,7 +556,7 @@ def procgen_wrapper_example():
542
556
"seed" : 0 ,
543
557
"delay" : 1 ,
544
558
"transition_noise" : 0.25 ,
545
- "reward_noise" : lambda a : a .normal (0 , 0.1 ),
559
+ "reward_noise" : lambda s , a , rng : rng .normal (0 , 0.1 ),
546
560
"state_space_type" : "discrete" ,
547
561
}
548
562
@@ -569,76 +583,106 @@ def procgen_wrapper_example():
569
583
570
584
env .close ()
571
585
572
- display_image (next_obs )
586
+ if display_images :
587
+ display_image (next_obs )
573
588
574
589
575
590
if __name__ == "__main__" :
576
591
592
+ # Use argparse to set display_images to False if you don't want to display images
593
+ # and to set the log level.
594
+ import argparse
595
+ parser = argparse .ArgumentParser ()
596
+ parser .add_argument ("--display_images" , "-di" , help = "Display image observations (available for some examples)" , action = "store_true" )
597
+ parser .add_argument ("--do_not_display_images" , "-n" , help = "Do not display image observations (available for some examples)" , action = "store_false" , dest = "display_images" )
598
+ parser .add_argument ("--log_level" , type = str , default = "DEBUG" , help = "Set the log level" )
599
+ parser .set_defaults (display_images = True )
600
+ args = parser .parse_args ()
601
+ display_images = args .display_images
602
+
603
+ # Set up logging globally for the MDP Playground library:
604
+ import logging
605
+ logger = logging .getLogger ("mdp_playground" )
606
+ logger .setLevel (args .log_level )
607
+ if not logger .handlers :
608
+ log_filename = "log_file.txt"
609
+ log_file_handler = logging .FileHandler (log_filename )
610
+ log_file_handler .setFormatter (logging .Formatter ('%(message)s - %(levelname)s - %(name)s - %(asctime)s' , datefmt = '%m.%d.%Y %I:%M:%S %p' ))
611
+ logger .addHandler (log_file_handler )
612
+ # Add a console handler:
613
+ console_handler = logging .StreamHandler ()
614
+ console_handler .setFormatter (logging .Formatter ('%(message)s' ))
615
+ # Have less verbose logging to console:
616
+ console_handler .setLevel (logging .INFO )
617
+ logger .addHandler (console_handler )
618
+ logger .info ("Begin logging to: %s" , log_filename )
619
+
620
+
577
621
# Colour print
578
622
set_ansi_escape = "\033 [33;1m" # Yellow, bold
579
623
reset_ansi_escape = "\033 [0m"
580
624
581
- print (set_ansi_escape + "Running discrete environment\n " + reset_ansi_escape )
625
+ logger . info (set_ansi_escape + "Running discrete environment\n " + reset_ansi_escape )
582
626
discrete_environment_example ()
583
627
584
- print (
628
+ logger . info (
585
629
set_ansi_escape
586
630
+ "\n Running discrete environment with image representations\n "
587
631
+ reset_ansi_escape
588
632
)
589
633
discrete_environment_image_representations_example ()
590
634
591
- print (
635
+ logger . info (
592
636
set_ansi_escape
593
637
+ "\n Running discrete environment with diameter and image representations\n "
594
638
+ reset_ansi_escape
595
639
)
596
640
discrete_environment_diameter_image_representations_example ()
597
641
598
- print (
642
+ logger . info (
599
643
set_ansi_escape
600
644
+ "\n Running continuous environment: move_to_a_point\n "
601
645
+ reset_ansi_escape
602
646
)
603
647
continuous_environment_example_move_to_a_point ()
604
648
605
- print (
649
+ logger . info (
606
650
set_ansi_escape
607
651
+ "\n Running continuous environment: move_to_a_point with irrelevant features and image representations\n "
608
652
+ reset_ansi_escape
609
653
)
610
654
continuous_environment_example_move_to_a_point_irrelevant_image ()
611
655
612
- print (
656
+ logger . info (
613
657
set_ansi_escape
614
658
+ "\n Running continuous environment: move_along_a_line\n "
615
659
+ reset_ansi_escape
616
660
)
617
661
continuous_environment_example_move_along_a_line ()
618
662
619
- print (
663
+ logger . info (
620
664
set_ansi_escape
621
665
+ "\n Running grid environment: move_to_a_point\n "
622
666
+ reset_ansi_escape
623
667
)
624
668
grid_environment_example ()
625
669
626
- print (
670
+ logger . info (
627
671
set_ansi_escape + "\n Running grid environment: move_to_a_point "
628
672
"with image representations\n " + reset_ansi_escape
629
673
)
630
674
grid_environment_image_representations_example ()
631
675
632
- print (set_ansi_escape + "\n Running Atari wrapper example:\n " + reset_ansi_escape )
676
+ logger . info (set_ansi_escape + "\n Running Atari wrapper example:\n " + reset_ansi_escape )
633
677
atari_wrapper_example ()
634
678
635
- print (set_ansi_escape + "\n Running Mujoco wrapper example:\n " + reset_ansi_escape )
679
+ logger . info (set_ansi_escape + "\n Running Mujoco wrapper example:\n " + reset_ansi_escape )
636
680
mujoco_wrapper_examples ()
637
681
638
- print (set_ansi_escape + "\n Running MiniGrid wrapper example:\n " + reset_ansi_escape )
682
+ # logger.info (set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
639
683
# minigrid_wrapper_example()
640
684
641
- # print (set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
685
+ # logger.info (set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
642
686
# procgen_wrapper_example()
643
687
644
688
# Using gym.make() example 1
@@ -660,4 +704,4 @@ def procgen_wrapper_example():
660
704
)
661
705
env .reset ()[0 ]
662
706
for i in range (10 ):
663
- print (env .step (env .action_space .sample ()))
707
+ logger . info (env .step (env .action_space .sample ()))
0 commit comments