1
- """We collect here some examples of basic usage for MDP Playground.
2
- Example call: python example.py --do_not_display_images --log_level INFO
1
+ """We collect some examples of basic usage for MDP Playground in this script.
2
+ Example calls:
3
+ python example.py --do_not_display_images --log_level INFO
4
+ python example.py --do_not_display_images --func_list discrete_environment_example
5
+ Equivalent calls with short flags:
6
+ python example.py -n -ll INFO
7
+ python example.py -n -f discrete_environment_example
3
8
4
9
Calling this file as a script invokes the following examples:
5
10
one for basic discrete environments
9
14
one for continuous environments with reward function move to a target point with irrelevant features and image representations
10
15
one for continuous environments with reward function move along a line
11
16
one for basic grid environments
17
+ one for grid environments with reward_every_n_steps
12
18
one for grid environments with image representations
13
19
one for wrapping Atari env qbert
14
20
one for wrapping Mujoco envs HalfCheetah, Pusher, Reacher
@@ -42,6 +48,7 @@ def display_image(obs, mode="RGB"):
42
48
43
49
44
50
def discrete_environment_example ():
51
+ """discrete environment example"""
45
52
46
53
config = {}
47
54
config ["seed" ] = 0
@@ -82,6 +89,7 @@ def discrete_environment_example():
82
89
83
90
84
91
def discrete_environment_image_representations_example ():
92
+ '''discrete environment with image representations example'''
85
93
86
94
config = {}
87
95
config ["seed" ] = 0
@@ -128,6 +136,7 @@ def discrete_environment_image_representations_example():
128
136
129
137
130
138
def discrete_environment_diameter_image_representations_example ():
139
+ '''discrete environment with diameter > 1 and image representations example'''
131
140
132
141
config = {}
133
142
config ["seed" ] = 3
@@ -175,6 +184,8 @@ def discrete_environment_diameter_image_representations_example():
175
184
176
185
177
186
def continuous_environment_example_move_to_a_point ():
187
+ '''continuous environment example: move to a point'''
188
+
178
189
config = {}
179
190
config ["seed" ] = 0
180
191
@@ -210,6 +221,8 @@ def continuous_environment_example_move_to_a_point():
210
221
211
222
212
223
def continuous_environment_example_move_to_a_point_irrelevant_image ():
224
+ '''continuous environment example: move to a point with irrelevant features and image representations'''
225
+
213
226
config = {}
214
227
config ["seed" ] = 0
215
228
@@ -259,6 +272,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
259
272
260
273
261
274
def continuous_environment_example_move_along_a_line ():
275
+ '''continuous environment example: move along a line'''
262
276
263
277
config = {}
264
278
config ["seed" ] = 0
@@ -293,6 +307,36 @@ def continuous_environment_example_move_along_a_line():
293
307
294
308
295
309
def grid_environment_example():
    '''grid environment example: move towards a goal point'''
    # NOTE: the docstring above is kept verbatim on purpose -- the script's
    # entry section reads it through __doc__ to build its "Running ..." log line.

    # Keyword arguments for RLToyEnv: an 8x8 grid using the
    # "move_to_a_point" reward function with [5, 5] as the target point.
    config = {
        "seed": 0,
        "state_space_type": "grid",
        "grid_shape": (8, 8),
        "reward_function": "move_to_a_point",
        "make_denser": True,
        "target_point": [5, 5],
    }
    env = RLToyEnv(**config)

    # The last entry of the augmented state is treated as the current state.
    current_state = env.get_augmented_state()["augmented_state"][-1]

    # Fixed sequence of actions that is stepped through in order.
    # NOTE(review): entries such as [0.5, -0.5] and [1, 2] look like
    # deliberately unusual moves -- confirm how RLToyEnv handles them.
    action_sequence = [
        [0, 1],
        [-1, 0],
        [-1, 0],
        [1, 0],
        [0.5, -0.5],
        [1, 2],
        [1, 1],
        [0, 1],
    ]

    for action in action_sequence:
        # step() is unpacked into five values; only reward and done are printed.
        _next_obs, reward, done, _trunc, _info = env.step(action)
        following_state = env.get_augmented_state()["augmented_state"][-1]
        print("sars', done =", current_state, action, reward, following_state, done)
        current_state = following_state

    # The first element of reset()'s return value is indexed but not used.
    env.reset()[0]
    env.close()
336
+
337
+ def grid_environment_example_reward_every_n_steps ():
338
+ '''grid environment example: move towards a goal point but with sparser rewards using the reward_every_n_steps config'''
339
+
296
340
config = {}
297
341
config ["seed" ] = 0
298
342
@@ -301,6 +345,7 @@ def grid_environment_example():
301
345
302
346
config ["reward_function" ] = "move_to_a_point"
303
347
config ["make_denser" ] = True
348
+ config ["reward_every_n_steps" ] = 3
304
349
config ["target_point" ] = [5 , 5 ]
305
350
306
351
env = RLToyEnv (** config )
@@ -320,6 +365,8 @@ def grid_environment_example():
320
365
321
366
322
367
def grid_environment_image_representations_example ():
368
+ '''grid environment example: move towards a goal point with image representations'''
369
+
323
370
config = {}
324
371
config ["seed" ] = 0
325
372
@@ -352,6 +399,7 @@ def grid_environment_image_representations_example():
352
399
353
400
354
401
def atari_wrapper_example ():
402
+ '''wrapping Atari env qbert example'''
355
403
356
404
config = {
357
405
"seed" : 0 ,
@@ -391,6 +439,7 @@ def atari_wrapper_example():
391
439
392
440
393
441
def mujoco_wrapper_examples ():
442
+ '''wrapping Mujoco envs HalfCheetah, Pusher, Reacher examples'''
394
443
395
444
# For Mujoco envs, a few specific dimensions need to be changed by fiddling with
396
445
# attributes of the MujocoEnv class. This is achieved through a Mujoco
@@ -509,6 +558,7 @@ def mujoco_wrapper_examples():
509
558
510
559
511
560
def minigrid_wrapper_example ():
561
+ '''wrapping MiniGrid env example'''
512
562
513
563
config = {
514
564
"seed" : 0 ,
@@ -551,6 +601,7 @@ def minigrid_wrapper_example():
551
601
552
602
553
603
def procgen_wrapper_example ():
604
+ '''wrapping ProcGen env example'''
554
605
555
606
config = {
556
607
"seed" : 0 ,
@@ -592,12 +643,14 @@ def procgen_wrapper_example():
592
643
# Use argparse to set display_images to False if you don't want to display images
593
644
# and to set log level.
594
645
import argparse
595
- parser = argparse .ArgumentParser ()
646
+ parser = argparse .ArgumentParser (epilog = __doc__ , formatter_class = argparse . RawDescriptionHelpFormatter )
596
647
parser .add_argument ("--display_images" , "-di" , help = "Display image observations (available for some examples)" , action = "store_true" )
597
648
parser .add_argument ("--do_not_display_images" , "-n" , help = "Do not display image observations (available for some examples)" , action = "store_false" , dest = "display_images" )
598
- parser .add_argument ("--log_level" , type = str , default = "DEBUG" , help = "Set the log level" )
649
+ parser .add_argument ("--log_level" , "-ll" , type = str , default = "DEBUG" , help = "Set the log level" )
650
+ parser .add_argument ("--func_list" , "-f" , type = str , nargs = "+" , help = "Set the list of examples to run. Set it to the names of the functions corresponding to the examples inside this script." )
599
651
parser .set_defaults (display_images = True )
600
652
args = parser .parse_args ()
653
+ # print("Args:", args)
601
654
display_images = args .display_images
602
655
603
656
# Set up logging globally for the MDP Playground library:
@@ -622,73 +675,41 @@ def procgen_wrapper_example():
622
675
set_ansi_escape = "\033 [33;1m" # Yellow, bold
623
676
reset_ansi_escape = "\033 [0m"
624
677
625
- logger .info (set_ansi_escape + "Running discrete environment\n " + reset_ansi_escape )
626
- discrete_environment_example ()
627
-
628
- logger .info (
629
- set_ansi_escape
630
- + "\n Running discrete environment with image representations\n "
631
- + reset_ansi_escape
632
- )
633
- discrete_environment_image_representations_example ()
634
-
635
- logger .info (
636
- set_ansi_escape
637
- + "\n Running discrete environment with diameter and image representations\n "
638
- + reset_ansi_escape
639
- )
640
- discrete_environment_diameter_image_representations_example ()
678
+ # Run the examples called in the function list:
679
+ if args .func_list :
680
+ for func_name in args .func_list :
681
+ logger .info (set_ansi_escape + "Running " + globals ()[func_name ].__doc__ + reset_ansi_escape )
682
+ globals ()[func_name ]()
683
+ exit ()
641
684
642
- logger .info (
643
- set_ansi_escape
644
- + "\n Running continuous environment: move_to_a_point\n "
645
- + reset_ansi_escape
646
- )
647
- continuous_environment_example_move_to_a_point ()
685
+ # Otherwise, run all examples except the ones that are currently disabled:
648
686
649
- logger .info (
650
- set_ansi_escape
651
- + "\n Running continuous environment: move_to_a_point with irrelevant features and image representations\n "
652
- + reset_ansi_escape
653
- )
654
- continuous_environment_example_move_to_a_point_irrelevant_image ()
687
+ # List all function names defined in the current script
688
+ functions = [name for name , obj in globals ().items () if callable (obj ) and obj .__module__ == "__main__" ]
689
+ print ("Available functions:" , functions )
655
690
656
- logger .info (
657
- set_ansi_escape
658
- + "\n Running continuous environment: move_along_a_line\n "
659
- + reset_ansi_escape
660
- )
661
- continuous_environment_example_move_along_a_line ()
691
+ # Disabled examples:
692
+ functions_to_ignore = ["display_image" , "minigrid_wrapper_example" , "procgen_wrapper_example" ]
662
693
663
- logger . info (
664
- set_ansi_escape
665
- + " \n Running grid environment: move_to_a_point \n "
666
- + reset_ansi_escape
667
- )
668
- grid_environment_example ()
694
+ # Run all functions except the ones in functions_to_ignore:
695
+ for func_name in functions :
696
+ if func_name in functions_to_ignore :
697
+ continue
698
+ logger . info ( set_ansi_escape + "Running " + globals ()[ func_name ]. __doc__ + reset_ansi_escape )
699
+ globals ()[ func_name ] ()
669
700
670
- logger . info (
671
- set_ansi_escape + " \n Running grid environment: move_to_a_point "
672
- "with image representations \n " + reset_ansi_escape
673
- )
674
- grid_environment_image_representations_example ( )
701
+ # Causes RuntimeError: dictionary changed size during iteration
702
+ # global_vars = globals()
703
+ # for func_name in global_vars:
704
+ # if callable(global_vars[func_name]):
705
+ # logger.info(func_name )
675
706
676
- logger .info (set_ansi_escape + "\n Running Atari wrapper example:\n " + reset_ansi_escape )
677
- atari_wrapper_example ()
678
-
679
- logger .info (set_ansi_escape + "\n Running Mujoco wrapper example:\n " + reset_ansi_escape )
680
- mujoco_wrapper_examples ()
681
-
682
- # logger.info(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
683
- # minigrid_wrapper_example()
684
-
685
- # logger.info(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
686
- # procgen_wrapper_example()
687
-
688
- # Using gym.make() example 1
707
+ # Running extra examples to show using gym.make():
689
708
import mdp_playground
690
709
import gymnasium as gym
691
710
711
+ logger .info (set_ansi_escape + "Running 2 extra examples to show using gym.make()" + reset_ansi_escape )
712
+
692
713
# The following are with seed=None:
693
714
gym .make ("RLToy-v0" )
694
715
0 commit comments