@@ -3,7 +3,10 @@
 Calling this file as a script invokes the following examples:
     one for basic discrete environments
     one for discrete environments with image representations
+    one for discrete environments with a diameter > 1 and image representations
     one for continuous environments with reward function move to a target point
+    one for continuous environments with reward function move to a target point with irrelevant features and image representations
+    one for continuous environments with reward function move along a line
     one for basic grid environments
     one for grid environments with image representations
     one for wrapping Atari env qbert
@@ -33,6 +36,7 @@ def display_image(obs, mode="RGB"):
     # used because the image is 3-D, as frameworks like Ray expect the image
     # to be 3-D.
     img1.show()
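+    # Return the PIL Image as well, so callers can save it to disk (the
+    # continuous irrelevant-features example below saves one as a PDF).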
+    return img1
 
 
 def discrete_environment_example():
@@ -110,6 +114,7 @@ def discrete_environment_image_representations_example():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, info = env.step(action)
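+    # Refresh the augmented-state dict after stepping so that "curr_state"
+    # below reflects the post-transition state.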
+    augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)
@@ -119,28 +124,75 @@ def discrete_environment_image_representations_example():
     display_image(next_state_image, mode="L")
 
 
-def continuous_environment_example_move_along_a_line():
+def discrete_environment_diameter_image_representations_example():
 
+    config = {}
+    config["seed"] = 3
+
+    config["state_space_type"] = "discrete"
+    config["action_space_size"] = 4
+    config["image_representations"] = True
+    config["delay"] = 1
+    config["diameter"] = 2
+    config["sequence_length"] = 3
+    config["reward_scale"] = 2.5
+    config["reward_shift"] = -1.75
+    config["reward_noise"] = 0.5  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1
+    config["reward_density"] = 0.25
+    config["make_denser"] = False
+    config["terminal_state_density"] = 0.25
+    config["maximally_connected"] = True
+    config["repeats_in_sequences"] = False
+
+    config["generate_random_mdp"] = True
+    env = RLToyEnv(**config)
+
+    # The environment maintains an augmented state which contains the underlying
+    # state used by the MDP to perform transitions and hand out rewards. We can
+    # fetch a dict containing the augmented state and current state like this:
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"]
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
+    # the current discrete state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    display_image(next_state_image, mode="L")
+
+
+def continuous_environment_example_move_to_a_point():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 4
+    config["state_space_dim"] = 2
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
     config["time_unit"] = 1  # Discretization of time domain and the time
     # duration over which action is applied
 
-    config["delay"] = 0
-    config["sequence_length"] = 10
-    config["reward_scale"] = 1.0
-    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["reward_function"] = "move_along_a_line"
+    config["make_denser"] = True
+    config["target_point"] = [0, 0]
+    config["target_radius"] = 0.05
+    config["state_space_max"] = 10
+    config["action_space_max"] = 1
+    config["action_loss_weight"] = 0.0
+
+    config["reward_function"] = "move_to_a_point"
 
     env = RLToyEnv(**config)
-    state = env.reset()
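+    # Copy the returned observation, since the env seems to reuse its
+    # internal state array across steps (a hedged assumption).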
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -153,12 +205,12 @@ def continuous_environment_example_move_along_a_line():
     env.close()
 
 
-def continuous_environment_example_move_to_a_point():
+def continuous_environment_example_move_to_a_point_irrelevant_image():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 2
+    config["state_space_dim"] = 4
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
@@ -174,8 +226,55 @@ def continuous_environment_example_move_to_a_point():
 
     config["reward_function"] = "move_to_a_point"
 
+    config["image_representations"] = True
+    config["irrelevant_features"] = True
+    config["relevant_indices"] = [0, 1]
+
     env = RLToyEnv(**config)
     state = env.reset()
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    img1 = display_image(next_state_image, mode="RGB")
+    img1.save("cont_env_irrelevant_image.pdf")
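+    # PIL infers the output format (PDF here) from the file extension.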
+
+
+def continuous_environment_example_move_along_a_line():
+
+    config = {}
+    config["seed"] = 0
+
+    config["state_space_type"] = "continuous"
+    config["state_space_dim"] = 4
+    config["transition_dynamics_order"] = 1
+    config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
+    # inertia.
+    config["time_unit"] = 1  # Discretization of time domain and the time
+    # duration over which action is applied
+
+    config["delay"] = 0
+    config["sequence_length"] = 10
+    config["reward_scale"] = 1.0
+    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["reward_function"] = "move_along_a_line"
+
+    env = RLToyEnv(**config)
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -270,13 +369,14 @@ def atari_wrapper_example():
         action = env.action_space.sample()
         next_state, reward, done, info = env.step(action)
         print(
-            "s.shape ar s'.shape, done =",
+            "s.shape a r s'.shape, done =",
             state.shape,
             action,
             reward,
             next_state.shape,
             done,
         )
+        state = next_state
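+        # Carry the observation forward so the next iteration prints the
+        # true (s, a, r, s') transition.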
 
     env.close()
 
@@ -422,13 +522,34 @@ def procgen_wrapper_example():
     )
     discrete_environment_image_representations_example()
 
+    print(
+        set_ansi_escape
+        + "\nRunning discrete environment with diameter and image representations\n"
+        + reset_ansi_escape
+    )
+    discrete_environment_diameter_image_representations_example()
+
     print(
         set_ansi_escape
         + "\nRunning continuous environment: move_to_a_point\n"
         + reset_ansi_escape
     )
     continuous_environment_example_move_to_a_point()
 
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_to_a_point with irrelevant features and image representations\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_to_a_point_irrelevant_image()
+
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_along_a_line\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_along_a_line()
+
     print(
         set_ansi_escape
         + "\nRunning grid environment: move_to_a_point\n"