Commit 5e5dcb6

Added examples to example.py + minor changes, including to test_image_continuous.py and improved documentation
1 parent c89192a · commit 5e5dcb6

File tree

4 files changed: +161 −24 lines changed


example.py

Lines changed: 133 additions & 12 deletions
@@ -3,7 +3,10 @@
 Calling this file as a script, invokes the following examples:
 one for basic discrete environments
 one for discrete environments with image representations
+one for discrete environments with a diameter > 1 and image representations
 one for continuous environments with reward function move to a target point
+one for continuous environments with reward function move to a target point with irrelevant features and image representations
+one for continuous environments with reward function move along a line
 one for basic grid environments
 one for grid environments with image representations
 one for wrapping Atari env qbert
@@ -33,6 +36,7 @@ def display_image(obs, mode="RGB"):
     # used because the image is 3-D because frameworks like Ray expect the image
     # to be 3-D.
     img1.show()
+    return img1
 
 
 def discrete_environment_example():
@@ -110,6 +114,7 @@ def discrete_environment_image_representations_example():
     )
     action = env.action_space.sample()
     next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)
@@ -119,28 +124,75 @@ def discrete_environment_image_representations_example():
     display_image(next_state_image, mode="L")
 
 
-def continuous_environment_example_move_along_a_line():
+def discrete_environment_diameter_image_representations_example():
 
+    config = {}
+    config["seed"] = 3
+
+    config["state_space_type"] = "discrete"
+    config["action_space_size"] = 4
+    config["image_representations"] = True
+    config["delay"] = 1
+    config["diameter"] = 2
+    config["sequence_length"] = 3
+    config["reward_scale"] = 2.5
+    config["reward_shift"] = -1.75
+    config["reward_noise"] = 0.5  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1
+    config["reward_density"] = 0.25
+    config["make_denser"] = False
+    config["terminal_state_density"] = 0.25
+    config["maximally_connected"] = True
+    config["repeats_in_sequences"] = False
+
+    config["generate_random_mdp"] = True
+    env = RLToyEnv(**config)
+
+    # The environment maintains an augmented state which contains the underlying
+    # state used by the MDP to perform transitions and hand out rewards. We can
+    # fetch a dict containing the augmented state and current state like this:
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"]
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
+    # the current discrete state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    display_image(next_state_image, mode="L")
+
+
+def continuous_environment_example_move_to_a_point():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 4
+    config["state_space_dim"] = 2
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
     config["time_unit"] = 1  # Discretization of time domain and the time
     # duration over which action is applied
 
-    config["delay"] = 0
-    config["sequence_length"] = 10
-    config["reward_scale"] = 1.0
-    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
-    config["reward_function"] = "move_along_a_line"
+    config["make_denser"] = True
+    config["target_point"] = [0, 0]
+    config["target_radius"] = 0.05
+    config["state_space_max"] = 10
+    config["action_space_max"] = 1
+    config["action_loss_weight"] = 0.0
+
+    config["reward_function"] = "move_to_a_point"
 
     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -153,12 +205,12 @@ def continuous_environment_example_move_along_a_line():
     env.close()
 
 
-def continuous_environment_example_move_to_a_point():
+def continuous_environment_example_move_to_a_point_irrelevant_image():
     config = {}
     config["seed"] = 0
 
     config["state_space_type"] = "continuous"
-    config["state_space_dim"] = 2
+    config["state_space_dim"] = 4
     config["transition_dynamics_order"] = 1
     config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
     # inertia.
@@ -174,8 +226,55 @@ def continuous_environment_example_move_to_a_point():
 
     config["reward_function"] = "move_to_a_point"
 
+    config["image_representations"] = True
+    config["irrelevant_features"] = True
+    config["relevant_indices"] = [0, 1]
+
     env = RLToyEnv(**config)
     state = env.reset()
+    augmented_state_dict = env.get_augmented_state()
+    state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+
+    print(
+        "Taking a step in the environment with a random action and printing "
+        "the transition:"
+    )
+    action = env.action_space.sample()
+    next_state_image, reward, done, info = env.step(action)
+    augmented_state_dict = env.get_augmented_state()
+    next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
+    # the current continuous state.
+    print("sars', done =", state, action, reward, next_state, done)
+
+    env.close()
+
+    img1 = display_image(next_state_image, mode="RGB")
+    img1.save("cont_env_irrelevant_image.pdf")
+
+
+def continuous_environment_example_move_along_a_line():
+
+    config = {}
+    config["seed"] = 0
+
+    config["state_space_type"] = "continuous"
+    config["state_space_dim"] = 4
+    config["transition_dynamics_order"] = 1
+    config["inertia"] = 1  # 1 unit, e.g. kg for mass, or kg * m^2 for moment of
+    # inertia.
+    config["time_unit"] = 1  # Discretization of time domain and the time
+    # duration over which action is applied
+
+    config["delay"] = 0
+    config["sequence_length"] = 10
+    config["reward_scale"] = 1.0
+    config["reward_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["transition_noise"] = 0.1  # std dev of a Gaussian dist.
+    config["reward_function"] = "move_along_a_line"
+
+    env = RLToyEnv(**config)
+    state = env.reset().copy()
 
     print(
         "Taking a step in the environment with a random action and printing "
@@ -270,13 +369,14 @@ def atari_wrapper_example():
         action = env.action_space.sample()
         next_state, reward, done, info = env.step(action)
         print(
-            "s.shape ar s'.shape, done =",
+            "s.shape a r s'.shape, done =",
             state.shape,
             action,
             reward,
             next_state.shape,
             done,
         )
+        state = next_state
 
     env.close()
 
@@ -422,13 +522,34 @@ def procgen_wrapper_example():
     )
     discrete_environment_image_representations_example()
 
+    print(
+        set_ansi_escape
+        + "\nRunning discrete environment with diameter and image representations\n"
+        + reset_ansi_escape
+    )
+    discrete_environment_diameter_image_representations_example()
+
     print(
         set_ansi_escape
         + "\nRunning continuous environment: move_to_a_point\n"
         + reset_ansi_escape
    )
     continuous_environment_example_move_to_a_point()
 
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_to_a_point with irrelevant features and image representations\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_to_a_point_irrelevant_image()
+
+    print(
+        set_ansi_escape
+        + "\nRunning continuous environment: move_along_a_line\n"
+        + reset_ansi_escape
+    )
+    continuous_environment_example_move_along_a_line()
+
     print(
         set_ansi_escape
         + "\nRunning grid environment: move_to_a_point\n"
mdp_playground/envs/rl_toy_env.py

Lines changed: 16 additions & 6 deletions
@@ -1298,7 +1298,7 @@ def get_sequences(maximum, length, fraction, repeats=False, diameter=1):
             )
             permutations = []
             for i in range(length):
-                permutations.append(maximum - i // diameter)
+                permutations.append(maximum - (i // diameter))
             # permutations = list(range(maximum + 1 - length, maximum + 1))
             self.logger.info(
                 "No. of choices for each element in a"
@@ -1365,18 +1365,28 @@ def get_sequences(maximum, length, fraction, repeats=False, diameter=1):
                     list(range(maximum))
                 )  # # has to contain every number up to n so
                 # that any one of them can be picked as part
-                # of the sequence below
+                # of the sequence below. An example of curr_rem_digits
+                # is [[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]] for
+                # diameter = 3 and maximum = 6 (when number of non-terminal
+                # states = 6). e.g. permutations = [6, 6, 6, 5, 5].
                 for enum, j in enumerate(permutations):  # Goes
                     # from largest to smallest number among the factors of nPk
                     rem_ = curr_permutation % j
                     # rem_ = (enum // maximum) * maximum + rem_
                     seq_.append(
                         curr_rem_digits[(enum + i_s) % diameter][rem_]
-                        + ((enum + i_s) % diameter)
-                        * self.action_space_size[0]
-                    )  # Use (enum + i_s)
+                        + (((enum + i_s) % diameter)
+                        * self.action_space_size[0])
+                    )  # enum iterates over the current independent set
+                    # for the sequence being constructed by getting the
+                    # remainder with the diameter because each position in
+                    # the sequence belongs to a different independent set.
+                    # Adding i_s, i.e., the current independent set, to it
+                    # just offsets every state in the sequence
                     # to allow other independent sets to have
-                    # states beginning a rewardable sequence
+                    # states beginning a rewardable sequence. The multiplication
+                    # by self.action_space_size[0] is to get the correct state
+                    # number for the selected independent set.
                     del curr_rem_digits[(enum + i_s) % diameter][rem_]
                     # print("curr_rem_digits", curr_rem_digits)
                     curr_permutation = curr_permutation // j
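To make the new comments concrete, here is a toy illustration (not the library's code path) of the state-number arithmetic, using the example values from the comments: diameter = 3 and 6 non-terminal states per independent set, with a plain `action_space_size` variable standing in for `self.action_space_size[0]`.

# Toy illustration (not library code) of mapping a digit picked from an
# independent set to a global state number, per the comments above.
diameter = 3
action_space_size = 6  # stands in for self.action_space_size[0]
curr_rem_digits = [list(range(6)) for _ in range(diameter)]

i_s = 1   # independent set in which the rewardable sequence starts
enum = 2  # position within the sequence
rem_ = 4  # digit index chosen from the remaining digits

set_idx = (enum + i_s) % diameter  # each position uses the next set, cyclically
state = curr_rem_digits[set_idx][rem_] + set_idx * action_space_size
print(set_idx, state)  # 0 4 -> digit 4 of set 0, offset by 0 * 6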

mdp_playground/spaces/image_continuous.py

Lines changed: 5 additions & 2 deletions
@@ -83,9 +83,12 @@ def __init__(
 
         self.goal_colour = (0, 255, 0)
         self.agent_colour = (0, 0, 255)
-        self.term_colour = (255, 0, 0)
-        self.bg_colour = (0, 0, 0)
+        self.term_colour = (0, 0, 0)
+        self.bg_colour = (208, 208, 208)
         self.line_colour = (255, 255, 255)
+        # Alternate scheme
+        # self.term_colour = (255, 0, 0)
+        # self.bg_colour = (0, 0, 0)
 
         assert len(feature_space.shape) == 1
         relevant_dims = len(relevant_indices)

mdp_playground/spaces/test_image_continuous.py

Lines changed: 7 additions & 4 deletions
@@ -11,6 +11,8 @@
 
 class TestImageContinuous(unittest.TestCase):
     def test_image_continuous(self):
+        # For images in the paper width and height were 100
+        # circle_radius was 5
         render = False
 
         lows = 0.0
@@ -38,18 +40,19 @@ def test_image_continuous(self):
             img1.show()
         # img1.save("cont_state_no_target.pdf")
 
+        pos = np.array([10.0, 10.0])
         target = np.array([10, 10])
         imc = ImageContinuous(
             cs2,
-            circle_radius=10,
+            circle_radius=5,
             target_point=target,
-            width=400,
-            height=400,
+            width=100,
+            height=100,
         )
         img1 = Image.fromarray(np.squeeze(imc.generate_image(pos)), "RGB")
         if render:
             img1.show()
-        # img1.save("cont_state_target.pdf")
+        img1.save("cont_state_target.pdf")
 
         # Terminal sub-spaces
         lows = np.array([2.0, 4.0])
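As a usage note, the paper-figure settings restored above (width = 100, height = 100, circle_radius = 5) can be reproduced outside the test harness with a sketch like the following. It assumes ImageContinuous is importable from mdp_playground.spaces and that a 1-D gym Box serves as the feature space, as in this test file.

# Minimal sketch, assuming ImageContinuous is importable from
# mdp_playground.spaces; settings mirror the paper values in the test above.
import gym.spaces
import numpy as np
from PIL import Image
from mdp_playground.spaces import ImageContinuous

cs = gym.spaces.Box(low=0.0, high=20.0, shape=(2,))  # 1-D feature space
imc = ImageContinuous(
    cs,
    circle_radius=5,
    target_point=np.array([10, 10]),
    width=100,
    height=100,
)
img = Image.fromarray(np.squeeze(imc.generate_image(np.array([10.0, 10.0]))), "RGB")
img.save("cont_state_target.pdf")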
