
Commit 868960e

Merge pull request #17 from automl/master
Update to current master
2 parents 3020c04 + bfe2ea3 commit 868960e

34 files changed (+14884 / -169 lines)

.coveragerc

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+[run]
+omit =
+    example.py
+    mdp_playground/analysis/mdpp_to_cave.py

codecov.yml

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+coverage:
+  range: 68..100
+  round: down
+  precision: 2
+  status:
+    project:
+      default:
+        # basic
+        target: 68%
+        threshold: 5%
+        base: auto

example.py

Lines changed: 124 additions & 38 deletions
@@ -8,6 +8,8 @@
 one for grid environments with image representations
 one for wrapping Atari env qbert
 one for wrapping Mujoco env HalfCheetah
+one for wrapping MiniGrid env
+one for wrapping ProcGen env
 two examples at the end showing how to create toy envs using gym.make()

 Many further examples can be found in test_mdp_playground.py.
@@ -21,6 +23,17 @@
 import numpy as np


+def display_image(obs, mode="RGB"):
+    # Display the image observation associated with the next state
+    from PIL import Image
+
+    # Because numpy is row-major and Image is column major, need to transpose
+    obs = obs.transpose(1, 0, 2)
+    img1 = Image.fromarray(np.squeeze(obs), mode)  # squeeze() is
+    # used because the image is 3-D because frameworks like Ray expect the image
+    # to be 3-D.
+    img1.show()
+
 def discrete_environment_example():

     config = {}
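The display_image helper added above is what the image-representation examples further down in this diff now call instead of duplicating the PIL code. A minimal, self-contained sketch of invoking it directly (the random array and the import from example.py are illustrative assumptions, not part of the commit):

    import numpy as np
    from example import display_image  # helper defined in the hunk above

    # Stand-in for a grayscale image observation of shape (height, width, channels)
    dummy_obs = np.random.randint(0, 256, size=(100, 100, 1), dtype=np.uint8)
    display_image(dummy_obs, mode="L")  # "L" for grayscale, "RGB" for 3-channel observations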
@@ -100,18 +113,10 @@ def discrete_environment_image_representations_example():
     # the current discrete state.
     print("sars', done =", state, action, reward, next_state, done)

-    # Display the image observation associated with the next state
-    from PIL import Image
-
-    # Because numpy is row-major and Image is column major, need to transpose
-    next_state_image = next_state_image.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_state_image), "L")  # 'L' is used for
-    # black and white. squeeze() is used because the image is 3-D because
-    # frameworks like Ray expect the image to be 3-D.
-    img1.show()
-
     env.close()

+    display_image(next_state_image, mode="L")
+

 def continuous_environment_example_move_along_a_line():
@@ -235,15 +240,8 @@ def grid_environment_image_representations_example():
     env.reset()
     env.close()

-    # Display the image observation associated with the next state
-    from PIL import Image
+    display_image(next_obs)

-    # Because numpy is row-major and Image is column major, need to transpose
-    next_obs = next_obs.transpose(1, 0, 2)
-    img1 = Image.fromarray(np.squeeze(next_obs), "RGB")  # squeeze() is
-    # used because the image is 3-D because frameworks like Ray expect the image
-    # to be 3-D.
-    img1.show()


 def atari_wrapper_example():
@@ -256,29 +254,32 @@ def atari_wrapper_example():
         "state_space_type": "discrete",
     }

-    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs import GymEnvWrapper
     import gym

     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
     state = env.reset()

     print(
-        "Taking a step in the environment with a random action and printing the transition:"
-    )
-    action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print(
-        "s.shape ar s'.shape, done =",
-        state.shape,
-        action,
-        reward,
-        next_state.shape,
-        done,
+        "Taking 10 steps in the environment with a random action and printing the transition:"
     )
+    for i in range(10):
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print(
+            "s.shape ar s'.shape, done =",
+            state.shape,
+            action,
+            reward,
+            next_state.shape,
+            done,
+        )

     env.close()

+    display_image(next_state)


 def mujoco_wrapper_example():
@@ -298,23 +299,103 @@ def mujoco_wrapper_example():
     # This actually makes a subclass and not a wrapper. Because, some
     # frameworks might need an instance of this class to also be an instance
     # of the Mujoco base_class.
-    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
-    from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    try:
+        from mdp_playground.envs import get_mujoco_wrapper
+        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
+
+        env = wrapped_mujoco_env(**config)
+        state = env.reset()
+
+        print(
+            "Taking a step in the environment with a random action and printing the transition:"
+        )
+        action = env.action_space.sample()
+        next_state, reward, done, info = env.step(action)
+        print("sars', done =", state, action, reward, next_state, done)

-    wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
+        env.close()

-    env = wrapped_mujoco_env(**config)
-    state = env.reset()
+    except ImportError as e:
+        print("Exception:", type(e), e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
+        return
+
+
+def minigrid_wrapper_example():
+
+    config = {
+        "seed": 0,
+        "delay": 1,
+        "transition_noise": 0.25,
+        "reward_noise": lambda a: a.normal(0, 0.1),
+        "state_space_type": "discrete",
+    }
+
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    import gym
+
+    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    env = gym.make('MiniGrid-Empty-8x8-v0')
+    env = RGBImgPartialObsWrapper(env)  # Get pixel observations
+    env = ImgObsWrapper(env)  # Get rid of the 'mission' field
+
+    env = GymEnvWrapper(env, **config)
+    obs = env.reset()  # This now produces an RGB tensor only

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
-    print("sars', done =", state, action, reward, next_state, done)
+    next_obs, reward, done, info = env.step(action)
+    print(
+        "s.shape ar s'.shape, done =",
+        obs.shape,
+        action,
+        reward,
+        next_obs.shape,
+        done,
+    )
+
+    env.close()
+
+    display_image(next_obs)
+
+
+def procgen_wrapper_example():
+
+    config = {
+        "seed": 0,
+        "delay": 1,
+        "transition_noise": 0.25,
+        "reward_noise": lambda a: a.normal(0, 0.1),
+        "state_space_type": "discrete",
+    }
+
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    import gym
+
+    env = gym.make("procgen:procgen-coinrun-v0")
+    env = GymEnvWrapper(env, **config)
+    obs = env.reset()
+
+    print(
+        "Taking a step in the environment with a random action and printing the transition:"
+    )
+    action = env.action_space.sample()
+    next_obs, reward, done, info = env.step(action)
+    print(
+        "s.shape ar s'.shape, done =",
+        obs.shape,
+        action,
+        reward,
+        next_obs.shape,
+        done,
+    )

     env.close()

+    display_image(next_obs)
+

 if __name__ == "__main__":

@@ -358,6 +439,12 @@ def mujoco_wrapper_example():
     print(set_ansi_escape + "\nRunning Mujoco wrapper example:\n" + reset_ansi_escape)
     mujoco_wrapper_example()

+    print(set_ansi_escape + "\nRunning MiniGrid wrapper example:\n" + reset_ansi_escape)
+    minigrid_wrapper_example()
+
+    # print(set_ansi_escape + "\nRunning ProcGen wrapper example:\n" + reset_ansi_escape)
+    # procgen_wrapper_example()
+
     # Using gym.make() example 1
     import mdp_playground
     import gym
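procgen_wrapper_example() is left commented out in the __main__ block above and, unlike the MuJoCo example, has no ImportError guard, so it would fail when the procgen package is not installed. A hypothetical sketch of wrapping it with the same try/except pattern used in mujoco_wrapper_example() (the function name and message text are illustrative, not part of this commit):

    def procgen_wrapper_example_guarded():
        # Hypothetical variant: same ImportError guard as mujoco_wrapper_example()
        try:
            import procgen  # noqa: F401  # fail early if the procgen extra is missing
        except ImportError as e:
            print("Exception:", type(e), e, "caught. You may need to install procgen. NOT running procgen_wrapper_example.")
            return
        procgen_wrapper_example()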
@@ -371,7 +458,6 @@ def mujoco_wrapper_example():
             "action_space_size": 8,
             "state_space_type": "discrete",
             "action_space_type": "discrete",
-            "terminal_state_density": 0.25,
             "maximally_connected": True,
         }
     )
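The hunk above shows only the tail of the first gym.make() example; a rough sketch of what the full call plausibly looks like after this commit (the registered ID "RLToy-v0", the "state_space_size" entry, and the exact surrounding lines are assumptions based on MDP Playground's usual registration, not shown in this diff):

    import gym
    import mdp_playground  # importing the package registers the toy envs with gym

    env = gym.make(
        "RLToy-v0",  # assumed env ID
        **{
            "state_space_size": 8,  # assumed; not visible in the hunk above
            "action_space_size": 8,
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "maximally_connected": True,
        }
    )
    state = env.reset()
    env.close()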
Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+import itertools
+from ray import tune
+from collections import OrderedDict
+num_seeds = 5
+timesteps_total = 10_000_000
+
+var_env_configs = OrderedDict(
+    {
+        "image_transforms": [
+            "none",
+            "shift",
+            # "scale",
+            # "flip",
+            # "rotate",
+            # "shift,scale,rotate,flip",
+        ],  # image_transforms,
+        "dummy_seed": [i for i in range(num_seeds)],
+    }
+)
+
+var_configs = OrderedDict({"env": var_env_configs})
+
+env_config = {
+    "env": "GymEnvWrapper-Atari",
+    "env_config": {
+        "AtariEnv": {
+            "game": "beam_rider",
+            "obs_type": "image",
+            "frameskip": 1,
+        },
+        # "GymEnvWrapper": {
+        "atari_preprocessing": True,
+        "frame_skip": 4,
+        "grayscale_obs": False,  # grayscale_obs gives a 2-D observation tensor.
+        "image_width": 40,
+        "image_padding": 30,
+        "state_space_type": "discrete",
+        "action_space_type": "discrete",
+        "seed": 0,
+        # },
+        # 'seed': 0, #seed
+    },
+}
+
+algorithm = "A3C"
+agent_config = {  # Taken from Ray tuned_examples
+    "clip_rewards": True,
+    "lr": 1e-4,
+    # Value Function Loss coefficient
+    "vf_loss_coeff": 2.5,
+    # Entropy coefficient
+    "entropy_coeff": 0.01,
+    "min_iter_time_s": 0,
+    "num_envs_per_worker": 5,
+    "num_gpus": 0,
+    "num_workers": 3,
+    "rollout_fragment_length": 10,
+    "timesteps_per_iteration": 10000,
+    "tf_session_args": {
+        # note: overriden by `local_tf_session_args`
+        "intra_op_parallelism_threads": 4,
+        "inter_op_parallelism_threads": 4,
+        # "gpu_options": {
+        #     "allow_growth": True,
+        # },
+        # "log_device_placement": False,
+        "device_count": {
+            "CPU": 2,
+            # "GPU": 0,
+        },
+        # "allow_soft_placement": True,  # required by PPO multi-gpu
+    },
+    # Override the following tf session args on the local worker
+    "local_tf_session_args": {
+        "intra_op_parallelism_threads": 4,
+        "inter_op_parallelism_threads": 4,
+    },
+}
+
+
+filters_100x100 = [
+    [
+        16,
+        [8, 8],
+        4,
+    ],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
+    [32, [4, 4], 2],
+    [
+        128,
+        [13, 13],
+        1,
+    ],
+]
+
+
+model_config = {
+    "model": {
+        "fcnet_hiddens": [256, 256],
+        # "custom_preprocessor": "ohe",
+        "custom_options": {},  # extra options to pass to your preprocessor
+        "conv_activation": "relu",
+        "conv_filters": filters_100x100,
+        # "fcnet_activation": "tanh",
+        "use_lstm": False,
+        "max_seq_len": 20,
+        "lstm_cell_size": 256,
+        "lstm_use_prev_action_reward": False,
+    },
+}
+
+
+eval_config = {
+    "evaluation_interval": None,  # I think this means every x training_iterations
+    "evaluation_config": {
+        "explore": False,
+        "exploration_fraction": 0,
+        "exploration_final_eps": 0,
+        "evaluation_num_episodes": 10,
+        # "horizon": 100,
+        "env_config": {
+            "dummy_eval": True,  # hack Used to check if we are in evaluation mode or training mode inside Ray callback on_episode_end() to be able to write eval stats
+            "transition_noise": 0
+            if "state_space_type" in env_config["env_config"]
+            and env_config["env_config"]["state_space_type"] == "discrete"
+            else tune.function(lambda a: a.normal(0, 0)),
+            "reward_noise": tune.function(lambda a: a.normal(0, 0)),
+            "action_loss_weight": 0.0,
+        },
+    },
+}
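The new file above appears to follow the layout of MDP Playground's Ray experiment configs: var_env_configs lists the values each varied dimension can take, and the cartesian product of those lists defines the individual runs (here 2 image_transforms x 5 dummy_seed values = 10 configurations). A minimal sketch of expanding such a grid; the expand_grid helper below is illustrative, and the repository's own run script may handle this differently:

    import itertools
    from collections import OrderedDict

    def expand_grid(grid):
        # Yield one OrderedDict per point in the cartesian product of the value lists.
        keys = list(grid.keys())
        for values in itertools.product(*(grid[k] for k in keys)):
            yield OrderedDict(zip(keys, values))

    # With var_env_configs as defined above: 2 image_transforms x 5 dummy_seeds = 10 configs
    # for cfg in expand_grid(var_env_configs):
    #     print(cfg)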
