
Merge master into new_expts #13


Merged

merged 6 commits into new_expts from master on Sep 24, 2021
11 changes: 11 additions & 0 deletions codecov.yml
@@ -0,0 +1,11 @@
+coverage:
+  range: 68..100
+  round: down
+  precision: 2
+  status:
+    project:
+      default:
+        # basic
+        target: 68%
+        threshold: 5%
+        base: auto
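
Note: if I read Codecov's options correctly, this config makes the project status check pass while overall coverage stays at or above the 68% target, tolerates a drop of up to 5 percentage points relative to the auto-detected base commit, and displays coverage (rounded down to 2 decimal places) on a red-to-green scale over the 68..100% range.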
11 changes: 7 additions & 4 deletions example.py
@@ -256,7 +256,7 @@ def atari_wrapper_example():
         "state_space_type": "discrete",
     }
 
-    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs import GymEnvWrapper
     import gym
 
     ae = gym.make("QbertNoFrameskip-v4")
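
For context, a minimal sketch of how the re-exported GymEnvWrapper is then applied to the Atari env above. This assumes the wrapper takes the base env plus the config dict as keyword arguments (as the surrounding example suggests) and uses the 4-tuple step() API that gym had in 2021:

    # Sketch, not part of this diff: wrap the raw Atari env with the
    # meta-features described by the `config` dict defined in the example.
    env = GymEnvWrapper(ae, **config)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())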
@@ -298,8 +298,12 @@ def mujoco_wrapper_example():
     # This actually makes a subclass and not a wrapper. Because, some
     # frameworks might need an instance of this class to also be an instance
     # of the Mujoco base_class.
-    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
-    from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    try:
+        from mdp_playground.envs import get_mujoco_wrapper
+        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+    except Exception as e:
+        print("Exception:", e, "caught. You may need to install mujoco-py. NOT running mujoco_wrapper_example.")
+        return
 
     wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)
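
A rough usage sketch for the class returned by get_mujoco_wrapper; the "time_unit" key is an illustrative assumption, not something this diff shows:

    # The returned class is a subclass of HalfCheetahEnv (see the comment
    # above), so it is instantiated like the base env, with optional
    # mdp_playground config ("time_unit" is an assumed example key).
    env = wrapped_mujoco_env(**{"time_unit": 0.2})
    obs = env.reset()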

@@ -371,7 +375,6 @@ def mujoco_wrapper_example():
             "action_space_size": 8,
             "state_space_type": "discrete",
             "action_space_type": "discrete",
-            "terminal_state_density": 0.25,
             "maximally_connected": True,
         }
     )
6 changes: 6 additions & 0 deletions mdp_playground/envs/__init__.py
@@ -1 +1,7 @@
 from mdp_playground.envs.rl_toy_env import RLToyEnv
+
+try:
+    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
+    from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper
+except Exception as e:
+    print("Exception:", e, "caught. You may need to install Ray or mujoco-py.")
21 changes: 18 additions & 3 deletions mdp_playground/envs/rl_toy_env.py
@@ -53,7 +53,7 @@ class RLToyEnv(gym.Env):
     diameter : int > 0
         For discrete environments, if diameter = d, the set of states is set to be a d-partite graph (and NOT a complete d-partite graph), where, if we order the d sets as 1, 2, .., d, states from set 1 will have actions leading to states in set 2 and so on, with the final set d having actions leading to states in set 1. Number of actions for each state will, thus, be = (number of states) / (d). Default value: 1 for discrete environments. For continuous environments, this dimension is set automatically based on the state_space_max value.
     terminal_state_density : float in range [0, 1]
-        For discrete environments, the fraction of states that are terminal; the terminal states are fixed to the "last" states when we consider them to be ordered by their numerical value. This is w.l.o.g. because discrete states are categorical. For continuous environments, please see terminal_states and term_state_edge for how to control terminal states.
+        For discrete environments, the fraction of states that are terminal; the terminal states are fixed to the "last" states when we consider them to be ordered by their numerical value. This is w.l.o.g. because discrete states are categorical. For continuous environments, please see terminal_states and term_state_edge for how to control terminal states. Default value: 0.25.
     term_state_reward : float
         Adds this to the reward if a terminal state was reached at the current time step. Default value: 0.
     image_representations : boolean
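
To make the diameter description concrete: with 8 states and diameter d = 2, the state space forms two sets of 4 states; actions from a set-1 state lead only to set-2 states and vice versa, so each state has 8 / 2 = 4 actions.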
@@ -217,6 +217,16 @@ def __init__(self, **config):
 
         print("Passed config:", config, "\n")
 
+        if config == {}:
+            config = {
+                "state_space_size": 8,
+                "action_space_size": 8,
+                "state_space_type": "discrete",
+                "action_space_type": "discrete",
+                "terminal_state_density": 0.25,
+                "maximally_connected": True,
+            }
+
         # Print initial "banner"
         screen_output_width = 132  # #hardcoded #TODO get from system
         repeat_equal_sign = (screen_output_width - 20) // 2
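
With this change an entirely empty config resolves to the default dict above, so the toy env can now be constructed with no arguments at all. A minimal sketch, assuming the usual pre-0.26 gym reset()/step() API:

    from mdp_playground.envs import RLToyEnv

    env = RLToyEnv()  # {} -> the 8-state, 8-action default config above
    state = env.reset()
    next_state, reward, done, info = env.step(env.action_space.sample())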
@@ -329,6 +339,11 @@ def __init__(self, **config):
         # if config["state_space_type"] == "discrete":
         # assert "init_state_dist" in config
 
+        if "terminal_state_density" not in config:
+            self.terminal_state_density = 0.25
+        else:
+            self.terminal_state_density = config["terminal_state_density"]
+
         if not self.use_custom_mdp:
             if "generate_random_mdp" not in config:
                 self.generate_random_mdp = True
@@ -786,7 +801,7 @@ def init_terminal_states(self):
         """Initialises terminal state set to be the 'last' states for discrete environments. For continuous environments, terminal states will be in a hypercube centred around config['terminal_states'] with the edge of the hypercube of length config['term_state_edge']."""
         if self.config["state_space_type"] == "discrete":
             if (
-                self.use_custom_mdp and "terminal_state_density" not in self.config
+                self.use_custom_mdp and "terminal_states" in self.config
             ):  # custom/user-defined terminal states
                 self.is_terminal_state = (
                     self.config["terminal_states"]
@@ -796,7 +811,7 @@ def init_terminal_states(self):
             else:
                 # Define the no. of terminal states per independent set of the state space
                 self.num_terminal_states = int(
-                    self.config["terminal_state_density"] * self.action_space_size[0]
+                    self.terminal_state_density * self.action_space_size[0]
                 )  # #hardcoded ####IMP Using action_space_size
                 # since it contains state_space_size // diameter
                 # if self.num_terminal_states == 0:  # Have at least 1 terminal state?
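
Concretely, under the new defaults (8 states, diameter 1, terminal_state_density 0.25): action_space_size[0] holds 8 // 1 = 8, so num_terminal_states = int(0.25 * 8) = 2, and the two numerically last states (6 and 7, 0-indexed) become terminal.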