"grayscale_obs": False, # grayscale_obs gives a 2-D observation tensor.
35
+
"image_width": 40,
36
+
"image_padding": 30,
37
+
"state_space_type": "discrete",
38
+
"action_space_type": "discrete",
39
+
"seed": 0,
40
+
# },
41
+
# 'seed': 0, #seed
42
+
},
43
+
}
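# A note on the image sizes (an observation, assuming image_padding is applied
# on each side): image_width 40 + 2 * image_padding 30 gives a 100x100
# observation, which is what the filters_100x100 spec below is sized for.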

algorithm = "A3C"
agent_config = {  # Taken from Ray tuned_examples
    "clip_rewards": True,
    "lr": 1e-4,
    # Value function loss coefficient
    "vf_loss_coeff": 2.5,
    # Entropy coefficient
    "entropy_coeff": 0.01,
    "min_iter_time_s": 0,
    "num_envs_per_worker": 5,
    "num_gpus": 0,
    "num_workers": 3,
    "rollout_fragment_length": 10,
    "timesteps_per_iteration": 10000,
    "tf_session_args": {
        # Note: overridden by `local_tf_session_args`
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
        # "gpu_options": {
        #     "allow_growth": True,
        # },
        # "log_device_placement": False,
        "device_count": {
            "CPU": 2,
            # "GPU": 0,
        },
        # "allow_soft_placement": True,  # required by PPO multi-gpu
    },
    # Override the following tf session args on the local worker
    "local_tf_session_args": {
        "intra_op_parallelism_threads": 4,
        "inter_op_parallelism_threads": 4,
    },
}
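# A hedged sketch of how dicts like these are typically consumed (assumed
# usage, not shown in this file): the module-level pieces (algorithm,
# agent_config, model_config, eval_config) are merged into a single RLlib
# config and handed to Ray Tune, e.g.
#
#   from ray import tune
#   tune.run(algorithm, config={**agent_config, **model_config, **eval_config})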


filters_100x100 = [
    [16, [8, 8], 4],  # changes from 42x42x1 with padding 2 to 22x22x16 (or 52x52x16 for 102x102x1)
    [32, [4, 4], 2],
    [128, [13, 13], 1],
]
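# A hedged sanity check of the filter spec above, assuming RLlib's default
# vision-net behaviour: "same" padding for every conv layer except the last,
# which uses "valid" padding. Under that assumption a 100x100 input
# (image_width 40 + 2 * image_padding 30) shrinks as 100 -> 25 -> 13 -> 1,
# so the final [13, 13] kernel collapses the feature map to 1x1x128.
if __name__ == "__main__":
    import math

    size = 100
    for i, (num_filters, (kernel, _), stride) in enumerate(filters_100x100):
        if i < len(filters_100x100) - 1:  # "same" padding: ceil(size / stride)
            size = math.ceil(size / stride)
        else:  # "valid" padding: floor((size - kernel) / stride) + 1
            size = (size - kernel) // stride + 1
        print(f"conv layer {i}: {size}x{size}x{num_filters}")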


model_config = {
    "model": {
        "fcnet_hiddens": [256, 256],
        # "custom_preprocessor": "ohe",
        "custom_options": {},  # extra options to pass to your preprocessor
        "conv_activation": "relu",
        "conv_filters": filters_100x100,
        # "fcnet_activation": "tanh",
        "use_lstm": False,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action_reward": False,
    },
}
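# Note (an assumption, not stated in this file): the keys under "model" mirror
# Ray RLlib's MODEL_DEFAULTS; any key left unspecified falls back to RLlib's
# default value, and "conv_filters" overrides the built-in filter presets.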


eval_config = {
    "evaluation_interval": None,  # evaluate every this many training iterations; None disables evaluation
    "evaluation_config": {
        "explore": False,
        "exploration_fraction": 0,
        "exploration_final_eps": 0,
        "evaluation_num_episodes": 10,
        # "horizon": 100,
        "env_config": {
            # Hack: used inside the Ray callback on_episode_end() to tell
            # evaluation mode apart from training mode, so that eval stats
            # can be written out.
            "dummy_eval": True,