From 104cab8c0f5b8ea8bd003b0128e0d2416e76e6be Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sun, 4 Jun 2023 11:01:50 +0200 Subject: [PATCH] example_mountain_car: update the code to the new gymnasium API The example code was outdated and had to be updated to work with the current Gymnasium API (the Farama Foundation successor to OpenAI Gym). --- examples/example_mountain_car.py | 53 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/examples/example_mountain_car.py b/examples/example_mountain_car.py index 9b808ea5..65819313 100644 --- a/examples/example_mountain_car.py +++ b/examples/example_mountain_car.py @@ -35,7 +35,7 @@ import cgp try: - import gym + import gymnasium as gym except ImportError: raise ImportError( "Failed to import the OpenAI Gym package. Please install it via `pip install gym`." @@ -68,29 +68,22 @@ class ConstantFloatTen(cgp.ConstantFloat): def inner_objective(f, seed, n_runs_per_individual, n_total_steps, *, render): - - env = gym.make("MountainCarContinuous-v0") - - env.seed(seed) + env = gym.make("MountainCarContinuous-v0", render_mode="human" if render else None) cum_reward_all_episodes = [] for _ in range(n_runs_per_individual): - observation = env.reset() + observation, _ = env.reset(seed=seed) cum_reward_this_episode = 0 for _ in range(n_total_steps): - - if render: - env.render() - continuous_action = f(*observation) - observation, reward, done, _ = env.step([continuous_action]) + observation, reward, terminated, truncated, _ = env.step([continuous_action]) cum_reward_this_episode += reward - if done: + if terminated or truncated: cum_reward_all_episodes.append(cum_reward_this_episode) cum_reward_this_episode = 0 - observation = env.reset() + observation, _ = env.reset(seed=seed) env.close() @@ -147,8 +140,14 @@ def evolve(seed): objective_params = {"n_runs_per_individual": 3, "n_total_steps": 2000} + population_params = {"n_parents": 1, "seed": seed} + genome_params = { "n_inputs": 2, + "n_outputs": 1, + "n_columns": 16,
"n_rows": 1, + "levels_back": None, "primitives": ( cgp.Add, cgp.Sub, @@ -160,14 +159,19 @@ def evolve(seed): ), } - ea_params = {"n_processes": 4} + ea_params = { + "n_offsprings": 4, + "tournament_size": 1, + "mutation_rate": 0.04, + "n_processes": 4 + } evolve_params = { "max_generations": int(args["--max-generations"]), "termination_fitness": 100.0, } - pop = cgp.Population(genome_params=genome_params) + pop = cgp.Population(**population_params, genome_params=genome_params) ea = cgp.ea.MuPlusLambda(**ea_params) @@ -186,8 +190,8 @@ def recording_callback(pop): n_total_steps=objective_params["n_total_steps"], ) - pop = cgp.evolve( - obj, pop, ea, **evolve_params, print_progress=True, callback=recording_callback + cgp.evolve( + pop, obj, ea, **evolve_params, print_progress=True, callback=recording_callback ) return history, pop.champion @@ -217,9 +221,7 @@ def plot_fitness_over_generation_index(history): def evaluate_champion(ind): env = gym.make("MountainCarContinuous-v0") - - env.seed(seed) - observation = env.reset() + observation, _ = env.reset(seed=seed) f = ind.to_func() @@ -228,13 +230,14 @@ def evaluate_champion(ind): while len(cum_reward_all_episodes) < 100: continuous_action = f(*observation) - observation, reward, done, _ = env.step([continuous_action]) + observation, reward, terminated, truncated, _ = env.step([continuous_action]) cum_reward_this_episode += reward - if done: + + if terminated or truncated: cum_reward_all_episodes.append(cum_reward_this_episode) cum_reward_this_episode = 0 - observation = env.reset() + observation, _ = env.reset(seed=seed) env.close() @@ -272,8 +275,8 @@ def visualize_behaviour_for_evolutionary_jumps(seed, history, only_final_solutio x_0, x_1 = sympy.symbols("x_0, x_1") f_lambdify = sympy.lambdify([x_0, x_1], expr) - def f(x, v): - return f_lambdify(x, v) + def f(x, y): + return f_lambdify(x, y) inner_objective(f, seed, n_runs_per_individual, n_total_steps, render=True)