commit 789214169906c7fb8133f46e210a99fb0ebd4538 Author: Mahdi Dibaiee Date: Sun Apr 2 16:48:56 2017 +0430 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..81d0f16 --- /dev/null +++ b/.gitignore @@ -0,0 +1,93 @@ +#### joe made this: http://goel.io/joe +#### python #### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +.venv/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + diff --git a/README.md b/README.md new file mode 100644 index 0000000..88154eb --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +Playing Flappy Bird using Evolution Strategies +============================================== + +After reading [Evolution Strategies as a Scalable Alternative to Reinforcement Learning](https://blog.openai.com/evolution-strategies/), I wanted to experiment something using Evolution Strategies, and Flappy Bird has always been one of my favorites when it comes to Game experiments. A simple yet challenging game. 
+ +The model learns to play very well after ~1500 iterations, but not flawlessly: it usually loses in difficult cases (a large height difference between two consecutive wall entrances). +The training process is pretty fast as there is no backpropagation, and it is not very costly in terms of memory as there is no need to record actions as in policy gradients. + +Here is a demonstration of the model before training and after ~3000 iterations (less than an hour of training): + +Before and after training: + +![before training](/demo/flappy-lose.gif) +![after training](/demo/flappy-success.gif) + +For each frame the bird stays alive, +0.1 score is given to it. For each wall it passes, +10 score is given. + +Try it yourself +--------------- +First, install dependencies: + +``` +pip install -r requirements +``` + +The pretrained parameters are in a file named `load.npy` and will be loaded when you run `train.py` or `demo.py`. + +`train.py` will train the model, saving the parameters to `saves/<run-id>/save-<iteration>`. + +`demo.py` shows the game in a GTK window so you can see how the AI actually plays. + +`play.py` lets you play the game yourself: press space to jump and, once you have lost, press enter to play again. :grin: +_pro tip: reach 100 score and you will become THUG FOR LIFE :smoking:_ + +Notes +----- + +It seems training for too long reduces the performance after a while; learning rate decay might help with that. +To try it yourself, there is a `long.npy` file, rename it to `load.npy` (backup `load.npy` before doing so) and run `demo.py`, +you will see the bird failing more often than not. `long.npy` was trained for ~2000 more iterations than `load.npy`. 
def timeout_kill(win, game):
    """Periodic GLib callback that tears the window down once the bird has lost.

    Returns True while the game is still running, which keeps the timeout
    scheduled. Once ``game.lost`` is set, the Gtk main loop is stopped, the
    window is destroyed and False is returned so the timeout is not re-armed.
    """
    if not game.lost:
        return True

    # the game is over: leave the main loop first, then drop the window
    Gtk.main_quit()
    win.destroy()
    return False
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    return 1 / (1 + np.exp(-x))


class EvolutionStrategy():
    """A small fully connected network trained with an evolution strategy.

    Parameters:
        fn: function that plays the game and returns the reward. It receives
            another function that maps a model input to the network output
            (used to decide whether the bird should jump),
            e.g. ``def fn(should_jump): ... return reward``.
        noisep: noise population, how many different noises are tried at
            each training step.
        sigma: standard deviation of the generated noise.
        alpha: learning rate.
        layer_sizes: sizes of the network layers, e.g. ``[[4, 500], [500, 1]]``.
        input_size: number of inputs (stored, not otherwise used by this class).
    """

    def __init__(self, fn, noisep, sigma, alpha, layer_sizes, input_size):
        self.fn = fn

        self.sigma = sigma
        self.noisep = noisep
        self.alpha = alpha
        self.layer_sizes = layer_sizes
        self.input_size = input_size

        # initialize layers randomly, one weight matrix per entry of layer_sizes
        self.layers = [np.random.uniform(-0.1, 0.1, size) for size in layer_sizes]

    @staticmethod
    def _propagate(layers, input):
        """Run ``input`` through ``layers``: sigmoid(xW) for every layer."""
        output = input
        for layer in layers:
            output = sigmoid(np.dot(output, layer))

        return output

    def forward(self, input):
        """Forward propagation of ``input`` through the current layers."""
        return self._propagate(self.layers, input)

    def train(self):
        """Run one training step and update ``self.layers`` in place.

        ``noisep`` noisy copies of the layers are evaluated with ``fn``; each
        noise then contributes to the update in proportion to its normalized
        reward. If the rewards carry no usable signal (all equal, or not
        finite) the layers are left untouched: normalizing would divide by a
        zero standard deviation and turn every weight into NaN.
        """
        noises = [[] for _ in self.layers]
        rewards = np.zeros(self.noisep)

        for i in range(self.noisep):
            noisy_layers = []

            for j, layer in enumerate(self.layers):
                # one noise per layer, shaped like the layer it perturbs
                noise = np.random.randn(*np.shape(layer))
                noises[j].append(noise)
                noisy_layers.append(layer + self.sigma * noise)

            # play one game with the noisy layers and record its reward; the
            # default argument binds this iteration's layers to the callback
            rewards[i] = self.fn(
                lambda data, layers=noisy_layers: self._propagate(layers, data)
            )

        spread = np.std(rewards)
        if not spread > 0:
            # zero (or NaN) spread: nothing to learn from this population
            return

        # normalize the rewards
        advantages = (rewards - np.mean(rewards)) / spread
        scale = self.alpha / (self.noisep * self.sigma)

        for j, layer_noises in enumerate(noises):
            stacked = np.array(layer_noises)

            # np.dot(stacked.T, advantages) scales each noise's contribution
            # to the update by how much reward it had received
            update = scale * np.dot(stacked.T, advantages).T
            self.layers[j] = self.layers[j] + update
class dotdict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    ``d.key`` is equivalent to ``d['key']``. Accessing or deleting a key that
    does not exist raises AttributeError, so ``hasattr`` and
    ``getattr(d, name, default)`` behave as they do for ordinary objects
    instead of silently yielding None.
    """

    def __getattr__(self, name):
        # only reached when normal attribute lookup fails
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name) from None
# limit a headless game to MAX_FRAMES frames while training, sometimes a game
# might take too long to finish after a while of training
MAX_FRAMES = 10000


def _act(fn, game):
    """Ask the model once whether to jump, act on it, then advance the game one frame."""
    # input of the model: bird x, bird y, bird x minus wall x, y of the wall's gate
    data = np.array([[game.bird.x, game.bird.y, game.bird.x - game.wall.x, game.wall.gate.y]])

    if fn(data)[0][0] > 0.5:
        game.bird.jump()

    game.update()


def play(fn, step=None):
    """Play one game of 200x200, using ``fn`` to decide when the bird jumps.

    Parameters:
        fn: maps a ``(1, 4)`` input array to an array whose ``[0][0]`` entry
            is the jump score; the bird jumps when it is above 0.5.
        step: optional frontend driver. When given, it is called as
            ``step(game, update)`` where ``update()`` advances the game by one
            frame (see ``show_update``), and its return value is returned.
            The ``step`` functions in demo.py and train.py are such drivers.

    Returns:
        The final score of the game when played headlessly (at most
        MAX_FRAMES frames), otherwise whatever ``step`` returns.
    """
    game = Game(200, 200)

    # while showing to the user, the frontend owns the loop
    if step:
        return step(game, lambda: show_update(fn, game))

    frame = 0
    while not game.lost and frame < MAX_FRAMES:
        frame += 1
        _act(fn, game)

    return game.score


def show_update(fn, game):
    """Advance ``game`` by one model-driven frame.

    Returns True after advancing the game and False, without touching the
    game, once it has been lost.
    """
    if game.lost:
        return False

    _act(fn, game)
    return True
class Window(Gtk.Window):
    """Gtk window that draws a Game: the bird, the current wall, the score and a game-over label."""

    def __init__(self, game=None):
        """Build the widgets and start drawing.

        Parameters:
            game: an existing Game to display. When omitted, the window
                creates its own Game sized after the window and schedules
                ``update`` itself every GAME_SPEED milliseconds.
        """
        Gtk.Window.__init__(self, title='Flappy Bird Evolution Strategies')

        # the size is read once at construction and reused for all drawing
        (width, height) = self.get_size()
        self.width = width
        self.height = height

        if game:
            self.game = game
        else:
            self.game = Game(width, height)

        # every widget lives in a single Gtk.Fixed and is positioned by hand
        self.fixed = Gtk.Fixed()
        self.birdie = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/birdie.png'))
        self.fixed.add(self.birdie)

        # the wall is drawn as two boxes, one above and one below the gate
        self.wall_top = Gtk.Box()
        self.wall_bottom = Gtk.Box()

        bg = Gdk.RGBA(0.1, 1, 0.1, 1)
        self.wall_top.override_background_color(0, bg)
        self.wall_bottom.override_background_color(0, bg)

        self.fixed.add(self.wall_top)
        self.fixed.add(self.wall_bottom)

        self.add(self.fixed)

        self.gameover = Gtk.Label.new('')
        self.gameover.override_color(0, Gdk.RGBA(1, 0.2, 0.2, 1))

        self.score = Gtk.Label.new('')

        self.thug = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/thug-text.png'))

        self.fixed.add(self.gameover)
        self.fixed.add(self.score)
        self.fixed.add(self.thug)
        self.fixed.move(self.gameover, width / 2, height / 2)
        self.fixed.move(self.score, 10, 10)
        # negative coordinates, presumably to keep the image out of view
        # until update() moves it once THUG_SCORE is exceeded
        self.fixed.move(self.thug, -75, -75)

        # draw the first frame right away
        self.update()

        # with an external game the caller is expected to schedule update()
        if not game:
            GLib.timeout_add(GAME_SPEED, self.update)

        self.connect("key-press-event", self.on_key)

    def update(self):
        """Advance the game by one frame and redraw; always returns True.

        Once the game is lost only the 'Game Over!' label is shown and
        nothing else is updated.

        NOTE(review): this method calls ``self.game.update()`` itself. If the
        caller also schedules a callback that advances the same Game, the
        game is stepped twice per tick - confirm that this is intended.
        """
        if self.game.lost:
            self.gameover.set_text('Game Over!')
            return True
        else:
            self.gameover.set_text('')

        # the label shows the score as it was before this frame's update
        self.score.set_text(str(self.game.score))

        self.game.update()

        # above THUG_SCORE: swap the bird image and move the thug image into
        # the window (this runs on every frame while the score stays above it)
        if self.game.score > THUG_SCORE:
            self.birdie.set_from_file(path.join(path.dirname(__file__), 'assets/birdie-thug.png'))
            self.fixed.move(self.thug, 10, self.height - 60)

        # top box spans from 0 to gate.y, bottom box from the end of the gate
        # down to the window height
        self.wall_top.set_size_request(self.game.wall.width, self.game.wall.gate.y)
        self.wall_bottom.set_size_request(self.game.wall.width, self.height - self.game.wall.gate.y - self.game.wall.gate.height)

        # the bird is always drawn at the horizontal centre, only its y moves
        self.fixed.move(self.birdie, self.width / 2, self.game.bird.y)
        self.fixed.move(self.wall_top, self.game.wall.x, 0)
        self.fixed.move(self.wall_bottom, self.game.wall.x, self.game.wall.gate.y + self.game.wall.gate.height)

        return True

    def on_key(self, win, key):
        """Key-press handler: space makes the bird jump, Return restarts a lost game."""
        if key.keyval == Gdk.KEY_space:
            self.game.bird.jump()

        # restarting replaces the Game with a fresh one of the same size
        if key.keyval == Gdk.KEY_Return and self.game.lost:
            self.game = Game(self.game.width, self.game.height)