initial commit

Mahdi Dibaiee 2017-04-02 16:48:56 +04:30
commit 7892141699
19 changed files with 546 additions and 0 deletions

93
.gitignore vendored Normal file

@@ -0,0 +1,93 @@
#### joe made this: http://goel.io/joe
#### python ####
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
.venv/
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject

40
README.md Normal file

@@ -0,0 +1,40 @@
Playing Flappy Bird using Evolution Strategies
==============================================
After reading [Evolution Strategies as a Scalable Alternative to Reinforcement Learning](https://blog.openai.com/evolution-strategies/), I wanted to experiment with Evolution Strategies myself, and Flappy Bird has always been one of my favorites when it comes to game experiments: a simple yet challenging game.
The model learns to play very well after ~1500 iterations, though not flawlessly: it usually loses in difficult cases (a large height difference between two consecutive wall gates).
Training is fast since there is no backpropagation, and it is not very costly in terms of memory since there is no need to record actions, as in policy gradients.
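At each iteration, training perturbs the network weights with Gaussian noise, plays one game per perturbed network, and moves the weights towards the perturbations that scored higher. Here is a minimal sketch of one such update on a single weight matrix (the full implementation, which does this for every layer, is in `es.py`; `es_update` and `evaluate` are illustrative names, not part of the code):
```python
import numpy as np

def es_update(theta, evaluate, n=50, sigma=0.1, alpha=0.001):
    """One Evolution Strategies step on a weight matrix `theta`.
    `evaluate(weights)` plays one game with those weights and returns its score."""
    noise = [np.random.randn(*theta.shape) for _ in range(n)]             # one perturbation per candidate
    rewards = np.array([evaluate(theta + sigma * eps) for eps in noise])  # score each candidate
    advantages = (rewards - rewards.mean()) / rewards.std()               # normalize the rewards
    step = sum(a * eps for a, eps in zip(advantages, noise))              # reward-weighted sum of the noise
    return theta + alpha / (n * sigma) * step
```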
Here is a demonstration of the model before training and after ~3000 iterations (less than an hour of training):
Before training:
![before training](/demo/flappy-lose.gif)
After ~3000 iterations:
![after training](/demo/flappy-success.gif)
For each frame the bird stays alive, it receives a small survival reward (+0.1), and for each wall it passes, it receives +10.
Try it yourself
---------------
First, install dependencies:
```
pip install -r requirements
```
The pretrained parameters are in a file named `load.npy` and will be loaded when you run `train.py` or `demo.py`.
`train.py` will train the model, saving the parameters to `saves/<TIMESTAMP>/save-<ITERATION>`.
`demo.py` shows the game in a GTK window so you can see how the AI actually plays.
`play.py` lets you play the game yourself: press space to jump and, once you lose, press enter to play again. :grin:
_pro tip: reach a score of 100 and you will become THUG FOR LIFE :smoking:_
Notes
-----
It seems that training for too long reduces performance after a while; learning-rate decay might help with that.
To see this yourself, there is a `long.npy` file: rename it to `load.npy` (back up `load.npy` before doing so) and run `demo.py`;
you will see the bird failing more often than not. `long.npy` was trained for ~2000 more iterations than `load.npy`.
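A simple way to experiment with that is to shrink the learning rate a little every iteration. This is only a sketch, not part of the repo: `DECAY` and `MIN_ALPHA` are made-up values, and `es` is the `EvolutionStrategy` instance from `train.py`:
```python
# hypothetical learning-rate decay wrapped around the existing training loop in train.py
DECAY = 0.999       # multiplicative decay per iteration (illustrative value)
MIN_ALPHA = 1e-4    # floor so the updates never vanish entirely

for i in range(10000):
    es.train()
    es.alpha = max(MIN_ALPHA, es.alpha * DECAY)
```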

BIN
assets/birdie-thug.png Normal file

Binary file not shown. (Size: 4.2 KiB)

BIN
assets/birdie.png Normal file

Binary file not shown. (Size: 4.0 KiB)

BIN
assets/pipe-down.png Normal file

Binary file not shown. (Size: 456 B)

BIN
assets/pipe-up.png Normal file

Binary file not shown. (Size: 421 B)

BIN
assets/pipe.png Normal file

Binary file not shown. (Size: 161 B)

BIN
assets/thug-text.png Normal file

Binary file not shown. (Size: 6.7 KiB)

39
demo.py Normal file

@@ -0,0 +1,39 @@
from es import EvolutionStrategy
import numpy as np
from game import Game, play
from win import Window, GAME_SPEED
import gi
from gi.repository import Gtk, GLib, Gdk
from os import path
import os
import time

es = EvolutionStrategy(fn=play, noisep=50, sigma=0.1, alpha=0.001, layer_sizes=[[4, 500], [500, 1]], input_size=4)

load = path.join(path.dirname(__file__), 'load.npy')
# if load.npy exists, load the parameters from it
if path.exists(load):
    es.layers = np.load(load)


def step(game, update):
    win = Window(game)
    GLib.timeout_add(GAME_SPEED, lambda: timeout_kill(win, game))
    GLib.timeout_add(GAME_SPEED, update)
    GLib.timeout_add(GAME_SPEED, win.update)
    win.show_all()
    Gtk.main()


# once the bird has lost, kill the window and stop the Gtk loop
def timeout_kill(win, game):
    if game.lost:
        Gtk.main_quit()
        win.destroy()
        return False
    return True


time.sleep(5)

for i in range(10000):
    play(es.forward, step=step)
    Gtk.main_quit()

BIN
demo/flappy-lose.gif Normal file

Binary file not shown. (Size: 1.8 MiB)

BIN
demo/flappy-success.gif Normal file

Binary file not shown. (Size: 1.0 MiB)

74
es.py Normal file

@@ -0,0 +1,74 @@
import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


class EvolutionStrategy():
    # fn: function that plays the game and returns the reward; it must accept another function
    #     that decides whether the bird should jump for a given input,
    #     e.g. def fn(should_jump): ... if should_jump(some_input): ... return reward
    # noisep: noise population, how many different noise samples are tried at each step
    # sigma: standard deviation of the generated noise
    # alpha: learning rate
    # layer_sizes: sizes of the neural network layers, e.g. [[4, 500], [500, 1]]
    # input_size: number of inputs
    def __init__(self, fn, noisep, sigma, alpha, layer_sizes, input_size):
        self.fn = fn
        self.sigma = sigma
        self.noisep = noisep
        self.alpha = alpha
        self.layer_sizes = layer_sizes
        self.input_size = input_size

        # initialize layers randomly
        self.layers = []
        for i, layer in enumerate(layer_sizes):
            self.layers.append(np.random.uniform(-0.1, 0.1, layer))

    # forward propagation: sigmoid(xW) for every layer
    def forward(self, input):
        output = input
        for i, layer in enumerate(self.layers):
            output = sigmoid(np.dot(output, layer))
        return output

    # train the model
    def train(self):
        N = [[] for i in range(len(self.layers))]
        R = np.zeros(self.noisep)

        for i in range(self.noisep):
            noisy_layers = []
            for j, (layer_size, layer) in enumerate(zip(self.layer_sizes, self.layers)):
                # for each layer, generate a noise sample
                n = np.random.randn(*layer_size)
                N[j].append(n)
                # add the noise to the layer
                noisy_w = layer + self.sigma * n
                noisy_layers.append(noisy_w)

            # build another network with the same parameters, but with the noisy layers
            es = EvolutionStrategy(fn=self.fn, noisep=self.noisep, sigma=self.sigma, alpha=self.alpha, layer_sizes=self.layer_sizes, input_size=self.input_size)
            es.layers = noisy_layers

            # play one game using the noisy network and record the reward
            R[i] = self.fn(es.forward)

        # normalize the rewards
        A = (R - np.mean(R)) / np.std(R)

        # update the layers
        for n, i in zip(N, range(len(self.layers))):
            n = np.array(n)
            # np.dot(n.T, A) weights each noise sample's contribution to the update by the reward it received
            update = self.alpha / (self.noisep * self.sigma) * np.dot(n.T, A).T
            self.layers[i] = self.layers[i] + update
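For reference, here is a minimal, hypothetical usage sketch of this class with a toy reward function; the real entry points are `train.py` and `demo.py`, which pass `game.play` as `fn`:
```python
import numpy as np
from es import EvolutionStrategy

# toy reward: prefer networks whose output for a fixed input is close to 1
def toy_fn(forward):
    out = forward(np.array([[0.5, 0.5, 0.5, 0.5]]))[0][0]
    return -abs(1.0 - out)

es = EvolutionStrategy(fn=toy_fn, noisep=10, sigma=0.1, alpha=0.01,
                       layer_sizes=[[4, 8], [8, 1]], input_size=4)
for _ in range(100):
    es.train()            # each call evaluates `noisep` noisy networks and updates the layers
print(toy_fn(es.forward))
```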

130
game.py Normal file

@@ -0,0 +1,130 @@
import numpy as np
import random

GRAVITY = 2
FRICTION = 0.9


class Game():
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.bird = Bird(width / 2, height / 2)
        self.wall = self.create_wall()
        self.lost = False
        self.score = 0

    def update(self):
        self.bird.update()
        self.wall.update()

        # +10 score for passing a wall (also prints a green '+' to the console)
        if (self.wall.x + self.wall.width) < self.width / 2:
            self.wall = self.create_wall()
            self.score += 10
            print("\033[32m+\033[0m", end='')

        if self.intercept(self.bird, self.wall):
            self.lost = True
        if self.bird.y < 0 or self.bird.y > self.height:
            self.lost = True

        # a small constant score for each frame; this way
        # our birds try to stay alive longer and
        # our evolution strategy doesn't start with zero reward
        self.score += 0.1

    # create a wall whose gate sits between 15% and 65% of the screen height
    def create_wall(self):
        return Wall(self.width - WALL_WIDTH, self.height * (0.15 + np.random.random() * 0.5))

    def intercept(self, bird, wall):
        return ((bird.x + bird.width) > wall.x and
                ((bird.y + bird.height) > (wall.gate.y + wall.gate.height) or
                 (bird.y) < wall.gate.y))


JUMP_STEPS = 2
JUMP_SPEED = 7


class Bird():
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.width = 24
        self.height = 18
        self.velocity = np.array([0, 0], dtype=np.float64)
        self.acceleration = np.array([0, 0], dtype=np.float64)

    def jump(self):
        self.velocity[1] = -JUMP_SPEED
        self.acceleration[1] = 0

    def update(self):
        self.x += self.velocity[0]
        self.y += self.velocity[1]
        self.velocity += self.acceleration
        self.velocity *= FRICTION
        self.acceleration[1] = GRAVITY


WALL_WIDTH = 30
GATE_HEIGHT = 60


class Wall():
    def __init__(self, x, y):
        self.x = x
        self.gate = dotdict({
            'y': y,
            'height': GATE_HEIGHT
        })
        self.width = WALL_WIDTH

    def update(self):
        self.x -= 2


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


# limit the game to MAX_FRAMES frames while training; sometimes a game can take
# too long to finish once the model has trained for a while
MAX_FRAMES = 10000


def play(fn, step=None):
    game = Game(200, 200)
    frame = 0

    # when showing the game to the user we also want to update the GTK frontend;
    # the `step` function is responsible for doing that, see demo.py and train.py
    if step:
        return step(game, lambda: show_update(fn, game))

    while not game.lost and frame < MAX_FRAMES:
        frame += 1
        # model input: bird x, bird y, distance to the next wall, height of the wall's gate
        data = np.array([[game.bird.x, game.bird.y, game.bird.x - game.wall.x, game.wall.gate.y]])
        jump = fn(data)[0][0]
        if jump > 0.5:
            game.bird.jump()
        game.update()

    return game.score


def show_update(fn, game):
    if not game.lost:
        data = np.array([[game.bird.x, game.bird.y, game.bird.x - game.wall.x, game.wall.gate.y]])
        jump = fn(data)[0][0]
        if jump > 0.5:
            game.bird.jump()
        game.update()
        return True
    else:
        return False

BIN
load.npy Normal file

Binary file not shown.

BIN
long.npy Normal file

Binary file not shown.

8
play.py Normal file

@@ -0,0 +1,8 @@
from win import Window
import gi
from gi.repository import Gtk, GLib, Gdk
win = Window()
win.connect('delete-event', Gtk.main_quit)
win.show_all()
Gtk.main()

2
requirements Normal file

@@ -0,0 +1,2 @@
pygobject==3.22.0
numpy==1.12.0

61
train.py Normal file

@@ -0,0 +1,61 @@
from es import EvolutionStrategy
import numpy as np
from game import Game, play
from win import Window, GAME_SPEED
import gi
from gi.repository import Gtk, GLib, Gdk
from datetime import datetime
from os import path
import os

es = EvolutionStrategy(fn=play, noisep=50, sigma=0.1, alpha=0.001, layer_sizes=[[4, 500], [500, 1]], input_size=4)

load = path.join(path.dirname(__file__), 'load.npy')
# if load.npy exists, load the parameters from it
if path.exists(load):
    es.layers = np.load(load)

# show the game every n iterations
SHOW_EVERY = 100
# save the parameters every n iterations
SAVE_EVERY = 100

# an id for saving the parameters in a separate folder per run
run_id = str(datetime.now())
print("run {}".format(run_id))
# create the per-run save directory (makedirs also creates `saves/` on the first run)
os.makedirs(path.join(path.dirname(__file__), 'saves', run_id))


# this function is called when showing the game to the user
def step(game, update):
    win = Window(game)
    GLib.timeout_add(GAME_SPEED, lambda: timeout_kill(win, game))
    GLib.timeout_add(GAME_SPEED, update)
    GLib.timeout_add(GAME_SPEED, win.update)
    win.show_all()
    Gtk.main()


# once the bird has lost, kill the window and stop the Gtk loop
def timeout_kill(win, game):
    if game.lost:
        Gtk.main_quit()
        win.destroy()
        return False
    return True


for i in range(10000):
    print("{}: ".format(i), end='')
    es.train()

    if i % SHOW_EVERY == 0:
        play(es.forward, step=step)
        Gtk.main_quit()
        print(' shown')
    else:
        score = play(es.forward)
        print(' score: {:.2f}'.format(score))

    if i % SAVE_EVERY == 0:
        p = path.join(path.dirname(__file__), 'saves', run_id, 'save-{}'.format(i))
        np.save(p, es.layers)

99
win.py Normal file

@@ -0,0 +1,99 @@
import gi
import os
from os import path
from game import Game

gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, GLib, Gdk

# milliseconds between frames
GAME_SPEED = 35
# score at which the bird goes thug
THUG_SCORE = 100


class Window(Gtk.Window):
    def __init__(self, game=None):
        Gtk.Window.__init__(self, title='Flappy Bird Evolution Strategies')

        (width, height) = self.get_size()
        self.width = width
        self.height = height

        # reuse the provided game (demo/training) or create a fresh one (manual play)
        if game:
            self.game = game
        else:
            self.game = Game(width, height)

        self.fixed = Gtk.Fixed()

        self.birdie = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/birdie.png'))
        self.fixed.add(self.birdie)

        # the wall is drawn as two colored boxes with a gap (the gate) between them
        self.wall_top = Gtk.Box()
        self.wall_bottom = Gtk.Box()
        bg = Gdk.RGBA(0.1, 1, 0.1, 1)
        self.wall_top.override_background_color(0, bg)
        self.wall_bottom.override_background_color(0, bg)
        self.fixed.add(self.wall_top)
        self.fixed.add(self.wall_bottom)

        self.add(self.fixed)

        self.gameover = Gtk.Label.new('')
        self.gameover.override_color(0, Gdk.RGBA(1, 0.2, 0.2, 1))
        self.score = Gtk.Label.new('')
        self.thug = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/thug-text.png'))

        self.fixed.add(self.gameover)
        self.fixed.add(self.score)
        self.fixed.add(self.thug)

        self.fixed.move(self.gameover, width / 2, height / 2)
        self.fixed.move(self.score, 10, 10)
        self.fixed.move(self.thug, -75, -75)

        self.update()

        # when playing manually, drive the game loop from the window itself
        if not game:
            GLib.timeout_add(GAME_SPEED, self.update)

        self.connect("key-press-event", self.on_key)

    def update(self):
        if self.game.lost:
            self.gameover.set_text('Game Over!')
            return True
        else:
            self.gameover.set_text('')

        self.score.set_text(str(self.game.score))
        self.game.update()

        if self.game.score > THUG_SCORE:
            self.birdie.set_from_file(path.join(path.dirname(__file__), 'assets/birdie-thug.png'))
            self.fixed.move(self.thug, 10, self.height - 60)

        self.wall_top.set_size_request(self.game.wall.width, self.game.wall.gate.y)
        self.wall_bottom.set_size_request(self.game.wall.width, self.height - self.game.wall.gate.y - self.game.wall.gate.height)

        self.fixed.move(self.birdie, self.width / 2, self.game.bird.y)
        self.fixed.move(self.wall_top, self.game.wall.x, 0)
        self.fixed.move(self.wall_bottom, self.game.wall.x, self.game.wall.gate.y + self.game.wall.gate.height)

        # returning True keeps the GLib timeout alive
        return True

    def on_key(self, win, key):
        if key.keyval == Gdk.KEY_space:
            self.game.bird.jump()
        if key.keyval == Gdk.KEY_Return and self.game.lost:
            self.game = Game(self.game.width, self.game.height)


# win = Window()
# win.connect('delete-event', Gtk.main_quit)
# win.show_all()
# Gtk.main()