initial commit

Mahdi Dibaiee 2017-04-02 16:48:56 +04:30
commit 7892141699
19 changed files with 546 additions and 0 deletions

93
.gitignore vendored Normal file

@@ -0,0 +1,93 @@
#### joe made this: http://goel.io/joe
#### python ####
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
.venv/
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject

40
README.md Normal file

@@ -0,0 +1,40 @@
Playing Flappy Bird using Evolution Strategies
==============================================
After reading [Evolution Strategies as a Scalable Alternative to Reinforcement Learning](https://blog.openai.com/evolution-strategies/), I wanted to experiment with Evolution Strategies myself, and Flappy Bird has always been one of my favorites when it comes to game experiments: a simple yet challenging game.
The model learns to play very well after ~1500 iterations, though not flawlessly: it usually loses in difficult cases (a large height difference between two consecutive wall gates).
Training is fast since there is no backpropagation, and it is not very costly in terms of memory since there is no need to record actions, as in policy gradients.
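At each iteration, training perturbs the network weights with Gaussian noise, plays one game per perturbed network, and moves the weights towards the perturbations that scored higher. Here is a minimal sketch of one such update on a single weight matrix (the full implementation, which does this for every layer, is in `es.py`; `es_update` and `evaluate` are illustrative names, not part of the code):
```python
import numpy as np

def es_update(theta, evaluate, n=50, sigma=0.1, alpha=0.001):
    """One Evolution Strategies step on a weight matrix `theta`.
    `evaluate(weights)` plays one game with those weights and returns its score."""
    noise = [np.random.randn(*theta.shape) for _ in range(n)]             # one perturbation per candidate
    rewards = np.array([evaluate(theta + sigma * eps) for eps in noise])  # score each candidate
    advantages = (rewards - rewards.mean()) / rewards.std()               # normalize the rewards
    step = sum(a * eps for a, eps in zip(advantages, noise))              # reward-weighted sum of the noise
    return theta + alpha / (n * sigma) * step
```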
Here is a demonstration of the model before training and after ~3000 iterations (less than an hour of training):
Before training:
![before training](/demo/flappy-lose.gif)
After ~3000 iterations:
![after training](/demo/flappy-success.gif)
For each frame the bird stays alive, it receives a small survival reward (+0.1), and for each wall it passes, it receives +10.
Try it yourself
---------------
First, install dependencies:
```
pip install -r requirements
```
The pretrained parameters are in a file named `load.npy` and will be loaded when you run `train.py` or `demo.py`.
`train.py` will train the model, saving the parameters to `saves/<TIMESTAMP>/save-<ITERATION>`.
`demo.py` shows the game in a GTK window so you can see how the AI actually plays.
`play.py` lets you play the game yourself: press space to jump and, once you lose, press enter to play again. :grin:
_pro tip: reach a score of 100 and you will become THUG FOR LIFE :smoking:_
Notes
-----
It seems that training for too long reduces performance after a while; learning-rate decay might help with that.
To see this yourself, there is a `long.npy` file: rename it to `load.npy` (back up `load.npy` before doing so) and run `demo.py`;
you will see the bird failing more often than not. `long.npy` was trained for ~2000 more iterations than `load.npy`.
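A simple way to experiment with that is to shrink the learning rate a little every iteration. This is only a sketch, not part of the repo: `DECAY` and `MIN_ALPHA` are made-up values, and `es` is the `EvolutionStrategy` instance from `train.py`:
```python
# hypothetical learning-rate decay wrapped around the existing training loop in train.py
DECAY = 0.999       # multiplicative decay per iteration (illustrative value)
MIN_ALPHA = 1e-4    # floor so the updates never vanish entirely

for i in range(10000):
    es.train()
    es.alpha = max(MIN_ALPHA, es.alpha * DECAY)
```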

BIN
assets/birdie-thug.png Normal file

Binary file not shown. (Size: 4.2 KiB)

BIN
assets/birdie.png Normal file

Binary file not shown. (Size: 4.0 KiB)

BIN
assets/pipe-down.png Normal file

Binary file not shown. (Size: 456 B)

BIN
assets/pipe-up.png Normal file

Binary file not shown. (Size: 421 B)

BIN
assets/pipe.png Normal file

Binary file not shown. (Size: 161 B)

BIN
assets/thug-text.png Normal file

Binary file not shown. (Size: 6.7 KiB)

39
demo.py Normal file

@@ -0,0 +1,39 @@
from es import EvolutionStrategy
import numpy as np
from game import Game, play
from win import Window, GAME_SPEED
import gi
from gi.repository import Gtk, GLib, Gdk
from os import path
import os
import time

es = EvolutionStrategy(fn=play, noisep=50, sigma=0.1, alpha=0.001, layer_sizes=[[4, 500], [500, 1]], input_size=4)

load = path.join(path.dirname(__file__), 'load.npy')
# if load.npy exists, load the parameters from it
if path.exists(load):
    es.layers = np.load(load)


def step(game, update):
    win = Window(game)
    GLib.timeout_add(GAME_SPEED, lambda: timeout_kill(win, game))
    GLib.timeout_add(GAME_SPEED, update)
    GLib.timeout_add(GAME_SPEED, win.update)
    win.show_all()
    Gtk.main()


# once the bird has lost, kill the window and stop the Gtk loop
def timeout_kill(win, game):
    if game.lost:
        Gtk.main_quit()
        win.destroy()
        return False
    return True


time.sleep(5)

for i in range(10000):
    play(es.forward, step=step)
    Gtk.main_quit()

BIN
demo/flappy-lose.gif Normal file

Binary file not shown. (Size: 1.8 MiB)

BIN
demo/flappy-success.gif Normal file

Binary file not shown. (Size: 1.0 MiB)

74
es.py Normal file

@@ -0,0 +1,74 @@
import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


class EvolutionStrategy():
    # fn: function that plays the game and returns the reward; it must accept another function
    #     that decides whether the bird should jump for a given input,
    #     e.g. def fn(should_jump): ... if should_jump(some_input): ... return reward
    # noisep: noise population, how many different noise samples are tried at each step
    # sigma: standard deviation of the generated noise
    # alpha: learning rate
    # layer_sizes: sizes of the neural network layers, e.g. [[4, 500], [500, 1]]
    # input_size: number of inputs
    def __init__(self, fn, noisep, sigma, alpha, layer_sizes, input_size):
        self.fn = fn
        self.sigma = sigma
        self.noisep = noisep
        self.alpha = alpha
        self.layer_sizes = layer_sizes
        self.input_size = input_size

        # initialize layers randomly
        self.layers = []
        for i, layer in enumerate(layer_sizes):
            self.layers.append(np.random.uniform(-0.1, 0.1, layer))

    # forward propagation: sigmoid(xW) for every layer
    def forward(self, input):
        output = input
        for i, layer in enumerate(self.layers):
            output = sigmoid(np.dot(output, layer))
        return output

    # train the model
    def train(self):
        N = [[] for i in range(len(self.layers))]
        R = np.zeros(self.noisep)

        for i in range(self.noisep):
            noisy_layers = []
            for j, (layer_size, layer) in enumerate(zip(self.layer_sizes, self.layers)):
                # for each layer, generate a noise sample
                n = np.random.randn(*layer_size)
                N[j].append(n)
                # add the noise to the layer
                noisy_w = layer + self.sigma * n
                noisy_layers.append(noisy_w)

            # build another network with the same parameters, but with the noisy layers
            es = EvolutionStrategy(fn=self.fn, noisep=self.noisep, sigma=self.sigma, alpha=self.alpha, layer_sizes=self.layer_sizes, input_size=self.input_size)
            es.layers = noisy_layers

            # play one game using the noisy network and record the reward
            R[i] = self.fn(es.forward)

        # normalize the rewards
        A = (R - np.mean(R)) / np.std(R)

        # update the layers
        for n, i in zip(N, range(len(self.layers))):
            n = np.array(n)
            # np.dot(n.T, A) weights each noise sample's contribution to the update by the reward it received
            update = self.alpha / (self.noisep * self.sigma) * np.dot(n.T, A).T
            self.layers[i] = self.layers[i] + update
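For reference, here is a minimal, hypothetical usage sketch of this class with a toy reward function; the real entry points are `train.py` and `demo.py`, which pass `game.play` as `fn`:
```python
import numpy as np
from es import EvolutionStrategy

# toy reward: prefer networks whose output for a fixed input is close to 1
def toy_fn(forward):
    out = forward(np.array([[0.5, 0.5, 0.5, 0.5]]))[0][0]
    return -abs(1.0 - out)

es = EvolutionStrategy(fn=toy_fn, noisep=10, sigma=0.1, alpha=0.01,
                       layer_sizes=[[4, 8], [8, 1]], input_size=4)
for _ in range(100):
    es.train()            # each call evaluates `noisep` noisy networks and updates the layers
print(toy_fn(es.forward))
```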

130
game.py Normal file

@@ -0,0 +1,130 @@
import numpy as np
import random

GRAVITY = 2
FRICTION = 0.9


class Game():
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.bird = Bird(width / 2, height / 2)
        self.wall = self.create_wall()
        self.lost = False
        self.score = 0

    def update(self):
        self.bird.update()
        self.wall.update()

        # +10 score for passing a wall (also prints a green '+' to the console)
        if (self.wall.x + self.wall.width) < self.width / 2:
            self.wall = self.create_wall()
            self.score += 10
            print("\033[32m+\033[0m", end='')

        if self.intercept(self.bird, self.wall):
            self.lost = True
        if self.bird.y < 0 or self.bird.y > self.height:
            self.lost = True

        # a small constant score for each frame; this way
        # our birds try to stay alive longer and
        # our evolution strategy doesn't start with zero reward
        self.score += 0.1

    # create a wall whose gate sits between 15% and 65% of the screen height
    def create_wall(self):
        return Wall(self.width - WALL_WIDTH, self.height * (0.15 + np.random.random() * 0.5))

    def intercept(self, bird, wall):
        return ((bird.x + bird.width) > wall.x and
                ((bird.y + bird.height) > (wall.gate.y + wall.gate.height) or
                 (bird.y) < wall.gate.y))


JUMP_STEPS = 2
JUMP_SPEED = 7


class Bird():
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.width = 24
        self.height = 18
        self.velocity = np.array([0, 0], dtype=np.float64)
        self.acceleration = np.array([0, 0], dtype=np.float64)

    def jump(self):
        self.velocity[1] = -JUMP_SPEED
        self.acceleration[1] = 0

    def update(self):
        self.x += self.velocity[0]
        self.y += self.velocity[1]
        self.velocity += self.acceleration
        self.velocity *= FRICTION
        self.acceleration[1] = GRAVITY


WALL_WIDTH = 30
GATE_HEIGHT = 60


class Wall():
    def __init__(self, x, y):
        self.x = x
        self.gate = dotdict({
            'y': y,
            'height': GATE_HEIGHT
        })
        self.width = WALL_WIDTH

    def update(self):
        self.x -= 2


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


# limit the game to MAX_FRAMES frames while training; sometimes a game can take
# too long to finish once the model has trained for a while
MAX_FRAMES = 10000


def play(fn, step=None):
    game = Game(200, 200)
    frame = 0

    # when showing the game to the user we also want to update the GTK frontend;
    # the `step` function is responsible for doing that, see demo.py and train.py
    if step:
        return step(game, lambda: show_update(fn, game))

    while not game.lost and frame < MAX_FRAMES:
        frame += 1
        # model input: bird x, bird y, distance to the next wall, height of the wall's gate
        data = np.array([[game.bird.x, game.bird.y, game.bird.x - game.wall.x, game.wall.gate.y]])
        jump = fn(data)[0][0]
        if jump > 0.5:
            game.bird.jump()
        game.update()

    return game.score


def show_update(fn, game):
    if not game.lost:
        data = np.array([[game.bird.x, game.bird.y, game.bird.x - game.wall.x, game.wall.gate.y]])
        jump = fn(data)[0][0]
        if jump > 0.5:
            game.bird.jump()
        game.update()
        return True
    else:
        return False

BIN
load.npy Normal file

Binary file not shown.

BIN
long.npy Normal file

Binary file not shown.

8
play.py Normal file

@@ -0,0 +1,8 @@
from win import Window
import gi
from gi.repository import Gtk, GLib, Gdk
win = Window()
win.connect('delete-event', Gtk.main_quit)
win.show_all()
Gtk.main()

2
requirements Normal file

@@ -0,0 +1,2 @@
pygobject==3.22.0
numpy==1.12.0

61
train.py Normal file

@@ -0,0 +1,61 @@
from es import EvolutionStrategy
import numpy as np
from game import Game, play
from win import Window, GAME_SPEED
import gi
from gi.repository import Gtk, GLib, Gdk
from datetime import datetime
from os import path
import os

es = EvolutionStrategy(fn=play, noisep=50, sigma=0.1, alpha=0.001, layer_sizes=[[4, 500], [500, 1]], input_size=4)

load = path.join(path.dirname(__file__), 'load.npy')
# if load.npy exists, load the parameters from it
if path.exists(load):
    es.layers = np.load(load)

# show the game every n iterations
SHOW_EVERY = 100
# save the parameters every n iterations
SAVE_EVERY = 100

# an id for saving the parameters in a separate folder per run
run_id = str(datetime.now())
print("run {}".format(run_id))
# create the per-run save directory (makedirs also creates `saves/` on the first run)
os.makedirs(path.join(path.dirname(__file__), 'saves', run_id))


# this function is called when showing the game to the user
def step(game, update):
    win = Window(game)
    GLib.timeout_add(GAME_SPEED, lambda: timeout_kill(win, game))
    GLib.timeout_add(GAME_SPEED, update)
    GLib.timeout_add(GAME_SPEED, win.update)
    win.show_all()
    Gtk.main()


# once the bird has lost, kill the window and stop the Gtk loop
def timeout_kill(win, game):
    if game.lost:
        Gtk.main_quit()
        win.destroy()
        return False
    return True


for i in range(10000):
    print("{}: ".format(i), end='')
    es.train()

    if i % SHOW_EVERY == 0:
        play(es.forward, step=step)
        Gtk.main_quit()
        print(' shown')
    else:
        score = play(es.forward)
        print(' score: {:.2f}'.format(score))

    if i % SAVE_EVERY == 0:
        p = path.join(path.dirname(__file__), 'saves', run_id, 'save-{}'.format(i))
        np.save(p, es.layers)

99
win.py Normal file

@@ -0,0 +1,99 @@
import gi
import os
from os import path
from game import Game

gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, GLib, Gdk

# milliseconds between frames
GAME_SPEED = 35
# score at which the bird goes thug
THUG_SCORE = 100


class Window(Gtk.Window):
    def __init__(self, game=None):
        Gtk.Window.__init__(self, title='Flappy Bird Evolution Strategies')

        (width, height) = self.get_size()
        self.width = width
        self.height = height

        # reuse the provided game (demo/training) or create a fresh one (manual play)
        if game:
            self.game = game
        else:
            self.game = Game(width, height)

        self.fixed = Gtk.Fixed()

        self.birdie = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/birdie.png'))
        self.fixed.add(self.birdie)

        # the wall is drawn as two colored boxes with a gap (the gate) between them
        self.wall_top = Gtk.Box()
        self.wall_bottom = Gtk.Box()
        bg = Gdk.RGBA(0.1, 1, 0.1, 1)
        self.wall_top.override_background_color(0, bg)
        self.wall_bottom.override_background_color(0, bg)
        self.fixed.add(self.wall_top)
        self.fixed.add(self.wall_bottom)

        self.add(self.fixed)

        self.gameover = Gtk.Label.new('')
        self.gameover.override_color(0, Gdk.RGBA(1, 0.2, 0.2, 1))
        self.score = Gtk.Label.new('')
        self.thug = Gtk.Image.new_from_file(path.join(path.dirname(__file__), 'assets/thug-text.png'))

        self.fixed.add(self.gameover)
        self.fixed.add(self.score)
        self.fixed.add(self.thug)

        self.fixed.move(self.gameover, width / 2, height / 2)
        self.fixed.move(self.score, 10, 10)
        self.fixed.move(self.thug, -75, -75)

        self.update()

        # when playing manually, drive the game loop from the window itself
        if not game:
            GLib.timeout_add(GAME_SPEED, self.update)

        self.connect("key-press-event", self.on_key)

    def update(self):
        if self.game.lost:
            self.gameover.set_text('Game Over!')
            return True
        else:
            self.gameover.set_text('')

        self.score.set_text(str(self.game.score))
        self.game.update()

        if self.game.score > THUG_SCORE:
            self.birdie.set_from_file(path.join(path.dirname(__file__), 'assets/birdie-thug.png'))
            self.fixed.move(self.thug, 10, self.height - 60)

        self.wall_top.set_size_request(self.game.wall.width, self.game.wall.gate.y)
        self.wall_bottom.set_size_request(self.game.wall.width, self.height - self.game.wall.gate.y - self.game.wall.gate.height)

        self.fixed.move(self.birdie, self.width / 2, self.game.bird.y)
        self.fixed.move(self.wall_top, self.game.wall.x, 0)
        self.fixed.move(self.wall_bottom, self.game.wall.x, self.game.wall.gate.y + self.game.wall.gate.height)

        # returning True keeps the GLib timeout alive
        return True

    def on_key(self, win, key):
        if key.keyval == Gdk.KEY_space:
            self.game.bird.jump()
        if key.keyval == Gdk.KEY_Return and self.game.lost:
            self.game = Game(self.game.width, self.game.height)


# win = Window()
# win.connect('delete-event', Gtk.main_quit)
# win.show_all()
# Gtk.main()