flappy-es/es.py

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class EvolutionStrategy():
    # fn: function that plays the game and returns the rewards, this function must take as argument another function that
    #     determines whether the bird should jump or not
    #     e.g. def fn shouldJump = if shouldJump(some_input): ... return reward
    # noisep: noise population, how many different noises should be tried at each step
    # sigma: standard deviation of generated noise
    # alpha: learning rate
    # layer_sizes: sizes of neural network layers, e.g. [[4, 500], [500, 1]]
    # input_size: number of inputs
    def __init__(self, fn, noisep, sigma, alpha, layer_sizes, input_size):
        self.fn = fn

        self.sigma = sigma
        self.noisep = noisep
        self.alpha = alpha
        self.layer_sizes = layer_sizes
        self.input_size = input_size

        # initialize layers randomly
        self.layers = []
        for i, layer in enumerate(layer_sizes):
            self.layers.append(np.random.uniform(-0.1, 0.1, layer))

    # forward propagation: sigmoid(xW) for every layer
    def forward(self, input):
        output = input
        for i, layer in enumerate(self.layers):
            output = sigmoid(np.dot(output, layer))

        return output

    # train the model
    def train(self):
        N = [[] for i in range(len(self.layers))]
        R = np.zeros(self.noisep)

        for i in range(self.noisep):
            noisy_layers = []

            for j, (layer_size, layer) in enumerate(zip(self.layer_sizes, self.layers)):
                # for each layer, generate a noise
                n = np.random.randn(*layer_size)
                N[j].append(n)

                # add noise to layer
                noisy_w = layer + self.sigma * n

                noisy_layers.append(noisy_w)

            # generate another network with the same parameters, but with noisy layers
            es = EvolutionStrategy(fn=self.fn, noisep=self.noisep, sigma=self.sigma, alpha=self.alpha, layer_sizes=self.layer_sizes, input_size=self.input_size)

            es.layers = noisy_layers
            # run a forward propagation using the noisy layer and save the reward
            R[i] = self.fn(es.forward)

        # normalize the rewards
        A = (R - np.mean(R)) / np.std(R)

        # update layers
        for n, i in zip(N, range(len(self.layers))):
            n = np.array(n)

            # np.dot(n.T, A) scales each noise's contribution to the update by how much reward it had received
            update = self.alpha / (self.noisep * self.sigma) * np.dot(n.T, A).T
            self.layers[i] = self.layers[i] + update

        return R