74 lines
2.6 KiB
Python
74 lines
2.6 KiB
Python
|
import numpy as np
|
||
|
|
||
|
def sigmoid(x):
|
||
|
return 1 / (1 + np.exp(-x))
|
||
|
|
||
|
class EvolutionStrategy():
|
||
|
# fn: function that plays the game and returns the rewards, this function must take as argument another function that
|
||
|
# determines whether the bird should jump or not
|
||
|
# e.g. def fn shouldJump = if shouldJump(some_input): ... return reward
|
||
|
# noisep: noise population, how many different noises should be tried at each step
|
||
|
# sigma: standard deviation of generated noise
|
||
|
# alpha: learning rate
|
||
|
# layer_sizes: sizes of neural network layers, e.g. [[4, 500], [500, 1]]
|
||
|
# input_size: number of inputs
|
||
|
def __init__(self, fn, noisep, sigma, alpha, layer_sizes, input_size):
|
||
|
self.fn = fn
|
||
|
|
||
|
self.sigma = sigma
|
||
|
self.noisep = noisep
|
||
|
self.alpha = alpha
|
||
|
self.layer_sizes = layer_sizes
|
||
|
self.input_size = input_size
|
||
|
|
||
|
# initialize layers randomly
|
||
|
self.layers = []
|
||
|
for i, layer in enumerate(layer_sizes):
|
||
|
self.layers.append(np.random.uniform(-0.1, 0.1, layer))
|
||
|
|
||
|
# forward propagation: sigmoid(xW) for every layer
|
||
|
def forward(self, input):
|
||
|
output = input
|
||
|
for i, layer in enumerate(self.layers):
|
||
|
output = sigmoid(np.dot(output, layer))
|
||
|
|
||
|
return output
|
||
|
|
||
|
# train the model
|
||
|
def train(self):
|
||
|
N = [[] for i in range(len(self.layers))]
|
||
|
R = np.zeros(self.noisep)
|
||
|
|
||
|
for i in range(self.noisep):
|
||
|
noisy_layers = []
|
||
|
|
||
|
for j, (layer_size, layer) in enumerate(zip(self.layer_sizes, self.layers)):
|
||
|
# for each layer, generate a noise
|
||
|
n = np.random.randn(*layer_size)
|
||
|
N[j].append(n)
|
||
|
|
||
|
# add noise to layer
|
||
|
noisy_w = layer + self.sigma * n
|
||
|
|
||
|
noisy_layers.append(noisy_w)
|
||
|
|
||
|
# generate another network with the same parameters, but with noisy layers
|
||
|
es = EvolutionStrategy(fn=self.fn, noisep=self.noisep, sigma=self.sigma, alpha=self.alpha, layer_sizes=self.layer_sizes, input_size=self.input_size)
|
||
|
|
||
|
es.layers = noisy_layers
|
||
|
# run a forward propagation using the noisy layer and save the reward
|
||
|
R[i] = self.fn(es.forward)
|
||
|
|
||
|
# normalize the rewards
|
||
|
A = (R - np.mean(R)) / np.std(R)
|
||
|
|
||
|
# update layers
|
||
|
for n, i in zip(N, range(len(self.layers))):
|
||
|
n = np.array(n)
|
||
|
|
||
|
# np.dot(n.T, A) scales each noise's contribution to the update by how much reward it had received
|
||
|
update = self.alpha / (self.noisep * self.sigma) * np.dot(n.T, A).T
|
||
|
self.layers[i] = self.layers[i] + update
|
||
|
|
||
|
|