from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path

from utils import *
# from predict import predicted_map  # needed by the MapHistory callback below

RANDOM_SEED = 1

print(tf.__version__)

# tf.enable_eager_execution()

# Seed TensorFlow and NumPy so runs are reproducible.
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

df = pd.read_pickle('data.p')
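
# A minimal sketch, assuming a 'biome_num' label column (the name appears in a
# commented groupby inside prepare_dataset below), of the contract that
# Model.prepare_dataset expects from the dataset builders imported from utils
# (dataframe_to_dataset_biomes, dataframe_to_dataset_temp_precip): take the
# DataFrame, return (dataset_size, n_features, n_outputs, tf.data.Dataset).
# This hypothetical version is for illustration only and is not used below.
def _example_dataset_fn(df):
    labels = df['biome_num'].values
    features = df.drop(columns=['biome_num']).values.astype(np.float32)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return len(df), features.shape[1], int(labels.max()) + 1, dataset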

class MapHistory(keras.callbacks.Callback):
    # Renders a predicted map after every epoch; requires the predicted_map
    # import above to be uncommented.
    def on_epoch_end(self, epoch, logs=None):
        print('EPOCH', epoch)
        predicted_map('maps/{}'.format(epoch))

class Model:
    def __init__(self, name, batch_size=16, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
        self.name = name
        self.path = "checkpoints/{}.hdf5".format(name)

        self.batch_size = batch_size
        self.shuffle_buffer_size = shuffle_buffer_size
        self.learning_rate = learning_rate
        self.epochs = epochs

    def prepare_dataset(self, df, fn):
        # fn maps the DataFrame to (dataset_size, features, output_size, tf.data.Dataset).
        self.dataset_fn = fn
        dataset_size, features, output_size, dataset = fn(df)
        # Shuffle once with a fixed seed so the take/skip split below stays
        # disjoint across epochs (reshuffling every iteration would leak test
        # rows into training).
        self.dataset = dataset.shuffle(self.shuffle_buffer_size, seed=RANDOM_SEED, reshuffle_each_iteration=False)
        # 85/15 train/test split.
        self.TRAIN_SIZE = int(dataset_size * 0.85)
        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
        (training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),
                            self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())

        # print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))

        print('dataset: size={}, train={}, test={}'.format(dataset_size, self.TRAIN_SIZE, self.TEST_SIZE))
        print('input_size={}'.format(features))

        self.dataset_size = dataset_size
        self.features = features
        self.output_size = output_size
        self.training = training
        self.test = test

    def create_model(self, layers, out_activation=None):
        # `layers` lists the hidden-layer widths; each hidden layer uses ELU.
        # An empty list yields a linear model (just the output layer).
        params = {
            'kernel_initializer': 'lecun_uniform',
            'bias_initializer': 'zeros',
            # 'kernel_regularizer': keras.regularizers.l2(l=0.01)
        }
        dropout = [keras.layers.Dropout(0.1, input_shape=[self.features])]
        # dropout = []
        self.model = keras.Sequential(dropout + [
            keras.layers.Dense(n, activation=tf.nn.elu, **params) for n in layers
        ] + [
            keras.layers.Dense(self.output_size, activation=out_activation, **params)
        ])

    def compile(self, loss='mse', metrics=['accuracy'], optimizer=tf.train.AdamOptimizer, load_weights=True):
        # Resume from an existing checkpoint; skip silently when none exists yet.
        if load_weights and os.path.isfile(self.path):
            self.model.load_weights(self.path)

        self.model.compile(loss=loss,
                           optimizer=optimizer(self.learning_rate),
                           metrics=metrics)
def evaluate(self):
|
|
return self.model.evaluate(
|
|
self.test,
|
|
batch_size=self.batch_size,
|
|
steps=int(self.dataset_size / self.batch_size),
|
|
verbose=1
|
|
)
|
|
|
|

    def evaluate_print(self):
        loss, accuracy = self.evaluate()
        print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))
def train(self):
|
|
self.model.summary()
|
|
|
|
checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='val_loss', verbose=1, mode='min', save_best_only=True)
|
|
tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', update_freq='epoch')
|
|
# reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
|
|
# map_callback = MapHistory()
|
|
|
|
self.model.fit(
|
|
self.training,
|
|
batch_size=self.batch_size,
|
|
epochs=self.epochs,
|
|
steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),
|
|
callbacks=[checkpoint, tensorboard],
|
|
validation_data=self.test,
|
|
validation_steps=int(self.TEST_SIZE / self.batch_size),
|
|
verbose=1
|
|
)
|
|
|
|

    def predict(self, a):
        # Collapse class probabilities to the most likely class index per row.
        return np.argmax(self.model.predict(a), axis=1)
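
# Typical lifecycle of a Model instance, mirroring compile_b()/B.train() below
# (the name and layer sizes here are just an example):
#
#     m = Model('demo', epochs=5)
#     m.prepare_dataset(df, dataframe_to_dataset_biomes)
#     m.create_model([12], tf.nn.softmax)
#     m.compile(loss='sparse_categorical_crossentropy', load_weights=False)
#     m.train()
#     m.evaluate_print()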

A = Model('a', epochs=2)
B = Model('b', learning_rate=0.0005, epochs=50)


# 24 so far
def compile_b():
    B.prepare_dataset(df, dataframe_to_dataset_biomes)
    B.create_model([12], tf.nn.softmax)
    B.compile(loss='sparse_categorical_crossentropy', load_weights=False)

def compile_a():
    A.prepare_dataset(df, dataframe_to_dataset_temp_precip)
    # create_model applies ELU to hidden layers itself, so it takes plain
    # widths rather than (width, activation) tuples.
    A.create_model([4])
    # A.create_model([]) # linear model
    A.compile(metrics=['accuracy', 'mae'])

if __name__ == "__main__":
    compile_b()
    B.train()

    # for inp, out in B.test.take(1).make_one_shot_iterator():
    #     print(inp, out)

    # print(np.unique(nums))
    # print(np.unique(predictions))
    # print('loss: {}, evaluation: {}'.format(*B.evaluate()))

    # compile_a()
    # A.train()
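
# A hypothetical inference flow once checkpoints/b.hdf5 exists; the zero vector
# merely stands in for a real feature row of width B.features:
#
#     compile_b()
#     B.model.load_weights(B.path)
#     print(B.predict(np.zeros((1, B.features), dtype=np.float32)))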