world-ecoregion/nn.py

from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path

from utils import *
# from predict import predicted_map

RANDOM_SEED = 1

print(tf.__version__)

# tf.enable_eager_execution()

tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

df = pd.read_pickle('data.p')

class MapHistory(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        print('EPOCH', epoch)
        predicted_map('maps/{}'.format(epoch))

class Model():
    def __init__(self, name, batch_size=16, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
        self.name = name
        self.path = "checkpoints/{}.hdf5".format(name)

        self.batch_size = batch_size
        self.shuffle_buffer_size = shuffle_buffer_size
        self.learning_rate = learning_rate 
        self.epochs = epochs

    def prepare_dataset(self, df, fn):
        self.dataset_fn = fn
        dataset_size, features, output_size, dataset = fn(df)
        self.dataset = dataset.shuffle(self.shuffle_buffer_size)
        self.TRAIN_SIZE = int(dataset_size * 0.85)
        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
        (training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),
                            self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())

        # print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))

        print('dataset: size={}, train={}, test={}'.format(dataset_size, self.TRAIN_SIZE, self.TEST_SIZE))
        print('input_size={}'.format(features))

        self.dataset_size = dataset_size
        self.features = features
        self.output_size = output_size
        self.training = training
        self.test = test

    def create_model(self, layers, out_activation=None):
        params = {
                'kernel_initializer': 'lecun_uniform',
                'bias_initializer': 'zeros',
                # 'kernel_regularizer': keras.regularizers.l2(l=0.01)
        }
        dropout = [keras.layers.Dropout(0.1, input_shape=[self.features])]
        # dropout = []
        self.model = keras.Sequential(dropout + [
            keras.layers.Dense(layers[0], activation=tf.nn.elu, **params)
        ] + [
            keras.layers.Dense(n, activation=tf.nn.elu, **params) for n in layers[1:]
        ] + [
            keras.layers.Dense(self.output_size, activation=out_activation, **params)
        ])

    def compile(self, loss='mse', metrics=['accuracy'], optimizer=tf.train.AdamOptimizer, load_weights=True):
        if load_weights:
            self.model.load_weights(self.path)

        optimizer = optimizer(self.learning_rate)

        self.model.compile(loss=loss,
                    optimizer=optimizer,
                    metrics=metrics)

    def evaluate(self):
        return self.model.evaluate(
            self.test,
            batch_size=self.batch_size,
            steps=int(self.dataset_size / self.batch_size),
            verbose=1
        )

    def evaluate_print(self):
        loss, accuracy = self.evaluate()
        print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))

    def train(self):
        self.model.summary()

        checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='val_loss', verbose=1, mode='min', save_best_only=True)
        tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', update_freq='epoch')
        # reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
        # map_callback = MapHistory()

        self.model.fit(
            self.training,
            batch_size=self.batch_size,
            epochs=self.epochs,
            steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),
            callbacks=[checkpoint, tensorboard],
            validation_data=self.test,
            validation_steps=int(self.TEST_SIZE / self.batch_size),
            verbose=1
        )

    def predict(self, a):
        return np.argmax(self.model.predict(a), axis=1)

A = Model('a', epochs=2)
B = Model('b', learning_rate=0.0005, epochs=50)

# 24 so far
def compile_b():
    B.prepare_dataset(df, dataframe_to_dataset_biomes)
    B.create_model([12], tf.nn.softmax)
    B.compile(loss='sparse_categorical_crossentropy', load_weights=False)

def compile_a():
    A.prepare_dataset(df, dataframe_to_dataset_temp_precip)
    A.create_model([(4, tf.nn.elu)])
    # A.create_model([]) # linear model
    A.compile(metrics=['accuracy', 'mae'])

if __name__ == "__main__":
    compile_b()
    B.train()

    # for inp, out in B.test.take(1).make_one_shot_iterator():
        # print(inp, out)

    # print(np.unique(nums))
    # print(np.unique(predictions))
    # print('loss: {}, evaluation: {}'.format(*B.evaluate()))

    # compile_a()
    # A.train()
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`from __future__ import absolute_import, division, print_function`

			`# TensorFlow and tf.keras`
			`import tensorflow as tf`
			`from tensorflow import keras`

			`# Helper libraries`
			`import numpy as np`
			`import matplotlib.pyplot as plt`
			`import pandas as pd`
			`import os.path`

			`from utils import *`
updates 2019-03-07 03:25:23 +00:00			`# from predict import predicted_map`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
			`RANDOM_SEED = 1`

refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`print(tf.__version__)`

			`# tf.enable_eager_execution()`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
			`tf.set_random_seed(RANDOM_SEED)`
			`np.random.seed(RANDOM_SEED)`

refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`df = pd.read_pickle('data.p')`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
updates 2019-03-07 03:25:23 +00:00			`class MapHistory(keras.callbacks.Callback):`
			`def on_epoch_end(self, epoch, logs):`
			`print('EPOCH', epoch)`
			`predicted_map('maps/{}'.format(epoch))`

feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`class Model():`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`def __init__(self, name, batch_size=16, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`self.name = name`
			`self.path = "checkpoints/{}.hdf5".format(name)`

			`self.batch_size = batch_size`
			`self.shuffle_buffer_size = shuffle_buffer_size`
			`self.learning_rate = learning_rate`
			`self.epochs = epochs`

			`def prepare_dataset(self, df, fn):`
			`self.dataset_fn = fn`
			`dataset_size, features, output_size, dataset = fn(df)`
			`self.dataset = dataset.shuffle(self.shuffle_buffer_size)`
			`self.TRAIN_SIZE = int(dataset_size * 0.85)`
			`self.TEST_SIZE = dataset_size - self.TRAIN_SIZE`
			`(training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),`
			`self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())`

updates 2019-03-07 03:25:23 +00:00			`# print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))`

refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`print('dataset: size={}, train={}, test={}'.format(dataset_size, self.TRAIN_SIZE, self.TEST_SIZE))`
			`print('input_size={}'.format(features))`

feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`self.dataset_size = dataset_size`
			`self.features = features`
			`self.output_size = output_size`
			`self.training = training`
			`self.test = test`

refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`def create_model(self, layers, out_activation=None):`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`params = {`
			`'kernel_initializer': 'lecun_uniform',`
			`'bias_initializer': 'zeros',`
fix: use correct order for prediction 2019-03-05 11:53:29 +00:00			`# 'kernel_regularizer': keras.regularizers.l2(l=0.01)`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`}`
fix: use correct order for prediction 2019-03-05 11:53:29 +00:00			`dropout = [keras.layers.Dropout(0.1, input_shape=[self.features])]`
updates 2019-03-07 03:25:23 +00:00			`# dropout = []`
fix: use correct order for prediction 2019-03-05 11:53:29 +00:00			`self.model = keras.Sequential(dropout + [`
			`keras.layers.Dense(layers[0], activation=tf.nn.elu, **params)`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`] + [`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`keras.layers.Dense(n, activation=tf.nn.elu, **params) for n in layers[1:]`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`] + [`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`keras.layers.Dense(self.output_size, activation=out_activation, **params)`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`])`

fix: use correct order for prediction 2019-03-05 11:53:29 +00:00			`def compile(self, loss='mse', metrics=['accuracy'], optimizer=tf.train.AdamOptimizer, load_weights=True):`
			`if load_weights:`
			`self.model.load_weights(self.path)`

updates 2019-02-28 10:04:47 +00:00			`optimizer = optimizer(self.learning_rate)`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
updates 2019-02-28 10:04:47 +00:00			`self.model.compile(loss=loss,`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`optimizer=optimizer,`
updates 2019-02-28 10:04:47 +00:00			`metrics=metrics)`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
			`def evaluate(self):`
			`return self.model.evaluate(`
			`self.test,`
			`batch_size=self.batch_size,`
			`steps=int(self.dataset_size / self.batch_size),`
			`verbose=1`
			`)`

fix: use correct order for prediction 2019-03-05 11:53:29 +00:00			`def evaluate_print(self):`
			`loss, accuracy = self.evaluate()`
			`print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))`

feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`def train(self):`
			`self.model.summary()`

updates 2019-03-07 03:25:23 +00:00			`checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='val_loss', verbose=1, mode='min', save_best_only=True)`
refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', update_freq='epoch')`
updates 2019-03-07 03:25:23 +00:00			`# reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)`
			`# map_callback = MapHistory()`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
			`self.model.fit(`
			`self.training,`
			`batch_size=self.batch_size,`
			`epochs=self.epochs,`
updates 2019-03-07 03:25:23 +00:00			`steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),`
updates 2019-02-28 10:04:47 +00:00			`callbacks=[checkpoint, tensorboard],`
updates 2019-03-07 03:25:23 +00:00			`validation_data=self.test,`
			`validation_steps=int(self.TEST_SIZE / self.batch_size),`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`verbose=1`
			`)`

			`def predict(self, a):`
			`return np.argmax(self.model.predict(a), axis=1)`

updates 2019-02-28 10:04:47 +00:00			`A = Model('a', epochs=2)`
updates 2019-03-07 03:25:23 +00:00			`B = Model('b', learning_rate=0.0005, epochs=50)`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
updates 2019-03-07 03:25:23 +00:00			`# 24 so far`
updates 2019-02-28 10:04:47 +00:00			`def compile_b():`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00			`B.prepare_dataset(df, dataframe_to_dataset_biomes)`
updates 2019-03-07 03:25:23 +00:00			`B.create_model([12], tf.nn.softmax)`
			`B.compile(loss='sparse_categorical_crossentropy', load_weights=False)`
updates 2019-02-28 10:04:47 +00:00
			`def compile_a():`
			`A.prepare_dataset(df, dataframe_to_dataset_temp_precip)`
fix(nn): better normalization, weight initialization and activation 2019-02-28 13:52:50 +00:00			`A.create_model([(4, tf.nn.elu)])`
refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`# A.create_model([]) # linear model`
updates 2019-02-28 10:04:47 +00:00			`A.compile(metrics=['accuracy', 'mae'])`

			`if __name__ == "__main__":`
			`compile_b()`
			`B.train()`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
			`# for inp, out in B.test.take(1).make_one_shot_iterator():`
			`# print(inp, out)`

			`# print(np.unique(nums))`
			`# print(np.unique(predictions))`
updates 2019-02-28 10:04:47 +00:00			`# print('loss: {}, evaluation: {}'.format(*B.evaluate()))`
feat(temps): various temperatures 2019-02-27 11:36:20 +00:00
refactor(data): include latitude longitude in columns, not indices 2019-03-05 07:59:30 +00:00			`# compile_a()`
			`# A.train()`