diff --git a/.gitignore b/.gitignore
index 0361c94..e2327e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+checkpoints.*
 geodata
 *.p
 #### joe made this: http://goel.io/joe
diff --git a/checkpoints/a.hdf5 b/checkpoints/a.hdf5
new file mode 100644
index 0000000..f016d67
Binary files /dev/null and b/checkpoints/a.hdf5 differ
diff --git a/checkpoints/b.hdf5 b/checkpoints/b.hdf5
new file mode 100644
index 0000000..fb5b7d1
Binary files /dev/null and b/checkpoints/b.hdf5 differ
diff --git a/nn.py b/nn.py
deleted file mode 100644
index 0e8be7a..0000000
--- a/nn.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-# TensorFlow and tf.keras
-import tensorflow as tf
-from tensorflow import keras
-
-# Helper libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from utils import *
-
-tf.enable_eager_execution()
-
-df = pd.read_pickle('data_final.p')
-# print(df.head())
-
-BATCH_SIZE = 15
-SHUFFLE_BUFFER_SIZE = 100
-LEARNING_RATE = 0.001
-
-# dataset = dataframe_to_dataset_biomes(df)
-dataset_size, features, dataset = dataframe_to_dataset_temp_precip(df)
-print(dataset_size)
-dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()
-TRAIN_SIZE = dataset_size * 0.85
-TEST_SIZE = dataset_size - TRAIN_SIZE
-(training, test) = (dataset.take(TRAIN_SIZE), dataset.skip(TRAIN_SIZE))
-
-print(training.make_one_shot_iterator().get_next())
-
-model = keras.Sequential([
-    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[features]),
-    keras.layers.Dense(32, activation=tf.nn.relu),
-    keras.layers.Dense(2)
-])
-
-optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
-
-model.compile(loss='mse',
-              optimizer=optimizer,
-              metrics=['mae'])
-
-model.summary()
-
-EPOCHS = 1000
-
-history = model.fit(
-    training,
-    epochs=EPOCHS,
-    verbose=1,
-    steps_per_epoch=int(dataset_size / BATCH_SIZE)
-)
-
-# i = 0
-# for feature, target in dataset:
-    # print('{} => {}'.format(feature, target))
-
-print(tf.__version__)
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..83d3082
--- /dev/null
+++ b/train.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, division, print_function
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import os.path
+
+from utils import *
+
+RANDOM_SEED = 1
+
+tf.enable_eager_execution()
+
+tf.set_random_seed(RANDOM_SEED)
+np.random.seed(RANDOM_SEED)
+
+df = pd.read_pickle('data_final.p')
+
+# temp and precipitation
+def train_model_a():
+    filepath = "checkpoints/a.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.001
+    EPOCHS = 2
+
+    # dataset = dataframe_to_dataset_biomes(df)
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).repeat(), dataset.skip(TRAIN_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(output_size)
+    ])
+
+    if os.path.exists(filepath): model.load_weights(filepath)  # resume from the saved checkpoint
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='mse',
+                  optimizer=optimizer,
+                  metrics=['mae', 'accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        batch_size=BATCH_SIZE,
+        epochs=EPOCHS,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint],
+        verbose=1
+    )
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print(evaluation)
+
+# 850 epochs so far
+def train_model_b():
+    filepath = "checkpoints/b.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.0005
+    EPOCHS = 400
+
+    # dataset = dataframe_to_dataset_biomes(df)
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(128, activation=tf.nn.relu),
+        keras.layers.Dense(output_size, activation=tf.nn.softmax)
+    ])
+
+    if os.path.exists(filepath): model.load_weights(filepath)  # resume from the saved checkpoint
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='sparse_categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        epochs=EPOCHS,
+        verbose=1,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint]
+    )
+    # print(dataset.repeat().make_one_shot_iterator().get_next())
+
+    # inp, out = test.make_one_shot_iterator().get_next()
+    # print(inp, out)
+    # print(np.argmax(model.predict(inp), axis=1))
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print('loss: {}, accuracy: {}'.format(*evaluation))
+
+# train_model_a()
+train_model_b()
+
+# train_model_a()
diff --git a/utils.py b/utils.py
index 99393ba..11c1a3b 100644
--- a/utils.py
+++ b/utils.py
@@ -49,9 +49,9 @@ def dataframe_to_dataset_biomes(df):
         tf_output = np.concatenate((tf_output, df[output].values), axis=0)
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32)
+    tf_output = tf.cast(tf_output, tf.int64)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]
@@ -81,5 +81,5 @@ def dataframe_to_dataset_temp_precip(df):
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
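
A note on the train/test split used in both training functions: `Dataset.shuffle` reshuffles on every pass by default, so carving out splits with `take`/`skip` downstream of the shuffle can leak training examples into the test set from one epoch to the next. Below is a minimal sketch of a leak-free variant, not part of the diff itself; it reuses `df` and `dataframe_to_dataset_biomes` from this repo's `utils.py`, and the seed and batch size are illustrative values.

```python
import pandas as pd
import tensorflow as tf

from utils import dataframe_to_dataset_biomes

df = pd.read_pickle('data_final.p')
dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)

BATCH_SIZE = 100  # illustrative, matches the value used in train.py
TRAIN_SIZE = int(dataset_size * 0.85)

# Shuffle once with a fixed permutation so take() and skip() partition the
# same ordering on every epoch, instead of drawing a fresh shuffle each pass.
dataset = dataset.shuffle(dataset_size, seed=1, reshuffle_each_iteration=False)
training = dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
test = dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
```

The same caveat applies to `train_model_a`, with the added wrinkle that the dataset there is batched before `take`/`skip`, so its `TRAIN_SIZE` counts batches rather than samples.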