feat(models): train models and evaluate them

This commit is contained in:
Mahdi Dibaiee 2019-02-26 11:50:31 +03:30
parent 0d9a0068b1
commit d8365d6285
6 changed files with 138 additions and 63 deletions

1
.gitignore vendored

@@ -1,3 +1,4 @@
+checkpoints.*
 geodata
 *.p
 #### joe made this: http://goel.io/joe

BIN
checkpoints/a.hdf5 Normal file

Binary file not shown.

BIN
checkpoints/b.hdf5 Normal file

Binary file not shown.
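The two .hdf5 checkpoints above store weights only, so reusing them later means rebuilding the same architecture and then calling load_weights. A minimal sketch for model b, assuming the layer sizes from train.py below (the 11 input features and 14 biome classes come from the utils.py change at the end of this commit):

    import tensorflow as tf
    from tensorflow import keras

    # must mirror the Sequential model in train_model_b(); a different
    # architecture makes load_weights() fail with a shape mismatch
    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[11]),
        keras.layers.Dense(128, activation=tf.nn.relu),
        keras.layers.Dense(14, activation=tf.nn.softmax)
    ])
    model.load_weights('checkpoints/b.hdf5')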

60
nn.py

@@ -1,60 +0,0 @@
from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from utils import *

tf.enable_eager_execution()

df = pd.read_pickle('data_final.p')
# print(df.head())

BATCH_SIZE = 15
SHUFFLE_BUFFER_SIZE = 100
LEARNING_RATE = 0.001

# dataset = dataframe_to_dataset_biomes(df)
dataset_size, features, dataset = dataframe_to_dataset_temp_precip(df)
print(dataset_size)
dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()

TRAIN_SIZE = dataset_size * 0.85
TEST_SIZE = dataset_size - TRAIN_SIZE
(training, test) = (dataset.take(TRAIN_SIZE), dataset.skip(TRAIN_SIZE))

print(training.make_one_shot_iterator().get_next())

model = keras.Sequential([
    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[features]),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(2)
])

optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae'])

model.summary()

EPOCHS = 1000

history = model.fit(
    training,
    epochs=EPOCHS,
    verbose=1,
    steps_per_epoch=int(dataset_size / BATCH_SIZE)
)

# i = 0
# for feature, target in dataset:
#     print('{} => {}'.format(feature, target))

print(tf.__version__)

134
train.py Normal file

@@ -0,0 +1,134 @@
from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path

from utils import *

RANDOM_SEED = 1

tf.enable_eager_execution()
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

df = pd.read_pickle('data_final.p')

# model a: temp and precipitation
def train_model_a():
    filepath = "checkpoints/a.hdf5"

    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.001
    EPOCHS = 2

    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)

    # split on sample counts before batching, so take/skip count
    # samples rather than batches
    TRAIN_SIZE = int(dataset_size * 0.85)
    TEST_SIZE = dataset_size - TRAIN_SIZE
    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(),
                        dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())

    model = keras.Sequential([
        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(output_size)
    ])

    # resume training from the saved checkpoint when one exists
    if os.path.isfile(filepath):
        model.load_weights(filepath)

    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'accuracy'])

    model.summary()

    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')

    model.fit(
        training,
        epochs=EPOCHS,
        steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE),
        callbacks=[checkpoint],
        verbose=1
    )

    # evaluate only on the held-out split
    evaluation = model.evaluate(
        test,
        steps=int(TEST_SIZE / BATCH_SIZE),
        verbose=1
    )
    print(evaluation)

# model b: biomes; 850 epochs so far
def train_model_b():
    filepath = "checkpoints/b.hdf5"

    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.0005
    EPOCHS = 400

    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)

    TRAIN_SIZE = int(dataset_size * 0.85)
    TEST_SIZE = dataset_size - TRAIN_SIZE
    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(),
                        dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())

    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(128, activation=tf.nn.relu),
        keras.layers.Dense(output_size, activation=tf.nn.softmax)
    ])

    # resume training from the saved checkpoint when one exists
    if os.path.isfile(filepath):
        model.load_weights(filepath)

    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    model.summary()

    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')

    model.fit(
        training,
        epochs=EPOCHS,
        verbose=1,
        steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE),
        callbacks=[checkpoint]
    )

    # print(dataset.repeat().make_one_shot_iterator().get_next())
    # inp, out = test.make_one_shot_iterator().get_next()
    # print(inp, out)
    # print(np.argmax(model.predict(inp), axis=1))

    evaluation = model.evaluate(
        test,
        steps=int(TEST_SIZE / BATCH_SIZE),
        verbose=1
    )
    print('loss: {}, accuracy: {}'.format(*evaluation))

# train_model_a()
train_model_b()

6
utils.py

@@ -49,9 +49,9 @@ def dataframe_to_dataset_biomes(df):
         tf_output = np.concatenate((tf_output, df[output].values), axis=0)
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32)
+    tf_output = tf.cast(tf_output, tf.int64)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]

@@ -81,5 +81,5 @@ def dataframe_to_dataset_temp_precip(df):
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))