feat(models): train models and evaluate them

Mahdi Dibaiee 2019-02-26 11:50:31 +03:30
parent 0d9a0068b1
commit d8365d6285
6 changed files with 138 additions and 63 deletions

1
.gitignore vendored

@@ -1,3 +1,4 @@
checkpoints.*
geodata
*.p
#### joe made this: http://goel.io/joe

BIN
checkpoints/a.hdf5 Normal file

Binary file not shown.

BIN
checkpoints/b.hdf5 Normal file

Binary file not shown.

60
nn.py

@@ -1,60 +0,0 @@
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from utils import *
tf.enable_eager_execution()
df = pd.read_pickle('data_final.p')
# print(df.head())
BATCH_SIZE = 15
SHUFFLE_BUFFER_SIZE = 100
LEARNING_RATE = 0.001
# dataset = dataframe_to_dataset_biomes(df)
dataset_size, features, dataset = dataframe_to_dataset_temp_precip(df)
print(dataset_size)
dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()
TRAIN_SIZE = dataset_size * 0.85
TEST_SIZE = dataset_size - TRAIN_SIZE
(training, test) = (dataset.take(TRAIN_SIZE), dataset.skip(TRAIN_SIZE))
print(training.make_one_shot_iterator().get_next())
model = keras.Sequential([
    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[features]),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(2)
])
optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae'])
model.summary()
EPOCHS = 1000
history = model.fit(
    training,
    epochs=EPOCHS,
    verbose=1,
    steps_per_epoch=int(dataset_size / BATCH_SIZE)
)
# i = 0
# for feature, target in dataset:
# print('{} => {}'.format(feature, target))
print(tf.__version__)
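Both the removed nn.py and the new train.py carve the held-out set out of a tf.data pipeline with take/skip at an 85/15 ratio. A minimal standalone sketch of that pattern, with dummy arrays and illustrative sizes (only the split ratio is taken from the scripts):

import numpy as np
import tensorflow as tf

# dummy stand-ins for the pickled dataframe's inputs and targets
features = np.random.rand(100, 5).astype(np.float32)
targets = np.random.rand(100, 2).astype(np.float32)

dataset = tf.data.Dataset.from_tensor_slices((features, targets))
# keep the shuffle order fixed so take/skip produces a stable split
dataset = dataset.shuffle(100, reshuffle_each_iteration=False)

train_size = int(100 * 0.85)  # take/skip expect integer sample counts
training = dataset.take(train_size).batch(10).repeat()
test = dataset.skip(train_size).batch(10)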

134
train.py Normal file

@@ -0,0 +1,134 @@
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
from utils import *
RANDOM_SEED = 1
tf.enable_eager_execution()
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
df = pd.read_pickle('data_final.p')
# temp and precipitation
def train_model_a():
    filepath = "checkpoints/a.hdf5"
    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.001
    EPOCHS = 2
    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
    # keep the shuffle order fixed so take/skip below gives a stable train/test split
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=False)
    TRAIN_SIZE = int(dataset_size * 0.85)
    TEST_SIZE = dataset_size - TRAIN_SIZE
    # split on samples first, then batch each side
    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
    model = keras.Sequential([
        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(output_size)
    ])
    # resume from the committed checkpoint when it exists
    if os.path.exists(filepath):
        model.load_weights(filepath)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'accuracy'])
    model.summary()
    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
    # batching is handled by the dataset pipeline, so no batch_size argument here
    model.fit(
        training,
        epochs=EPOCHS,
        steps_per_epoch=int(dataset_size / BATCH_SIZE),
        callbacks=[checkpoint],
        verbose=1
    )
    evaluation = model.evaluate(
        test,
        steps=int(TEST_SIZE / BATCH_SIZE),  # one pass over the held-out 15%
        verbose=1
    )
    print(evaluation)
# 850 epochs so far
def train_model_b():
    filepath = "checkpoints/b.hdf5"
    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.0005
    EPOCHS = 400
    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
    # keep the shuffle order fixed so take/skip below gives a stable train/test split
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=False)
    TRAIN_SIZE = int(dataset_size * 0.85)
    TEST_SIZE = dataset_size - TRAIN_SIZE
    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(128, activation=tf.nn.relu),
        keras.layers.Dense(output_size, activation=tf.nn.softmax)
    ])
    # resume from the committed checkpoint when it exists
    if os.path.exists(filepath):
        model.load_weights(filepath)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
    model.fit(
        training,
        epochs=EPOCHS,
        verbose=1,
        steps_per_epoch=int(dataset_size / BATCH_SIZE),
        callbacks=[checkpoint]
    )
    # print(dataset.repeat().make_one_shot_iterator().get_next())
    # inp, out = test.make_one_shot_iterator().get_next()
    # print(inp, out)
    # print(np.argmax(model.predict(inp), axis=1))
    evaluation = model.evaluate(
        test,
        steps=int(TEST_SIZE / BATCH_SIZE),  # one pass over the held-out 15%
        verbose=1
    )
    print('loss: {}, accuracy: {}'.format(*evaluation))
# train_model_a()
train_model_b()
# train_model_a()
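For reference, a rough inference sketch showing how the committed checkpoint could be reused; it assumes the same architecture as train_model_b, the 11-feature/14-class shapes from utils.py, and an already-normalized input array (the inputs below are made up), mirroring the commented-out np.argmax prediction above:

import numpy as np
import tensorflow as tf
from tensorflow import keras

# hypothetical normalized inputs: 3 rows, 11 features
inputs = np.random.rand(3, 11).astype(np.float32)

model = keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[11]),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(14, activation=tf.nn.softmax)
])
model.load_weights('checkpoints/b.hdf5')

# predicted biome class index per row
print(np.argmax(model.predict(inputs), axis=1))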

utils.py

@@ -49,9 +49,9 @@ def dataframe_to_dataset_biomes(df):
     tf_output = np.concatenate((tf_output, df[output].values), axis=0)
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32)
+    tf_output = tf.cast(tf_output, tf.int64)
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]
@@ -81,5 +81,5 @@ def dataframe_to_dataset_temp_precip(df):
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
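The label change above (casting biome labels to int64 instead of running them through normalize_ndarray) lines up with train_model_b's sparse_categorical_crossentropy loss, which expects raw integer class indices in [0, output_size) rather than scaled floats. A small illustrative check, with made-up values:

import numpy as np
import tensorflow as tf

labels = tf.constant([0, 3, 13], dtype=tf.int64)                   # integer biome classes out of 14
probs = tf.constant(np.full((3, 14), 1.0 / 14), dtype=tf.float32)  # a uniform softmax output
loss = tf.keras.losses.sparse_categorical_crossentropy(labels, probs)
# every element is -log(1/14), roughly 2.64; normalized float labels would not index classes correctly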