feat(models): train models and evaluate them
commit d8365d6285
parent 0d9a0068b1
.gitignore (vendored) | 1 +
@@ -1,3 +1,4 @@
+checkpoints.*
 geodata
 *.p
 #### joe made this: http://goel.io/joe
checkpoints/a.hdf5 (new file) | BIN (binary file not shown)
checkpoints/b.hdf5 (new file) | BIN (binary file not shown)
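Both checkpoints are reloaded at the top of the new training functions via model.load_weights, which requires the matching architecture to be rebuilt in code first. A minimal editorial sketch for model a (assuming, per utils.py and train.py below, 5 input features and 2 regression outputs):

import tensorflow as tf
from tensorflow import keras

# Rebuild model a's architecture, then restore the committed weights.
model = keras.Sequential([
    keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[5]),
    keras.layers.Dense(2),
])
model.load_weights('checkpoints/a.hdf5')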
nn.py (deleted) | 60 -
@@ -1,60 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-# TensorFlow and tf.keras
-import tensorflow as tf
-from tensorflow import keras
-
-# Helper libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from utils import *
-
-tf.enable_eager_execution()
-
-df = pd.read_pickle('data_final.p')
-# print(df.head())
-
-BATCH_SIZE = 15
-SHUFFLE_BUFFER_SIZE = 100
-LEARNING_RATE = 0.001
-
-# dataset = dataframe_to_dataset_biomes(df)
-dataset_size, features, dataset = dataframe_to_dataset_temp_precip(df)
-print(dataset_size)
-dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()
-TRAIN_SIZE = dataset_size * 0.85
-TEST_SIZE = dataset_size - TRAIN_SIZE
-(training, test) = (dataset.take(TRAIN_SIZE), dataset.skip(TRAIN_SIZE))
-
-print(training.make_one_shot_iterator().get_next())
-
-model = keras.Sequential([
-    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[features]),
-    keras.layers.Dense(32, activation=tf.nn.relu),
-    keras.layers.Dense(2)
-])
-
-optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
-
-model.compile(loss='mse',
-              optimizer=optimizer,
-              metrics=['mae'])
-
-model.summary()
-
-EPOCHS = 1000
-
-history = model.fit(
-    training,
-    epochs=EPOCHS,
-    verbose=1,
-    steps_per_epoch=int(dataset_size / BATCH_SIZE)
-)
-
-# i = 0
-# for feature, target in dataset:
-#     print('{} => {}'.format(feature, target))
-
-print(tf.__version__)
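The deleted script peeked at the data through training.make_one_shot_iterator(). An editorial aside, assuming the TF 1.x eager mode this code enables: under eager execution a tf.data.Dataset is directly iterable, so the same check needs no explicit iterator:

# Inspect one element directly; works because tf.enable_eager_execution()
# makes tf.data.Dataset iterable as a plain Python iterable.
for features, target in dataset.take(1):
    print(features.numpy(), target.numpy())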
train.py (new file) | 134 +
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, division, print_function
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import os.path
+
+from utils import *
+
+RANDOM_SEED = 1
+
+tf.enable_eager_execution()
+
+tf.set_random_seed(RANDOM_SEED)
+np.random.seed(RANDOM_SEED)
+
+df = pd.read_pickle('data_final.p')
+
+# temp and precipitation
+def train_model_a():
+    filepath = "checkpoints/a.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.001
+    EPOCHS = 2
+
+    # dataset = dataframe_to_dataset_biomes(df)
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
+    # take/skip need integer counts (dataset_size * 0.85 is a float)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).repeat(), dataset.skip(TRAIN_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(output_size)
+    ])
+
+    model.load_weights(filepath)
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='mse',
+                  optimizer=optimizer,
+                  metrics=['mae', 'accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        batch_size=BATCH_SIZE,
+        epochs=EPOCHS,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint],
+        verbose=1
+    )
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print(evaluation)
+
+# 850 epochs so far
+def train_model_b():
+    filepath = "checkpoints/b.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.0005
+    EPOCHS = 400
+
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(128, activation=tf.nn.relu),
+        keras.layers.Dense(output_size, activation=tf.nn.softmax)
+    ])
+
+    model.load_weights(filepath)
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='sparse_categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        epochs=EPOCHS,
+        verbose=1,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint]
+    )
+    # print(dataset.repeat().make_one_shot_iterator().get_next())
+
+    # inp, out = test.make_one_shot_iterator().get_next()
+    # print(inp, out)
+    # print(np.argmax(model.predict(inp), axis=1))
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print('loss: {}, accuracy: {}'.format(*evaluation))
+
+# train_model_a()
+train_model_b()
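One caveat worth flagging in both training functions (an editorial aside, not part of the commit): the split is taken from a dataset that was just shuffled with the default reshuffle_each_iteration=True, so take/skip deal out a different train/test split on every pass, letting test rows leak into training over repeated epochs; in train_model_a the split is also applied after batch(), so it counts batches rather than rows. A hedged sketch of one way to pin the split, reusing the commit's own names:

# Shuffle once with a fixed seed and keep the order stable across epochs,
# then split on row counts before batching.
dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE, seed=RANDOM_SEED,
                          reshuffle_each_iteration=False)
TRAIN_SIZE = int(dataset_size * 0.85)
training = dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
test = dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat()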
utils.py | 6 +-
@@ -49,9 +49,9 @@ def dataframe_to_dataset_biomes(df):
     tf_output = np.concatenate((tf_output, df[output].values), axis=0)
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32)
+    tf_output = tf.cast(tf_output, tf.int64)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]
@@ -81,5 +81,5 @@ def dataframe_to_dataset_temp_precip(df):
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
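The label change in dataframe_to_dataset_biomes lines up with train.py: sparse_categorical_crossentropy pairs raw integer class ids with the 14-way softmax of model b, so the biome labels are now cast to int64 and no longer normalized (normalizing would destroy the class ids). A small editorial sketch of that contract, assuming the TF 1.x eager mode used here:

import tensorflow as tf

probs = tf.constant([[1.0 / 14] * 14])   # one sample: uniform softmax over 14 biomes
label = tf.constant([7], dtype=tf.int64)  # raw class id, not a normalized float
loss = tf.keras.losses.sparse_categorical_crossentropy(label, probs)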