diff --git a/.gitignore b/.gitignore
index 0361c94..e2327e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+checkpoints.*
 geodata
 *.p
 #### joe made this: http://goel.io/joe
diff --git a/checkpoints/a.hdf5 b/checkpoints/a.hdf5
new file mode 100644
index 0000000..f016d67
Binary files /dev/null and b/checkpoints/a.hdf5 differ
diff --git a/checkpoints/b.hdf5 b/checkpoints/b.hdf5
new file mode 100644
index 0000000..fb5b7d1
Binary files /dev/null and b/checkpoints/b.hdf5 differ
diff --git a/nn.py b/nn.py
deleted file mode 100644
index 0e8be7a..0000000
--- a/nn.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-# TensorFlow and tf.keras
-import tensorflow as tf
-from tensorflow import keras
-
-# Helper libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-from utils import *
-
-tf.enable_eager_execution()
-
-df = pd.read_pickle('data_final.p')
-# print(df.head())
-
-BATCH_SIZE = 15
-SHUFFLE_BUFFER_SIZE = 100
-LEARNING_RATE = 0.001
-
-# dataset = dataframe_to_dataset_biomes(df)
-dataset_size, features, dataset = dataframe_to_dataset_temp_precip(df)
-print(dataset_size)
-dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).repeat()
-TRAIN_SIZE = dataset_size * 0.85
-TEST_SIZE = dataset_size - TRAIN_SIZE
-(training, test) = (dataset.take(TRAIN_SIZE), dataset.skip(TRAIN_SIZE))
-
-print(training.make_one_shot_iterator().get_next())
-
-model = keras.Sequential([
-    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[features]),
-    keras.layers.Dense(32, activation=tf.nn.relu),
-    keras.layers.Dense(2)
-])
-
-optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
-
-model.compile(loss='mse',
-              optimizer=optimizer,
-              metrics=['mae'])
-
-model.summary()
-
-EPOCHS = 1000
-
-history = model.fit(
-    training,
-    epochs=EPOCHS,
-    verbose=1,
-    steps_per_epoch=int(dataset_size / BATCH_SIZE)
-)
-
-# i = 0
-# for feature, target in dataset:
-    # print('{} => {}'.format(feature, target))
-
-print(tf.__version__)
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..83d3082
--- /dev/null
+++ b/train.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, division, print_function
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import os.path
+
+from utils import *
+
+RANDOM_SEED = 1
+
+tf.enable_eager_execution()
+
+tf.set_random_seed(RANDOM_SEED)
+np.random.seed(RANDOM_SEED)
+
+df = pd.read_pickle('data_final.p')
+
+# temp and precipitation
+def train_model_a():
+    filepath = "checkpoints/a.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.001
+    EPOCHS = 2
+
+    # dataset = dataframe_to_dataset_biomes(df)
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).repeat(), dataset.skip(TRAIN_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(output_size)
+    ])
+
+    if os.path.exists(filepath): model.load_weights(filepath)  # resume from the saved checkpoint
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='mse',
+                  optimizer=optimizer,
+                  metrics=['mae', 'accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        batch_size=BATCH_SIZE,
+        epochs=EPOCHS,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint],
+        verbose=1
+    )
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print(evaluation)
+
+# 850 epochs so far
+def train_model_b():
+    filepath = "checkpoints/b.hdf5"
+
+    BATCH_SIZE = 100
+    SHUFFLE_BUFFER_SIZE = 500
+    LEARNING_RATE = 0.0005
+    EPOCHS = 400
+
+    # dataset = dataframe_to_dataset_biomes(df)
+    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
+    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
+    TRAIN_SIZE = int(dataset_size * 0.85)
+    TEST_SIZE = dataset_size - TRAIN_SIZE
+    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
+
+    model = keras.Sequential([
+        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
+        keras.layers.Dense(128, activation=tf.nn.relu),
+        keras.layers.Dense(output_size, activation=tf.nn.softmax)
+    ])
+
+    if os.path.exists(filepath): model.load_weights(filepath)  # resume from the saved checkpoint
+
+    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
+
+    model.compile(loss='sparse_categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    model.summary()
+
+    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
+
+    model.fit(
+        training,
+        epochs=EPOCHS,
+        verbose=1,
+        steps_per_epoch=int(dataset_size / BATCH_SIZE),
+        callbacks=[checkpoint]
+    )
+    # print(dataset.repeat().make_one_shot_iterator().get_next())
+
+    # inp, out = test.make_one_shot_iterator().get_next()
+    # print(inp, out)
+    # print(np.argmax(model.predict(inp), axis=1))
+
+    evaluation = model.evaluate(
+        test,
+        batch_size=BATCH_SIZE,
+        steps=int(dataset_size / BATCH_SIZE),
+        verbose=1
+    )
+
+    print('loss: {}, accuracy: {}'.format(*evaluation))
+
+# train_model_a()
+train_model_b()
+
+# train_model_a()
diff --git a/utils.py b/utils.py
index 99393ba..11c1a3b 100644
--- a/utils.py
+++ b/utils.py
@@ -49,9 +49,9 @@ def dataframe_to_dataset_biomes(df):
         tf_output = np.concatenate((tf_output, df[output].values), axis=0)
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32)
+    tf_output = tf.cast(tf_output, tf.int64)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]
@@ -81,5 +81,5 @@ def dataframe_to_dataset_temp_precip(df):
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
 
-    return int(tf_inputs.shape[0]), 5, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
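
A note on the train/test split used in both training functions: `Dataset.shuffle` reshuffles on every pass by default, so carving out splits with `take`/`skip` downstream of the shuffle can leak training examples into the test set from one epoch to the next. Below is a minimal sketch of a leak-free variant, not part of the diff itself; it reuses `df` and `dataframe_to_dataset_biomes` from this repo's `utils.py`, and the seed and batch size are illustrative values.

```python
import pandas as pd
import tensorflow as tf

from utils import dataframe_to_dataset_biomes

df = pd.read_pickle('data_final.p')
dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)

BATCH_SIZE = 100  # illustrative, matches the value used in train.py
TRAIN_SIZE = int(dataset_size * 0.85)

# Shuffle once with a fixed permutation so take() and skip() partition the
# same ordering on every epoch, instead of drawing a fresh shuffle each pass.
dataset = dataset.shuffle(dataset_size, seed=1, reshuffle_each_iteration=False)
training = dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
test = dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
```

The same caveat applies to `train_model_a`, with the added wrinkle that the dataset there is batched before `take`/`skip`, so its `TRAIN_SIZE` counts batches rather than samples.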