diff --git a/draw.py b/draw.py
index 8965ff3..c55a00e 100644
--- a/draw.py
+++ b/draw.py
@@ -4,38 +4,52 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import cartopy.crs as ccrs
 
-df = pd.read_pickle('data_final.p')
+def draw(df):
+    biomes = {}
+    biome_numbers = df['biome_num'].unique()
+    # biome_names = df['biome_name'].unique()
 
-biomes = {}
-biome_numbers = df['biome_num'].unique()
-for n in biome_numbers:
-    biomes[n] = []
+    for (longitude, latitude), row in df.iterrows():
+        p = Point(longitude, latitude)
+        if row.biome_num in biomes:
+            biomes[row.biome_num].append(p)
+        else:
+            biomes[row.biome_num] = [p]
 
-for (longitude, latitude), row in df.iterrows():
-    biomes[row.biome_num].append(Point(longitude, latitude))
+    ax = plt.axes(projection=ccrs.PlateCarree())
+    ax.stock_img()
+    # ax.legend(df['biome_name'].unique())
 
-ax = plt.axes(projection=ccrs.PlateCarree())
-ax.stock_img()
+    colors={
+        0: '#016936',
+        1: '#B2D127',
+        2: '#77CC00',
+        3: '#99C500',
+        4: '#B6CC00',
+        5: '#00C5B5',
+        6: '#EFFF00',
+        7: '#FFEE00',
+        8: '#009BFF',
+        9: '#A0ADBA',
+        10: '#5C62FF',
+        11: '#00850F',
+        12: '#FF9E1F',
+        13: '#FF1F97'
+    }
 
-colors={
-    0: '#016936',
-    1: '#B2D127',
-    2: '#77CC00',
-    3: '#99C500',
-    4: '#B6CC00',
-    5: '#00C5B5',
-    6: '#EFFF00',
-    7: '#FFEE00',
-    8: '#009BFF',
-    9: '#A0ADBA',
-    10: '#5C62FF',
-    11: '#00850F',
-    12: '#FF9E1F',
-    13: '#FF1F97'
-}
+    for n in biome_numbers:
+        biomes[n] = MultiPoint(biomes[n]).buffer(1)
+        # print(biomes[n])
+        # legend = biome_names[n]
+        if not hasattr(biomes[n], '__iter__'):
+            biomes[n] = [biomes[n]]
+        ax.add_geometries(biomes[n], ccrs.PlateCarree(), facecolor=colors[n])
+        # artist.set_label(biome_names[n])
+        # print(artist.get_label())
 
-for n in biome_numbers:
-    biomes[n] = MultiPoint(biomes[n]).buffer(1)
-    ax.add_geometries(biomes[n], ccrs.PlateCarree(), facecolor=colors[n])
+    # ax.legend(artists, biome_names)
+    plt.show()
 
-plt.show()
+if __name__ == "__main__":
+    df = pd.read_pickle('data_final.p')
+    draw(df)
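The hasattr(biomes[n], '__iter__') check added in draw() is there because MultiPoint(...).buffer(1) can return either a single Polygon or a MultiPolygon, while ax.add_geometries() expects an iterable of geometries. A minimal illustration of the two cases (assuming shapely 1.x, where a MultiPolygon is iterable and a Polygon is not):

    from shapely.geometry import MultiPoint

    near = MultiPoint([(0, 0), (0.5, 0.5)]).buffer(1)   # overlapping buffers merge into one Polygon
    far = MultiPoint([(0, 0), (10, 10)]).buffer(1)      # disjoint buffers stay a MultiPolygon
    for shape in (near, far):
        geoms = shape if hasattr(shape, '__iter__') else [shape]
        print(type(shape).__name__, len(list(geoms)))   # Polygon 1 / MultiPolygon 2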
diff --git a/nn.py b/nn.py
new file mode 100644
index 0000000..4c1a47a
--- /dev/null
+++ b/nn.py
@@ -0,0 +1,112 @@
+from __future__ import absolute_import, division, print_function
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import os.path
+
+from utils import *
+
+RANDOM_SEED = 1
+
+tf.enable_eager_execution()
+
+tf.set_random_seed(RANDOM_SEED)
+np.random.seed(RANDOM_SEED)
+
+df = pd.read_pickle('data_final.p')
+
+class Model():
+    def __init__(self, name, batch_size=100, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
+        self.name = name
+        self.path = "checkpoints/{}.hdf5".format(name)
+
+        self.batch_size = batch_size
+        self.shuffle_buffer_size = shuffle_buffer_size
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+
+    def prepare_dataset(self, df, fn):
+        self.dataset_fn = fn
+        dataset_size, features, output_size, dataset = fn(df)
+        self.dataset = dataset.shuffle(self.shuffle_buffer_size)
+        self.TRAIN_SIZE = int(dataset_size * 0.85)
+        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
+        (training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),
+                            self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())
+
+        self.dataset_size = dataset_size
+        self.features = features
+        self.output_size = output_size
+        self.training = training
+        self.test = test
+
+    def create_model(self, layers):
+        self.model = keras.Sequential([
+            keras.layers.Dense(layers[0], activation=tf.nn.relu, input_shape=[self.features])
+        ] + [
+            keras.layers.Dense(n, activation=tf.nn.relu) for n in layers[1:]
+        ] + [
+            keras.layers.Dense(self.output_size)
+        ])
+
+    def compile(self):
+        # resume from an existing checkpoint; a fresh model has nothing to load yet
+        if os.path.isfile(self.path):
+            self.model.load_weights(self.path)
+        optimizer = tf.train.AdamOptimizer(self.learning_rate)
+
+        self.model.compile(loss='mse',
+                           optimizer=optimizer,
+                           metrics=['mae', 'accuracy'])
+
+    def evaluate(self):
+        return self.model.evaluate(
+            self.test,
+            batch_size=self.batch_size,
+            steps=int(self.dataset_size / self.batch_size),
+            verbose=1
+        )
+
+    def train(self):
+        self.model.summary()
+
+        checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='acc', verbose=1, mode='max')
+
+        self.model.fit(
+            self.training,
+            batch_size=self.batch_size,
+            epochs=self.epochs,
+            steps_per_epoch=int(self.dataset_size / self.batch_size),
+            callbacks=[checkpoint],
+            verbose=1
+        )
+
+    def predict(self, a):
+        return np.argmax(self.model.predict(a), axis=1)
+
+A = Model('a', batch_size=100, shuffle_buffer_size=500, learning_rate=0.001, epochs=2)
+B = Model('b', batch_size=100, shuffle_buffer_size=500, learning_rate=0.001, epochs=850)
+
+if __name__ == "__main__":
+    B.prepare_dataset(df, dataframe_to_dataset_biomes)
+    B.create_model([64, 128])
+    B.compile()
+
+    # for inp, out in B.test.take(1).make_one_shot_iterator():
+    #     print(inp, out)
+
+    # print(np.unique(nums))
+    # print(np.unique(predictions))
+
+    print('loss: {}, evaluation: {}'.format(*B.evaluate()))
+
+    # B.train()
+
+    A.prepare_dataset(df, dataframe_to_dataset_temp_precip)
+    A.create_model([4])
+    A.compile()
+    # A.train()
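nn.py folds the two near-duplicate training functions of the deleted train.py (below) into one Model class. A hedged usage sketch for training a fresh model; the name 'c', the layer sizes and the epoch count here are illustrative only, not part of the patch:

    from nn import Model, df
    from utils import dataframe_to_dataset_biomes

    C = Model('c', batch_size=100, shuffle_buffer_size=500, learning_rate=0.001, epochs=10)
    C.prepare_dataset(df, dataframe_to_dataset_biomes)   # shuffle, then 85/15 train/test split
    C.create_model([64, 128])                            # Dense(64) -> Dense(128) -> Dense(output_size)
    C.compile()                                          # loads checkpoints/c.hdf5 only if it already exists
    C.train()                                            # checkpoints back to the same path
    print(C.evaluate())

One behavioural difference worth noting: Model.compile() hard-codes an MSE loss, whereas the deleted train_model_b trained the biome classifier with sparse_categorical_crossentropy and a softmax output layer.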
diff --git a/train.py b/train.py
deleted file mode 100644
index 83d3082..0000000
--- a/train.py
+++ /dev/null
@@ -1,134 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-# TensorFlow and tf.keras
-import tensorflow as tf
-from tensorflow import keras
-
-# Helper libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-import os.path
-
-from utils import *
-
-RANDOM_SEED = 1
-
-tf.enable_eager_execution()
-
-tf.set_random_seed(RANDOM_SEED)
-np.random.seed(RANDOM_SEED)
-
-df = pd.read_pickle('data_final.p')
-
-# temp and precipitation
-def train_model_a():
-    filepath = "checkpoints/a.hdf5"
-
-    BATCH_SIZE = 100
-    SHUFFLE_BUFFER_SIZE = 500
-    LEARNING_RATE = 0.001
-    EPOCHS = 2
-
-    # dataset = dataframe_to_dataset_biomes(df)
-    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
-    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
-    TRAIN_SIZE = dataset_size * 0.85
-    TEST_SIZE = dataset_size - TRAIN_SIZE
-    (training, test) = (dataset.take(TRAIN_SIZE).repeat(), dataset.skip(TRAIN_SIZE).repeat())
-
-    model = keras.Sequential([
-        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
-        keras.layers.Dense(output_size)
-    ])
-
-    model.load_weights(filepath)
-
-    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
-
-    model.compile(loss='mse',
-                  optimizer=optimizer,
-                  metrics=['mae', 'accuracy'])
-
-    model.summary()
-
-    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
-
-    model.fit(
-        training,
-        batch_size=BATCH_SIZE,
-        epochs=EPOCHS,
-        steps_per_epoch=int(dataset_size / BATCH_SIZE),
-        callbacks=[checkpoint],
-        verbose=1
-    )
-
-    evaluation = model.evaluate(
-        test,
-        batch_size=BATCH_SIZE,
-        steps=int(dataset_size / BATCH_SIZE),
-        verbose=1
-    )
-
-    print(evaluation)
-
-# 850 epochs so far
-def train_model_b():
-    filepath = filepath="checkpoints/b.hdf5"
-
-    BATCH_SIZE = 100
-    SHUFFLE_BUFFER_SIZE = 500
-    LEARNING_RATE = 0.0005
-    EPOCHS = 400
-
-    # dataset = dataframe_to_dataset_biomes(df)
-    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
-    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
-    TRAIN_SIZE = dataset_size * 0.85
-    TEST_SIZE = dataset_size - TRAIN_SIZE
-    (training, test) = (dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat(), dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat())
-
-    model = keras.Sequential([
-        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
-        keras.layers.Dense(128, activation=tf.nn.relu),
-        keras.layers.Dense(output_size, activation=tf.nn.softmax)
-    ])
-
-    model.load_weights(filepath)
-
-    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
-
-    model.compile(loss='sparse_categorical_crossentropy',
-                  optimizer=optimizer,
-                  metrics=['accuracy'])
-
-    model.summary()
-
-    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')
-
-    model.fit(
-        training,
-        epochs=EPOCHS,
-        verbose=1,
-        steps_per_epoch=int(dataset_size / BATCH_SIZE),
-        callbacks=[checkpoint]
-    )
-    # print(dataset.repeat().make_one_shot_iteraor().get_next())
-
-    # inp, out = test.make_one_shot_iterator().get_next()
-    # print(inp, out)
-    # print(np.argmax(model.predict(inp), axis=1))
-
-    evaluation = model.evaluate(
-        test,
-        batch_size=BATCH_SIZE,
-        steps=int(dataset_size / BATCH_SIZE),
-        verbose=1
-    )
-
-    print('loss: {}, accuracy: {}'.format(*evaluation))
-
-# train_model_a()
-train_model_b()
-
-# train_model_a()
diff --git a/utils.py b/utils.py
index 11c1a3b..2b4edfe 100644
--- a/utils.py
+++ b/utils.py
@@ -18,7 +18,7 @@ def normalize_ndarray(ar):
 
 def normalize_df(df):
     for col in df.columns:
-        df[col] = normalize(df[col])
+        df.loc[:, col] = normalize(df[col])
 
     return df
 
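The normalize_df change above has to address the column as df.loc[:, col]; plain df.loc[col] selects (or creates) a row labelled col rather than the column. A small self-contained pandas sketch of the difference:

    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 2.0], 'b': [3.0, 4.0]})
    df.loc[:, 'a'] = df['a'] / df['a'].max()   # rescales column 'a' in place
    # df.loc['a'] = ...                        # would add/overwrite a *row* labelled 'a' instead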
diff --git a/various_temps.py b/various_temps.py
new file mode 100644
index 0000000..3642402
--- /dev/null
+++ b/various_temps.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pandas as pd
+
+from utils import *
+from nn import B
+from draw import draw
+import time
+
+def chunker(seq, size):
+    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
+
+year = MAX_YEAR - 1
+
+df = pd.read_pickle('data_final.p')
+latitude = np.array(df.index.get_level_values(1))
+df.loc[:, 'latitude'] = pd.Series(latitude, index=df.index)
+
+B.prepare_dataset(df, dataframe_to_dataset_biomes)
+B.create_model([64, 128])
+B.compile()
+
+for change in range(-5, 6):
+    print('TEMPERATURE MODIFICATION OF {}'.format(change))
+
+    inputs = ['elevation', 'distance_to_water']
+
+    for season in SEASONS:
+        inputs += [
+            'temp_{}_{}'.format(season, year),
+            'precip_{}_{}'.format(season, year)
+        ]
+
+    inputs += ['latitude']
+
+    frame = df[inputs]
+    print(frame.head())
+
+    # for season in SEASONS:
+    #     frame.loc[:, 'temp_{}_{}'.format(season, year)] += change
+
+    # print(np.average(frame.loc[:, 'temp_winter_2016']))
+
+    # index = []
+    # for longitude in range(-179, 179):
+    #     for latitude in range(-89, 89):
+    #         index.append((longitude, latitude))
+
+    columns = ['biome_num']
+    new_data = pd.DataFrame(columns=columns)
+    for i, chunk in enumerate(chunker(frame, B.batch_size)):
+        input_data = normalize_ndarray(chunk.values)
+        out = B.predict(input_data)
+        new_index = np.concatenate((chunk.index.values, new_data.index.values))
+
+        new_data = new_data.reindex(new_index)
+        new_data.loc[chunk.index.values, 'biome_num'] = out
+
+    # print(new_data['biome_num'].unique())
+
+    draw(new_data)
+
+    # columns = ['biome_num']
+    # indices = ['longitude', 'latitude']
+    # new_df = pd.DataFrame(index=indices, columns=columns)
+    # new_df =
+
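various_temps.py feeds the feature frame to B.predict() in batch_size chunks via the chunker() generator; a self-contained check (chunker re-defined here for illustration) shows that its positional slices work on DataFrames as well as on lists:

    import pandas as pd

    def chunker(seq, size):
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    df = pd.DataFrame({'x': range(7)})
    print([len(c) for c in chunker(df, 3)])   # [3, 3, 1]

Note that the per-season temperature offset in the script is still commented out, so at the moment every value of change runs the model on unmodified inputs.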