diff --git a/biomes/plot.py b/biomes/plot.py new file mode 100644 index 0000000..7d1a5e1 --- /dev/null +++ b/biomes/plot.py @@ -0,0 +1,29 @@ +from utils import * +import tensorflow as tf +import pandas as pd +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D + +tf.enable_eager_execution() + +df = pd.read_pickle('data.p') +_, columns, _, _, dataset = dataframe_to_dataset_temp_precip(df) + +xs = np.empty((3, 100)) +ys = np.empty((100)) + +for i, (inp, out) in enumerate(dataset.take(100)): + xs[0][i] = float(inp[0]) + xs[1][i] = float(inp[1]) + xs[2][i] = float(inp[2]) + ys[i] = float(out[0]) + +print(xs, ys) +fig = plt.figure() +ax = fig.add_subplot(1, 1, 1) +ax.scatter(xs[0], ys, c='red', label='elevation') +ax.scatter(xs[1], ys, c='blue', label='distance_to_water') +ax.scatter(xs[2], ys, c='green', label='latitude') +#ax.scatter(xs2, 0, zs=0, c='blue') + +plt.show() diff --git a/biomes/predict.py b/biomes/predict.py index e06cef7..bdce8e0 100644 --- a/biomes/predict.py +++ b/biomes/predict.py @@ -59,49 +59,83 @@ def predicted_temps(A, year=2000): df = pd.read_pickle('data.p') - print(columns) - # print(df[0:A.batch_size]) inputs = df[INPUTS] all_temps = ['temp_{}_{}'.format(season, year) for season in SEASONS] - all_precips = ['precip_{}_{}'.format(season, year) for season in SEASONS] inputs.loc[:, 'mean_temp'] = np.mean(df[all_temps].values) + + inputs = inputs.to_numpy() + inputs = normalize_ndarray(inputs) + print(inputs[0:A.batch_size]) + + out_columns = all_temps # + all_precips + print(out_columns) + + out = A.predict(inputs) + actual_output = df[out_columns][0:A.batch_size] + model_output = pd.DataFrame(data=denormalize(out, df[out_columns].to_numpy()), columns=out_columns)[0:A.batch_size] + print(actual_output) + print(model_output) + +def predicted_precips(A, year=2000): + columns = INPUTS + + df = pd.read_pickle('data.p') + + inputs = df[INPUTS] + + all_precips = ['precip_{}_{}'.format(season, year) for season in SEASONS] 
inputs.loc[:, 'mean_precip'] = np.mean(df[all_precips].values) inputs = inputs.to_numpy() inputs = normalize_ndarray(inputs) print(inputs[0:A.batch_size]) - out_columns = all_temps + all_precips + out_columns = all_precips print(out_columns) out = A.predict(inputs) - # print(out.shape, out[0].shape) - # print(out) - # print(out[0]) - print(normalize_ndarray(df[out_columns])[0:A.batch_size]) - print(pd.DataFrame(data=out, columns=out_columns)) - # print(df[out_columns][0:A.batch_size]) - # print(pd.DataFrame(data=denormalize(out, df[out_columns].to_numpy()), columns=out_columns)) + actual_output = df[out_columns][0:A.batch_size] + model_output = pd.DataFrame(data=denormalize(out, df[out_columns].to_numpy()), columns=out_columns)[0:A.batch_size] + print(actual_output) + print(model_output) -def predicted_temps_cmd(checkpoint='checkpoints/a.h5', year=2000): +def predicted_temps_cmd(checkpoint='checkpoints/temp.h5', year=2000): batch_size = A_params['batch_size']['grid_search'][0] layers = A_params['layers']['grid_search'][0] optimizer = A_params['optimizer']['grid_search'][0](A_params['lr']['grid_search'][0]) - A = Model('a', epochs=1) - A.prepare_for_use( + Temp = Model('temp', epochs=1) + Temp.prepare_for_use( batch_size=batch_size, layers=layers, - dataset_fn=dataframe_to_dataset_temp_precip, + dataset_fn=dataframe_to_dataset_temp, optimizer=optimizer, out_activation=None, loss='mse', metrics=['mae'] ) - A.restore(checkpoint) - predicted_temps(A, year=year) + Temp.restore(checkpoint) + predicted_temps(Temp, year=year) + +def predicted_precips_cmd(checkpoint='checkpoints/precip.h5', year=2000): + batch_size = A_params['batch_size']['grid_search'][0] + layers = A_params['layers']['grid_search'][0] + optimizer = A_params['optimizer']['grid_search'][0](A_params['lr']['grid_search'][0]) + + Precip = Model('precip', epochs=1) + Precip.prepare_for_use( + batch_size=batch_size, + layers=layers, + dataset_fn=dataframe_to_dataset_precip, + optimizer=optimizer, + 
out_activation=None, + loss='mse', + metrics=['mae'] + ) + Precip.restore(checkpoint) + predicted_precips(Precip, year=year) if __name__ == "__main__": - fire.Fire({ 'map': predicted_map_cmd, 'temp': predicted_temps_cmd }) + fire.Fire({ 'map': predicted_map_cmd, 'temp': predicted_temps_cmd, 'precip': predicted_precips_cmd }) diff --git a/biomes/train.py b/biomes/train.py index 188a838..4f9b43d 100644 --- a/biomes/train.py +++ b/biomes/train.py @@ -53,11 +53,11 @@ A_params = { #'optimizer': tune.grid_search([tf.keras.optimizers.RMSprop]) } -class TuneA(tune.Trainable): +class TuneTemp(tune.Trainable): def _setup(self, config): logger.debug('Ray Tune model configuration %s', config) - self.model = Model('a', epochs=1) + self.model = Model('temp', epochs=1) optimizer = config['optimizer'] optimizer = config['optimizer'](lr=config['lr']) @@ -68,7 +68,46 @@ class TuneA(tune.Trainable): layers=config['layers'], optimizer=optimizer, out_activation=None, - dataset_fn=dataframe_to_dataset_temp_precip, + dataset_fn=dataframe_to_dataset_temp, + loss='mse', + metrics=['mae'] + ) + + def _train(self): + logs = self.model.train(self.config) + + print(logs.history) + metrics = { + 'loss': logs.history['loss'][0], + 'mae': logs.history['mean_absolute_error'][0], + 'val_loss': logs.history['val_loss'][0], + 'val_mae': logs.history['val_mean_absolute_error'][0], + } + + return metrics + + def _save(self, checkpoint_dir): + return self.model.save(checkpoint_dir) + + def _restore(self, path): + return self.model.restore(path) + +class TunePrecip(tune.Trainable): + def _setup(self, config): + logger.debug('Ray Tune model configuration %s', config) + + self.model = Model('precip', epochs=1) + + optimizer = config['optimizer'] + optimizer = config['optimizer'](lr=config['lr']) + + self.model.prepare_for_use( + df=df, + batch_size=config['batch_size'], + layers=config['layers'], + optimizer=optimizer, + out_activation=None, + dataset_fn=dataframe_to_dataset_precip, loss='mse', 
metrics=['mae'] ) @@ -95,8 +134,11 @@ class TuneA(tune.Trainable): def start_tuning(model, cpu=1, gpu=2, checkpoint_freq=1, checkpoint_at_end=True, resume=False, restore=None, stop=500): ray.init() - if model == 'a': - t = TuneA + if model == 'temp': + t = TuneTemp + params = A_params + elif model == 'precip': + t = TunePrecip params = A_params else: t = TuneB @@ -112,6 +154,7 @@ def start_tuning(model, cpu=1, gpu=2, checkpoint_freq=1, checkpoint_at_end=True, checkpoint_at_end=checkpoint_at_end, checkpoint_freq=checkpoint_freq, restore=restore, + max_failures=-1, stop={ 'training_iteration': stop }) diff --git a/biomes/utils.py b/biomes/utils.py index 2414140..8d8059d 100644 --- a/biomes/utils.py +++ b/biomes/utils.py @@ -76,13 +76,13 @@ def dataframe_to_dataset_biomes(df): logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes) return int(tf_inputs.shape[0]), input_columns, num_classes, class_weights, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output)) -def dataframe_to_dataset_temp_precip(df): +def dataframe_to_dataset_temp(df): rows = df.shape[0] - # elevation, distance_to_water, latitude, mean_temp, mean_precip - input_columns = 5 - # (temp, precip) * 4 seasons - num_classes = 8 + # elevation, distance_to_water, latitude, mean_temp + input_columns = 4 + # 4 seasons + num_classes = 4 tf_inputs = np.empty((0, input_columns)) tf_output = np.empty((0, num_classes)) @@ -91,11 +91,37 @@ def dataframe_to_dataset_temp_precip(df): local_inputs = list(INPUTS) local_df = df[local_inputs] all_temps = ['temp_{}_{}'.format(season, year) for season in SEASONS] - all_precips = ['precip_{}_{}'.format(season, year) for season in SEASONS] local_df.loc[:, 'mean_temp'] = np.mean(df[all_temps].values) + + output = all_temps + + tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0) + tf_output = np.concatenate((tf_output, df[output].values), axis=0) + + tf_inputs = 
tf.cast(normalize_ndarray(tf_inputs), tf.float32) + tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32) + + logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes) + return int(tf_inputs.shape[0]), input_columns, num_classes, None, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output)) + +def dataframe_to_dataset_precip(df): + rows = df.shape[0] + + # elevation, distance_to_water, latitude, mean_precip + input_columns = 4 + # 4 seasons + num_classes = 4 + + tf_inputs = np.empty((0, input_columns)) + tf_output = np.empty((0, num_classes)) + + for year in range(MIN_YEAR, MAX_YEAR + 1): + local_inputs = list(INPUTS) + local_df = df[local_inputs] + all_precips = ['precip_{}_{}'.format(season, year) for season in SEASONS] local_df.loc[:, 'mean_precip'] = np.mean(df[all_precips].values) - output = all_temps + all_precips + output = all_precips tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0) tf_output = np.concatenate((tf_output, df[output].values), axis=0)