refactor: working version with command-line utilities
parent fe3f539d7d · commit e3e3fecf4d
.gitignore (2 changed lines)
@@ -1,6 +1,6 @@
 maps
 logs
-checkpoints.*
+checkpoints
 geodata
 *.p
 #### joe made this: http://goel.io/joe
(file name not captured)
@@ -2,5 +2,7 @@
 pyenv install $(cat .python-version)
 pyenv local
 pip install -r requirements.txt
+apt install proj-bin libproj-dev # https://proj4.org/install.html#install
+apt install libgeos-3.6.2 libgeos-dev libgeos++-dev # https://packages.ubuntu.com/search?keywords=geos&searchon=sourcenames&suite=all&section=all
 ```
Binary file not shown.
Binary file not shown.
constants.py (63 changed lines)
@@ -16,3 +16,66 @@ WINTER_MONTHS = ['december', 'january', 'february']
 SPRING_MONTHS = ['march', 'april', 'may']
 SUMMER_MONTHS = ['june', 'july', 'august']
 AUTUMN_MONTHS = ['september', 'november', 'october']
+
+INPUTS = ['elevation', 'distance_to_water', 'latitude']
+OUTPUT = 'biome_num'
+
+BIOMES = [
+    {
+        'name': 'Tropical & Subtropical Moist Broadleaf Forests',
+        'color': '#016936',
+    },
+    {
+        'name': 'Tropical & Subtropical Dry Broadleaf Forests',
+        'color': '#B2D127',
+    },
+    {
+        'name': 'Tropical & Subtropical Coniferous Forests',
+        'color': '#77CC00',
+    },
+    {
+        'name': 'Temperate Broadleaf & Mixed Forests',
+        'color': '#99C500',
+    },
+    {
+        'name': 'Temperate Conifer Forests',
+        'color': '#B6CC00',
+    },
+    {
+        'name': 'Boreal Forests/Taiga',
+        'color': '#00C5B5',
+    },
+    {
+        'name': 'Tropical & Subtropical Grasslands, Savannas & Shrublands',
+        'color': '#EFFF00',
+    },
+    {
+        'name': 'Temperate Grasslands, Savannas & Shrublands',
+        'color': '#FFEE00',
+    },
+    {
+        'name': 'Flooded Grasslands & Savannas',
+        'color': '#009BFF',
+    },
+    {
+        'name': 'Montane Grasslands & Shrublands',
+        'color': '#A0ADBA',
+    },
+    {
+        'name': 'Tundra',
+        'color': '#5C62FF',
+    },
+    {
+        'name': 'Mediterranean Forests, Woodlands & Scrub',
+        'color': '#00850F',
+    },
+    {
+        'name': 'Deserts & Xeric Shrublands',
+        'color': '#FF9E1F',
+    },
+    {
+        'name': 'Mangroves',
+        'color': '#FF1F97'
+    }
+]
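Not part of the diff, but for orientation: the new INPUTS/OUTPUT/BIOMES constants are what the rest of this commit uses to map a predicted class index back to an ecoregion name and plot colour. A minimal sketch, assuming constants.py as added above; the index value 5 is only an example:

```
from constants import INPUTS, OUTPUT, BIOMES

predicted = 5  # hypothetical class index returned by the biome classifier
print(INPUTS, '->', OUTPUT)         # features in, 'biome_num' out
print(BIOMES[predicted]['name'])    # 'Boreal Forests/Taiga'
print(BIOMES[predicted]['color'])   # '#00C5B5'
```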
demo.py (deleted, 19 lines)
@@ -1,19 +0,0 @@
-import pandas as pd
-from utils import *
-
-df = pd.read_pickle('data_final.p')
-df.to_csv('data_final.csv')
-
-print('DataFrame:')
-print(df)
-
-dataset_size, features, output_size, _ = dataframe_to_dataset_biomes(df)
-print('Biomes dataset:\n - size: {}\n - inputs: {}\n - outputs: {}\n'.format(dataset_size, features, output_size))
-
-dataset_size, features, output_size, _ = dataframe_to_dataset_temp_precip(df)
-print('Temp/Precip dataset:\n - size: {}\n - inputs: {}\n - outputs: {}\n'.format(dataset_size, features, output_size))
-
-# print('Normalized Data:')
-# print(normalize_df(df))
-
-# normalize_df(df).to_csv('data_normalized.csv')
draw.py (52 changed lines)
@@ -1,59 +1,43 @@
-from shapely.geometry import Point, MultiPoint
-from shapely.ops import cascaded_union
+import fire
 import matplotlib.pyplot as plt
+from utils import logger
+from constants import BIOMES
 
 import pandas as pd
 import cartopy.crs as ccrs
 
 def draw(df, path=None):
+    logger.debug('draw(df, %s)', path)
     biomes = {}
     biome_numbers = df['biome_num'].unique()
-    # biome_names = df['biome_name'].unique()
 
     for i, row in df.iterrows():
-        p = Point(row.longitude, row.latitude)
         if row.biome_num in biomes:
-            biomes[row.biome_num].append(p)
+            biomes[row.biome_num]['x'].append(row.longitude)
+            biomes[row.biome_num]['y'].append(row.latitude)
        else:
-            biomes[row.biome_num] = [p]
+            biomes[row.biome_num] = { 'x': [row.longitude], 'y': [row.latitude] }
 
     ax = plt.axes(projection=ccrs.PlateCarree())
     ax.stock_img()
-    # ax.legend(df['biome_name'].unique())
-
-    colors={
-        0: '#016936',
-        1: '#B2D127',
-        2: '#77CC00',
-        3: '#99C500',
-        4: '#B6CC00',
-        5: '#00C5B5',
-        6: '#EFFF00',
-        7: '#FFEE00',
-        8: '#009BFF',
-        9: '#A0ADBA',
-        10: '#5C62FF',
-        11: '#00850F',
-        12: '#FF9E1F',
-        13: '#FF1F97'
-    }
 
     for n in biome_numbers:
-        biomes[n] = MultiPoint(biomes[n]).buffer(0.5)
-        # print(biomes[n])
-        # legend = biome_names[n]
-        if not hasattr(biomes[n], '__iter__'):
-            biomes[n] = [biomes[n]]
-        ax.add_geometries(biomes[n], ccrs.PlateCarree(), facecolor=colors[n])
-        # artist.set_label(biome_names[n])
-        # print(artist.get_label())
+        xs = biomes[n]['x']
+        ys = biomes[n]['y']
+        scatter = ax.scatter(xs, ys, s=4, c=BIOMES[n]['color'], transform=ccrs.PlateCarree())
+        scatter.set_label(BIOMES[n]['name'])
 
-    # ax.legend(artists, biome_names)
+    ax.legend()
+    figure = plt.gcf()
+    figure.set_size_inches(20, 18)
     if path:
         plt.savefig(path)
     else:
         plt.show()
 
+def draw_cmd(path=None):
+    draw(pd.read_pickle('data.p'), path=path)
+
 if __name__ == "__main__":
-    df = pd.read_pickle('data.p')
-    draw(df)
+    fire.Fire(draw_cmd)
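As an aside (not in the commit itself): with fire.Fire(draw_cmd), running `python draw.py --path maps/biomes.png` should write the scatter map to a file instead of opening a window. The equivalent call from Python is sketched below; it assumes a data.p pickle with latitude, longitude and biome_num columns, and the output path is only an example:

```
import pandas as pd
from draw import draw

df = pd.read_pickle('data.p')       # rows with latitude, longitude, biome_num
draw(df, path='maps/biomes.png')    # omit path to show the figure interactively
```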
floyd.yml (deleted, 23 lines)
@@ -1,23 +0,0 @@
-# see: https://docs.floydhub.com/floyd_config
-# All supported configs:
-#
-#machine: cpu
-#env: tensorflow-1.8
-#input:
-#  - destination: input
-#    source: foo/datasets/yelp-food/1
-#  - foo/datasets/yelp-food-test/1:test
-#description: this is a test
-#max_runtime: 3600
-#command: python train.py
-
-# You can also define multiple tasks to use with --task argument:
-#
-#task:
-#  evaluate:
-#    machine: gpu
-#    command: python evaluate.py
-#
-#  serve:
-#    machine: cpu
-#    mode: serve
model.py (new file, 144 lines)
@@ -0,0 +1,144 @@
+from __future__ import absolute_import, division, print_function
+
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import pandas as pd
+
+from utils import *
+
+RANDOM_SEED = 1
+
+logger.debug('Tensorflow version: %s', tf.__version__)
+logger.debug('Random Seed: %s', RANDOM_SEED)
+
+tf.set_random_seed(RANDOM_SEED)
+np.random.seed(RANDOM_SEED)
+
+DEFAULT_BATCH_SIZE=256
+DEFAULT_LAYERS = [512, 512]
+DEFAULT_BUFFER_SIZE=500
+DEFAULT_OUT_ACTIVATION = tf.nn.softmax
+DEFAULT_LOSS = 'sparse_categorical_crossentropy'
+DEFAULT_OPTIMIZER = tf.keras.optimizers.Adam(lr=0.001)
+
+class Model():
+    def __init__(self, name, epochs=1):
+        self.name = name
+        self.path = "checkpoints/{}.hdf5".format(name)
+
+        self.epochs = epochs
+
+    def prepare_dataset(self, df, fn, **kwargs):
+        self.dataset_fn = fn
+
+        self.set_dataset(*fn(df), **kwargs)
+
+    def set_dataset(self, dataset_size, features, output_size, class_weight, dataset, shuffle_buffer_size=DEFAULT_BUFFER_SIZE, batch_size=DEFAULT_BATCH_SIZE):
+        self.shuffle_buffer_size = shuffle_buffer_size
+
+        self.class_weight = class_weight
+        self.dataset = dataset.shuffle(self.shuffle_buffer_size)
+        self.TRAIN_SIZE = int(dataset_size * 0.85)
+        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
+        (training, test) = (self.dataset.take(self.TRAIN_SIZE),
+                            self.dataset.skip(self.TRAIN_SIZE))
+
+        logger.debug('Model dataset info: size=%s, train=%s, test=%s', dataset_size, self.TRAIN_SIZE, self.TEST_SIZE)
+
+        self.dataset_size = dataset_size
+        self.features = features
+        self.output_size = output_size
+        self.training = training
+        self.test = test
+
+        logger.debug('Model input size: %s', self.features)
+        logger.debug('Model output size: %s', self.output_size)
+
+        self.batch_size = batch_size
+        self.training_batched = self.training.batch(self.batch_size).repeat()
+        self.test_batched = self.test.batch(self.batch_size).repeat()
+
+    def create_model(self, layers=DEFAULT_LAYERS, out_activation=DEFAULT_OUT_ACTIVATION):
+        params = {
+            'kernel_initializer': 'lecun_uniform',
+            'bias_initializer': 'zeros',
+            # 'kernel_regularizer': keras.regularizers.l2(l=0.01)
+            'input_shape': [self.features]
+        }
+
+        activation = tf.nn.elu
+
+        logger.debug('Model layer parameters: %s', params)
+        logger.debug('Model layer sizes: %s', layers)
+        logger.debug('Model layer activation function: %s', activation)
+        logger.debug('Model out activation function: %s', out_activation)
+
+
+        self.model = keras.Sequential([
+            keras.layers.Dense(n, activation=activation, **params) for n in layers
+        ] + [
+            keras.layers.Dense(self.output_size, activation=out_activation, **params)
+        ])
+
+    def compile(self, loss=DEFAULT_LOSS, metrics=['accuracy'], optimizer=DEFAULT_OPTIMIZER):
+        logger.debug('Model loss function: %s', loss)
+        logger.debug('Model optimizer: %s', optimizer)
+        logger.debug('Model metrics: %s', metrics)
+
+        self.model.compile(loss=loss,
+                           optimizer=optimizer,
+                           metrics=metrics)
+
+    def restore(self, path):
+        logger.debug('Restoring model weights from path: %s', path)
+        return self.model.load_weights(path)
+
+    def save(self, path):
+        logger.debug('Saving model weights to path: %s', path)
+        self.model.save_weights(path)
+        return path
+
+    def evaluate(self):
+        return self.model.evaluate(
+            self.test,
+            batch_size=self.batch_size,
+            steps=int(self.dataset_size / self.batch_size),
+            verbose=1
+        )
+
+    def evaluate_print(self):
+        loss, accuracy = self.evaluate()
+        print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))
+
+    def train(self, config):
+        self.model.summary()
+
+        # map_callback = MapHistory()
+
+        out = self.model.fit(
+            self.training_batched,
+            batch_size=self.batch_size,
+            epochs=self.epochs,
+            steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),
+            class_weight=self.class_weight,
+            validation_data=self.test_batched,
+            validation_steps=int(self.TEST_SIZE / self.batch_size),
+            verbose=1
+        )
+
+        return out
+
+    def predict(self, a):
+        return np.argmax(self.model.predict(a), axis=1)
+
+    def prepare_for_use(self, df=None, batch_size=DEFAULT_BUFFER_SIZE, layers=DEFAULT_LAYERS, out_activation=DEFAULT_OUT_ACTIVATION, loss=DEFAULT_LOSS, optimizer=DEFAULT_OPTIMIZER):
+        if df is None:
+            df = pd.read_pickle('data.p')
+        self.prepare_dataset(df, dataframe_to_dataset_biomes, batch_size=batch_size)
+        self.create_model(layers=layers, out_activation=out_activation)
+        self.compile(loss=loss, optimizer=optimizer)
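For orientation (illustrative, not in the commit): the intended lifecycle of the new Model class appears to be prepare_for_use, then train. A minimal sketch under the assumption that data.p exists and that utils.dataframe_to_dataset_biomes behaves as in this commit; the empty config dict is just a placeholder, since Model.train accepts a config argument but does not read it:

```
import pandas as pd
from model import Model

m = Model('b', epochs=1)
m.prepare_for_use(df=pd.read_pickle('data.p'))  # build dataset, network, compile
history = m.train({})   # config is accepted but unused by Model.train itself
print(history.history['acc'])
```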
nn.py (deleted, 149 lines)
@@ -1,149 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-# TensorFlow and tf.keras
-import tensorflow as tf
-from tensorflow import keras
-
-# Helper libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-import os.path
-
-from utils import *
-# from predict import predicted_map
-
-RANDOM_SEED = 1
-
-print(tf.__version__)
-
-# tf.enable_eager_execution()
-
-tf.set_random_seed(RANDOM_SEED)
-np.random.seed(RANDOM_SEED)
-
-df = pd.read_pickle('data.p')
-
-class MapHistory(keras.callbacks.Callback):
-    def on_epoch_end(self, epoch, logs):
-        print('EPOCH', epoch)
-        predicted_map('maps/{}'.format(epoch))
-
-class Model():
-    def __init__(self, name, batch_size=16, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
-        self.name = name
-        self.path = "checkpoints/{}.hdf5".format(name)
-
-        self.batch_size = batch_size
-        self.shuffle_buffer_size = shuffle_buffer_size
-        self.learning_rate = learning_rate
-        self.epochs = epochs
-
-    def prepare_dataset(self, df, fn):
-        self.dataset_fn = fn
-        dataset_size, features, output_size, dataset = fn(df)
-        self.dataset = dataset.shuffle(self.shuffle_buffer_size)
-        self.TRAIN_SIZE = int(dataset_size * 0.85)
-        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
-        (training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),
-                            self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())
-
-        # print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))
-
-        print('dataset: size={}, train={}, test={}'.format(dataset_size, self.TRAIN_SIZE, self.TEST_SIZE))
-        print('input_size={}'.format(features))
-
-        self.dataset_size = dataset_size
-        self.features = features
-        self.output_size = output_size
-        self.training = training
-        self.test = test
-
-    def create_model(self, layers, out_activation=None):
-        params = {
-            'kernel_initializer': 'lecun_uniform',
-            'bias_initializer': 'zeros',
-            # 'kernel_regularizer': keras.regularizers.l2(l=0.01)
-        }
-        dropout = [keras.layers.Dropout(0.1, input_shape=[self.features])]
-        # dropout = []
-        self.model = keras.Sequential(dropout + [
-            keras.layers.Dense(layers[0], activation=tf.nn.elu, **params)
-        ] + [
-            keras.layers.Dense(n, activation=tf.nn.elu, **params) for n in layers[1:]
-        ] + [
-            keras.layers.Dense(self.output_size, activation=out_activation, **params)
-        ])
-
-    def compile(self, loss='mse', metrics=['accuracy'], optimizer=tf.train.AdamOptimizer, load_weights=True):
-        if load_weights:
-            self.model.load_weights(self.path)
-
-        optimizer = optimizer(self.learning_rate)
-
-        self.model.compile(loss=loss,
-                           optimizer=optimizer,
-                           metrics=metrics)
-
-    def evaluate(self):
-        return self.model.evaluate(
-            self.test,
-            batch_size=self.batch_size,
-            steps=int(self.dataset_size / self.batch_size),
-            verbose=1
-        )
-
-    def evaluate_print(self):
-        loss, accuracy = self.evaluate()
-        print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))
-
-    def train(self):
-        self.model.summary()
-
-        checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='val_loss', verbose=1, mode='min', save_best_only=True)
-        tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', update_freq='epoch')
-        # reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
-        # map_callback = MapHistory()
-
-        self.model.fit(
-            self.training,
-            batch_size=self.batch_size,
-            epochs=self.epochs,
-            steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),
-            callbacks=[checkpoint, tensorboard],
-            validation_data=self.test,
-            validation_steps=int(self.TEST_SIZE / self.batch_size),
-            verbose=1
-        )
-
-    def predict(self, a):
-        return np.argmax(self.model.predict(a), axis=1)
-
-A = Model('a', epochs=2)
-B = Model('b', learning_rate=0.0005, epochs=50)
-
-# 24 so far
-def compile_b():
-    B.prepare_dataset(df, dataframe_to_dataset_biomes)
-    B.create_model([12], tf.nn.softmax)
-    B.compile(loss='sparse_categorical_crossentropy', load_weights=False)
-
-def compile_a():
-    A.prepare_dataset(df, dataframe_to_dataset_temp_precip)
-    A.create_model([(4, tf.nn.elu)])
-    # A.create_model([]) # linear model
-    A.compile(metrics=['accuracy', 'mae'])
-
-if __name__ == "__main__":
-    compile_b()
-    B.train()
-
-    # for inp, out in B.test.take(1).make_one_shot_iterator():
-    #     print(inp, out)
-
-    # print(np.unique(nums))
-    # print(np.unique(predictions))
-    # print('loss: {}, evaluation: {}'.format(*B.evaluate()))
-
-    # compile_a()
-    # A.train()
plot.py (deleted, 28 lines)
@@ -1,28 +0,0 @@
-import geopandas
-import os
-import rasterio
-import pandas as pd
-from matplotlib import pyplot
-
-directory = os.path.dirname(os.path.abspath(__file__))
-
-
-GEODATA = os.path.join(directory, 'geodata')
-ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'Ecoregions2017.shp')
-ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
-TEMP = os.path.join(GEODATA, 'air_temp')
-
-temp = pd.read_csv(os.path.join(TEMP, 'air_temp.2017'), sep='\s+', header=None, names=['longitude', 'latitude', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'november', 'october', 'december', 'yearly_avg'])
-
-print(temp.head())
-
-eco = geopandas.read_file(ECOREGIONS)
-elevation = rasterio.open(ELEVATION)
-
-print(eco.head())
-print(elevation)
-
-eco.plot()
-# rasterio.plot.show(src)
-# pyplot.imshow(elevation.read(1))
-pyplot.show()
predict.py (41 changed lines)
@@ -1,22 +1,20 @@
+import fire
 import numpy as np
 
 from utils import *
-from nn import B, compile_b
+#from nn import compile_b
+from constants import INPUTS
+from model import Model
 from draw import draw
-import time
 
-def chunker(seq, size):
-    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
-
-
-def predicted_map(path=None):
+def predicted_map(B, change=0, path=None):
     year = MAX_YEAR - 1
 
     df = pd.read_pickle('data.p')
 
-    print('TEMPERATURE MODIFICATION OF {}'.format(change))
+    logger.info('temperature change of %s', change)
 
-    inputs = ['elevation', 'distance_to_water', 'latitude']
+    inputs = list(INPUTS)
 
     for season in SEASONS:
         inputs += [
@@ -24,34 +22,37 @@ def predicted_map(path=None):
             'precip_{}_{}'.format(season, year)
         ]
 
-    print(inputs)
-
-    # print(inputs)
     frame = df[inputs + ['longitude']]
-    # print(frame.head())
+    frame_cp = df[inputs + ['longitude']]
 
     for season in SEASONS:
         frame.loc[:, 'temp_{}_{}'.format(season, year)] += change
 
     columns = ['latitude', 'longitude', 'biome_num']
     new_data = pd.DataFrame(columns=columns)
+    nframe = pd.DataFrame(columns=frame.columns, data=normalize_ndarray(frame.to_numpy(), frame_cp.to_numpy()))
 
-    for i, chunk in enumerate(chunker(frame, B.batch_size)):
+    for i, (chunk, chunk_original) in enumerate(zip(chunker(nframe, B.batch_size), chunker(frame, B.batch_size))):
         if chunk.shape[0] < B.batch_size:
             continue
-        input_data = normalize_ndarray(chunk.loc[:, inputs].values)
+        input_data = chunk.loc[:, inputs].values
         out = B.predict(input_data)
 
         f = pd.DataFrame({
-            'longitude': chunk.loc[:, 'longitude'],
-            'latitude': chunk.loc[:, 'latitude'],
+            'longitude': chunk_original.loc[:, 'longitude'],
+            'latitude': chunk_original.loc[:, 'latitude'],
            'biome_num': out
        }, columns=columns)
        new_data = new_data.append(f)
 
    draw(new_data, path=path)
 
-if __name__ == "__main__":
-    compile_b()
-    predicted_map()
+def predicted_map_cmd(checkpoint='checkpoints/save.h5', change=0, path=None):
+    B = Model('b', epochs=1)
+    B.prepare_for_use()
+    B.restore(checkpoint)
+    predicted_map(B, change=change, path=path)
+
+if __name__ == "__main__":
+    fire.Fire(predicted_map_cmd)
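A usage note (illustrative, not in the commit): predict.py is now a Fire CLI as well, so a call like `python predict.py --checkpoint checkpoints/save.h5 --change 2 --path maps/plus2.png` should render a map with every seasonal temperature shifted by two degrees. The Python equivalent is sketched below; it assumes a trained checkpoint and data.p on disk, and the output path is only an example:

```
from predict import predicted_map_cmd

# +2 degrees on every seasonal temperature column, rendered to a file
predicted_map_cmd(checkpoint='checkpoints/save.h5', change=2, path='maps/plus2.png')
```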
(file name not captured)
@@ -7,3 +7,7 @@ rasterio==1.0.15
 tensorflow==1.13.1
 Cartopy==0.17.0
 numpy==1.16.1
+scikit-learn==0.20.3
+https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.7.0.dev1-cp36-cp36m-manylinux1_x86_64.whl
+fire==0.1.3
+psutil==5.6.1
tracks (deleted, 89 lines)
@@ -1,89 +0,0 @@
-Layer (type) Output Shape Param #
-=================================================================
-Group 1
------------------------------------------------------------------
-dense (Dense) (None, 128) 1536
-_________________________________________________________________
-dense_1 (Dense) (None, 256) 33024
-_________________________________________________________________
-dense_2 (Dense) (None, 14) 3598
------------------------------------------------------------------
-Total params: 38,158
-1 Epoch: loss: 0.3822 - acc: 0.8684
-Learning rate: 0.005
-=================================================================
-
-Group 2
------------------------------------------------------------------
-dense (Dense) (None, 32) 384
-_________________________________________________________________
-dense_1 (Dense) (None, 64) 2112
-_________________________________________________________________
-dense_2 (Dense) (None, 32) 2080
-_________________________________________________________________
-dense_3 (Dense) (None, 14) 462
------------------------------------------------------------------
-Total params: 5,038
-1 Epoch: loss: 0.3760 - acc: 0.8678 @ 20minutes
-Stopped converging, loss increasing
-Learning rate: 0.005
-=================================================================
-
-Group 3
------------------------------------------------------------------
-dense (Dense) (None, 16) 192
-_________________________________________________________________
-dense_1 (Dense) (None, 32) 544
-_________________________________________________________________
-dense_2 (Dense) (None, 16) 528
-_________________________________________________________________
-dense_3 (Dense) (None, 14) 238
------------------------------------------------------------------
-Total params: 1,502
-1 Epoch: loss: 0.3702 - acc: 0.8671 @ 12minutes
-10 Epochs: loss: 0.3280 - acc: 0.8815
-Stopped converging after 5 epochs, was oscillating
-Learning rate: 0.005
-=================================================================
-
-Group 4
-_________________________________________________________________
-dense (Dense) (None, 12) 144
-_________________________________________________________________
-dense_1 (Dense) (None, 14) 182
-_________________________________________________________________
-Total params: 326
-1 Epoch: loss: 0.4412 - acc: 0.8457 @ 10m
-60 Epochs: loss: 0.4146 - acc: 0.8546
-Stopped converging
-Learning rate: 0.005
-=================================================================
-
-Group 5
-_________________________________________________________________
-dense (Dense) (None, 12) 144
-_________________________________________________________________
-dense_1 (Dense) (None, 14) 182
-_________________________________________________________________
-Total params: 326
-1 Epoch: loss: 0.5057 - acc: 0.8268 @ 10m
-15 epoch: loss: 0.4240 - acc: 0.8481
-Stopped converging
-Learning rate: 0.001
-=================================================================
-
-Group 6
-_________________________________________________________________
-Layer (type) Output Shape Param #
-=================================================================
-dense (Dense) (None, 24) 288
-_________________________________________________________________
-dense_1 (Dense) (None, 14) 350
-_________________________________________________________________
-Total params: 638
-1 Epoch: loss: 0.4520 - acc: 0.8416 @ 12m
-30 epochs: loss: 0.3562 - acc: 0.8691, still converging
-stopped converging after 100 epochs
-Learning rate: 0.001
-
train.py (new file, 67 lines)
@@ -0,0 +1,67 @@
+import fire
+import ray
+import pandas as pd
+import tensorflow as tf
+from ray import tune
+from tensorflow import keras
+from utils import logger
+from model import Model
+
+B_params = {
+    'batch_size': tune.grid_search([256]),
+    'layers': tune.grid_search([[512, 512]]),
+    'lr': tune.grid_search([1e-4]),
+    'optimizer': tune.grid_search([tf.keras.optimizers.Adam]),
+}
+
+df = pd.read_pickle('data.p')
+
+class TuneB(tune.Trainable):
+    def _setup(self, config):
+        logger.debug('Ray Tune model configuration %s', config)
+
+        self.model = Model('b', epochs=1)
+
+        optimizer = config['optimizer']
+        optimizer = config['optimizer'](lr=config['lr'])
+
+        self.model.prepare_for_use(df=df, batch_size=config['batch_size'], layers=config['layers'], optimizer=optimizer)
+
+    def _train(self):
+        logs = self.model.train(self.config)
+
+        metrics = {
+            'mean_accuracy': logs.history['acc'][0],
+            'loss': logs.history['loss'][0],
+            'val_accuracy': logs.history['val_acc'][0],
+            'val_loss': logs.history['val_loss'][0],
+        }
+
+        return metrics
+
+    def _save(self, checkpoint_dir):
+        return self.model.save(checkpoint_dir)
+
+    def _restore(self, path):
+        return self.model.restore(path)
+
+def start_tuning(cpu=1, gpu=2, checkpoint_freq=1, checkpoint_at_end=True, resume=False, restore=None, stop=500):
+    ray.init()
+
+    tune.run(TuneB,
+             config=B_params,
+             resources_per_trial={
+                 "cpu": cpu,
+                 "gpu": gpu
+             },
+             resume=resume,
+             checkpoint_at_end=checkpoint_at_end,
+             checkpoint_freq=checkpoint_freq,
+             restore=restore,
+             stop={
+                 'training_iteration': stop
+             })
+
+
+if __name__ == "__main__":
+    fire.Fire(start_tuning)
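One more usage note (illustrative, not in the commit): start_tuning is exposed through Fire, so a run could be launched with something like `python train.py --cpu 4 --gpu 0 --stop 10`. A Python sketch under the assumptions that Ray and the wheel pinned above are installed and data.p is present; the resource numbers are only examples:

```
from train import start_tuning

# one short grid-search pass over B_params on a CPU-only machine
start_tuning(cpu=4, gpu=0, stop=10)
```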
utils.py (81 changed lines)
@@ -1,55 +1,46 @@
 import numpy as np
 import tensorflow as tf
 import pandas as pd
+from collections import Counter
+from sklearn.utils import class_weight
 from constants import *
+import logging
+import os
 
-inputs = ['elevation', 'distance_to_water', 'latitude']
-output = 'biome_num'
+logger = logging.getLogger('main')
+logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
 
-def normalize(v):
-    return (v - np.mean(v)) / np.std(v)
-
-def normalize_ndarray(ar):
+def normalize(v, o=None):
+    if o is None:
+        o = v
+    return (v - np.mean(o)) / np.std(o)
+
+def normalize_ndarray(ar, o=None):
+    if o is None:
+        o = ar
+
+    # transpose: operate over columns
     tr = np.transpose(ar)
+    to = np.transpose(o)
     for i in range(tr.shape[0]):
-        tr[i] = normalize(tr[i])
+        tr[i] = normalize(tr[i], to[i])
 
+    # transpose back
     return np.transpose(tr)
 
-def normalize_df(df):
-    for col in df.columns:
-        df.loc[col] = normalize_ndarray(df[col])
-
-    return df
-
 def dataframe_to_dataset_biomes(df):
     rows = df.shape[0]
 
     # 8 for seasonal temp and precipitation
     # 3 for latitude, elevation and distance_to_water
-    columns = 11
+    input_columns = 11
 
-    # make biomes uniformly distributed so each biome has enough data to avoid a biased dataset
-    biome_shares = df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] })
-    max_share = np.max(biome_shares['biome_num'])
-    dsize = df.shape[0]
-    max_share_count = int(max_share * dsize)
-
-    for biome_num in biome_shares.index:
-        share = biome_shares.values[biome_num][0]
-        share_count = int(share * dsize)
-        diff = max_share_count - share_count
-        rows = df.loc[df['biome_num'] == biome_num]
-        diff_ratio = int(diff / rows.shape[0])
-        df = pd.concat([df] + [rows] * diff_ratio, ignore_index=True)
-
-    # print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))
-
-    tf_inputs = np.empty((0, columns))
+    tf_inputs = np.empty((0, input_columns))
     tf_output = np.empty((0))
 
     for year in range(MIN_YEAR, MAX_YEAR + 1):
-        local_inputs = list(inputs)
+        local_inputs = list(INPUTS)
         for season in SEASONS:
             local_inputs += [
                 'temp_{}_{}'.format(season, year),
@@ -60,25 +51,32 @@ def dataframe_to_dataset_biomes(df):
         local_df = df[local_inputs]
 
         tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0)
-        tf_output = np.concatenate((tf_output, df[output].values), axis=0)
+        tf_output = np.concatenate((tf_output, df[OUTPUT].values), axis=0)
 
+    # balance class weights for the loss function, since the data is highly unbalanced
+    num_classes = len(np.unique(tf_output))
+    class_weights = class_weight.compute_class_weight('balanced', np.unique(tf_output), tf_output)
+    logger.debug('class_weights %s', class_weights)
+
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(tf_output, tf.int64)
 
-    return int(tf_inputs.shape[0]), 11, 14, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes)
+    return int(tf_inputs.shape[0]), input_columns, num_classes, class_weights, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
 def dataframe_to_dataset_temp_precip(df):
     rows = df.shape[0]
 
     # elevation, distance_to_water, latitude
     # season, year
-    columns = 5
+    input_columns = 5
+    num_classes = 2
 
-    tf_inputs = np.empty((0, columns))
-    tf_output = np.empty((0, 2))
+    tf_inputs = np.empty((0, input_columns))
+    tf_output = np.empty((0, num_classes))
 
     for year in range(MIN_YEAR, MAX_YEAR + 1):
-        local_inputs = list(inputs)
+        local_inputs = list(INPUTS)
+
         for idx, season in enumerate(SEASONS):
             season_index = idx / len(season)
@@ -93,8 +91,11 @@ def dataframe_to_dataset_temp_precip(df):
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
     tf_output = tf.cast(tf_output, tf.float32)
 
-    return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
+    logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes)
+    return int(tf_inputs.shape[0]), input_columns, num_classes, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
-# df = pd.read_pickle('data.p')
-# print(dataframe_to_dataset_biomes(df))
+
+flatten = lambda l: [item for sublist in l for item in sublist]
+
+def chunker(seq, size):
+    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
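A closing note (illustrative, not in the commit): the point of the new optional `o` argument is that prediction-time inputs are scaled with the statistics of a reference frame rather than their own, which is how predict.py uses it above. A minimal sketch, assuming the project's utils module is importable; the arrays are made-up data:

```
import numpy as np
from utils import normalize_ndarray

ref = np.array([[0.0, 10.0], [2.0, 30.0], [4.0, 50.0]])  # reference columns
a = np.array([[1.0, 20.0], [3.0, 40.0]])
# each column of `a` is centred and scaled with the mean/std of the same column in `ref`
print(normalize_ndarray(a, ref))
```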