feat(temps): various temperatures

This commit is contained in:
Mahdi Dibaiee 2019-02-27 15:06:20 +03:30
parent d8365d6285
commit f268e72244
5 changed files with 221 additions and 164 deletions

72
draw.py
View File

@ -4,38 +4,52 @@ import matplotlib.pyplot as plt
import pandas as pd
import cartopy.crs as ccrs
def draw(df):
    """Plot the biome regions found in ``df`` on a world map and show it.

    ``df`` is walked with ``iterrows()``; every index entry is unpacked as a
    ``(longitude, latitude)`` pair and the row's ``biome_num`` selects both
    the group the point belongs to and its fill colour.

    Raises:
        KeyError: if a ``biome_num`` value has no entry in the colour table.
    """
    # One bucket of points per distinct biome number.
    biomes = {}
    biome_numbers = df['biome_num'].unique()
    for n in biome_numbers:
        biomes[n] = []

    for (longitude, latitude), row in df.iterrows():
        biomes[row.biome_num].append(Point(longitude, latitude))

    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.stock_img()

    colors = {
        0: '#016936',
        1: '#B2D127',
        2: '#77CC00',
        3: '#99C500',
        4: '#B6CC00',
        5: '#00C5B5',
        6: '#EFFF00',
        7: '#FFEE00',
        8: '#009BFF',
        9: '#A0ADBA',
        10: '#5C62FF',
        11: '#00850F',
        12: '#FF9E1F',
        13: '#FF1F97',
    }

    for n in biome_numbers:
        # Buffer each point by 1 unit; the points are buffered exactly once.
        region = MultiPoint(biomes[n]).buffer(1)
        # add_geometries is handed a collection of geometries, so a result
        # that is not itself iterable is wrapped in a list first.
        if not hasattr(region, '__iter__'):
            region = [region]
        ax.add_geometries(region, ccrs.PlateCarree(), facecolor=colors[n])

    plt.show()


if __name__ == "__main__":
    df = pd.read_pickle('data_final.p')
    draw(df)

112
nn.py Normal file
View File

@ -0,0 +1,112 @@
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
from utils import *
# Single seed shared by the TensorFlow and NumPy random generators below.
RANDOM_SEED = 1
tf.enable_eager_execution()
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
# Read at module level, so this pickle is loaded whenever the module is imported.
df = pd.read_pickle('data_final.p')
class Model():
    """Keras model wrapper bundling dataset preparation, training and inference.

    Call order used by the callers of this class: ``prepare_dataset`` ->
    ``create_model`` -> ``compile`` -> ``train`` / ``evaluate`` / ``predict``.
    """

    def __init__(self, name, batch_size=100, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
        """Store the hyper-parameters; no TensorFlow objects are created here.

        Args:
            name: Model identifier; also determines the checkpoint file name.
            batch_size: Number of rows per batch for training and evaluation.
            shuffle_buffer_size: Buffer size handed to ``Dataset.shuffle``.
            learning_rate: Learning rate for the Adam optimizer.
            epochs: Number of epochs run by ``train``.
        """
        self.name = name
        self.path = "checkpoints/{}.hdf5".format(name)
        self.batch_size = batch_size
        self.shuffle_buffer_size = shuffle_buffer_size
        self.learning_rate = learning_rate
        self.epochs = epochs

    def prepare_dataset(self, df, fn):
        """Build the shuffled training and test pipelines.

        Args:
            df: Data passed unchanged to ``fn``.
            fn: Callable returning the tuple
                ``(dataset_size, features, output_size, dataset)``.
        """
        self.dataset_fn = fn
        dataset_size, features, output_size, dataset = fn(df)
        self.dataset = dataset.shuffle(self.shuffle_buffer_size)

        # 85% of the rows go to training; the remainder is the test split.
        self.TRAIN_SIZE = int(dataset_size * 0.85)
        self.TEST_SIZE = dataset_size - self.TRAIN_SIZE

        # Split first, then batch, so the sizes above count rows, not batches.
        training = self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat()
        test = self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat()

        self.dataset_size = dataset_size
        self.features = features
        self.output_size = output_size
        self.training = training
        self.test = test

    def create_model(self, layers):
        """Create a dense network with one ReLU layer per entry of ``layers``.

        Args:
            layers: Non-empty sequence of hidden-layer widths. The first layer
                takes ``self.features`` inputs; a final ``Dense`` layer with
                ``self.output_size`` units and no activation is appended.
        """
        self.model = keras.Sequential([
            keras.layers.Dense(layers[0], activation=tf.nn.relu, input_shape=[self.features])
        ] + [
            keras.layers.Dense(n, activation=tf.nn.relu) for n in layers[1:]
        ] + [
            keras.layers.Dense(self.output_size)
        ])

    def compile(self):
        """Load the weights stored at ``self.path`` and compile the model.

        The checkpoint is loaded unconditionally, so the file must exist.
        """
        self.model.load_weights(self.path)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)

        self.model.compile(loss='mse',
                           optimizer=optimizer,
                           metrics=['mae', 'accuracy'])

    def evaluate(self):
        """Evaluate on the test pipeline and return Keras' ``evaluate`` result."""
        # NOTE(review): steps is derived from the full dataset_size, not from
        # TEST_SIZE; the test pipeline repeats, so it can serve that many
        # batches. Confirm this is intended.
        return self.model.evaluate(
            self.test,
            batch_size=self.batch_size,
            steps=int(self.dataset_size / self.batch_size),
            verbose=1
        )

    def train(self):
        """Print the model summary and fit on the training pipeline."""
        self.model.summary()

        # The checkpoint target is this model's own path. The original code
        # referenced an undefined name ``filepath`` here and raised NameError.
        checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='acc', verbose=1, mode='max')

        self.model.fit(
            self.training,
            batch_size=self.batch_size,
            epochs=self.epochs,
            steps_per_epoch=int(self.dataset_size / self.batch_size),
            callbacks=[checkpoint],
            verbose=1
        )

    def predict(self, a):
        """Return, for each row of ``a``, the index of the largest model output."""
        return np.argmax(self.model.predict(a), axis=1)
# Shared model instances; other modules import these by name.
A = Model(name='a', epochs=2, learning_rate=0.001, batch_size=100, shuffle_buffer_size=500)
B = Model(name='b', epochs=850, learning_rate=0.001, batch_size=100, shuffle_buffer_size=500)


def _build(model, dataset_fn, hidden_layers):
    """Prepare the dataset, create the network and compile ``model``."""
    model.prepare_dataset(df, dataset_fn)
    model.create_model(hidden_layers)
    model.compile()


if __name__ == "__main__":
    _build(B, dataframe_to_dataset_biomes, [64, 128])

    # for inp, out in B.test.take(1).make_one_shot_iterator():
    #     print(inp, out)
    # print(np.unique(nums))
    # print(np.unique(predictions))

    evaluation = B.evaluate()
    print('loss: {}, evaluation: {}'.format(*evaluation))
    # B.train()

    _build(A, dataframe_to_dataset_temp_precip, [4])
    # A.train()

134
train.py
View File

@ -1,134 +0,0 @@
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
from utils import *
# Single seed shared by the TensorFlow and NumPy random generators below.
RANDOM_SEED = 1
tf.enable_eager_execution()
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
# Module-level frame used by both train_model_a and train_model_b.
df = pd.read_pickle('data_final.p')
# temp and precipitation
def train_model_a():
    """Train and evaluate the temperature/precipitation model (MSE loss).

    Weights are loaded from ``checkpoints/a.hdf5`` before training, so that
    file must already exist; the checkpoint callback writes back to the same
    path. The evaluation result is printed.
    """
    filepath = "checkpoints/a.hdf5"
    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.001
    EPOCHS = 2

    # dataset = dataframe_to_dataset_biomes(df)
    dataset_size, features, output_size, dataset = dataframe_to_dataset_temp_precip(df)
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)

    # take()/skip() need an integer element count, not a float.
    TRAIN_SIZE = int(dataset_size * 0.85)

    # Split before batching so TRAIN_SIZE counts rows. Batching first would
    # apply the row count to batches and leave the test split empty.
    training = dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
    test = dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat()

    model = keras.Sequential([
        keras.layers.Dense(4, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(output_size)
    ])

    model.load_weights(filepath)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'accuracy'])

    model.summary()

    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')

    model.fit(
        training,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        steps_per_epoch=int(dataset_size / BATCH_SIZE),
        callbacks=[checkpoint],
        verbose=1
    )

    evaluation = model.evaluate(
        test,
        batch_size=BATCH_SIZE,
        steps=int(dataset_size / BATCH_SIZE),
        verbose=1
    )

    print(evaluation)
# 850 epochs so far
def train_model_b():
    """Train and evaluate the biome classifier (softmax, sparse cross-entropy).

    Weights are loaded from ``checkpoints/b.hdf5`` before training, so that
    file must already exist; the checkpoint callback writes back to the same
    path. Loss and accuracy from the evaluation are printed.
    """
    filepath = "checkpoints/b.hdf5"
    BATCH_SIZE = 100
    SHUFFLE_BUFFER_SIZE = 500
    LEARNING_RATE = 0.0005
    EPOCHS = 400

    # dataset = dataframe_to_dataset_biomes(df)
    dataset_size, features, output_size, dataset = dataframe_to_dataset_biomes(df)
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)

    # take()/skip() need an integer element count, not a float.
    TRAIN_SIZE = int(dataset_size * 0.85)

    training = dataset.take(TRAIN_SIZE).batch(BATCH_SIZE).repeat()
    test = dataset.skip(TRAIN_SIZE).batch(BATCH_SIZE).repeat()

    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[features]),
        keras.layers.Dense(128, activation=tf.nn.relu),
        keras.layers.Dense(output_size, activation=tf.nn.softmax)
    ])

    model.load_weights(filepath)
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    model.summary()

    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, mode='max')

    model.fit(
        training,
        epochs=EPOCHS,
        verbose=1,
        steps_per_epoch=int(dataset_size / BATCH_SIZE),
        callbacks=[checkpoint]
    )

    # print(dataset.repeat().make_one_shot_iteraor().get_next())
    # inp, out = test.make_one_shot_iterator().get_next()
    # print(inp, out)
    # print(np.argmax(model.predict(inp), axis=1))

    evaluation = model.evaluate(
        test,
        batch_size=BATCH_SIZE,
        steps=int(dataset_size / BATCH_SIZE),
        verbose=1
    )

    print('loss: {}, accuracy: {}'.format(*evaluation))
# train_model_a()
# Unguarded module-level call: running or importing this file trains model B.
train_model_b()
# train_model_a()

View File

@ -18,7 +18,7 @@ def normalize_ndarray(ar):
def normalize_df(df):
    """Normalize every column of ``df`` in place and return ``df``.

    Each column is passed through ``normalize`` and written back under the
    same column label.
    """
    for col in df.columns:
        # Assign by column label. ``df.loc[col] = ...`` would address a *row*
        # labelled ``col`` instead of replacing the column.
        df[col] = normalize(df[col])
    return df

65
various_temps.py Normal file
View File

@ -0,0 +1,65 @@
import numpy as np
from utils import *
from nn import B
from draw import draw
import time
def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq``, ``size`` items each.

    The final slice is shorter when ``len(seq)`` is not a multiple of ``size``.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)
# Use the year just before MAX_YEAR for the temp/precip input columns.
year = MAX_YEAR - 1

df = pd.read_pickle('data_final.p')

# Expose index level 1 as a regular 'latitude' column.
latitude = np.array(df.index.get_level_values(1))
df.loc[:, 'latitude'] = pd.Series(latitude, index=df.index)

B.prepare_dataset(df, dataframe_to_dataset_biomes)
B.create_model([64, 128])
B.compile()

for change in range(-5, 6):
    print('TEMPERATURE MODIFICATION OF {}'.format(change))

    # Input columns: the two static features, temp and precip per season,
    # then latitude.
    inputs = ['elevation', 'distance_to_water']
    inputs += [
        '{}_{}_{}'.format(kind, season, year)
        for season in SEASONS
        for kind in ('temp', 'precip')
    ]
    inputs.append('latitude')

    frame = df[inputs]
    print(frame.head())

    # NOTE(review): `change` is only printed; the step that would add it to
    # the temperature columns is disabled, so every pass predicts on the same
    # unmodified data.
    # for season in SEASONS:
    #     frame.loc[:, 'temp_{}_{}'.format(season, year)] += change
    # print(np.average(frame.loc[:, 'temp_winter_2016']))

    # index = []
    # for longitude in range(-179, 179):
    #     for latitude in range(-89, 89):
    #         index.append((longitude, latitude))

    columns = ['biome_num']
    new_data = pd.DataFrame(columns=columns)

    # Predict one batch-sized chunk at a time, growing new_data's index with
    # each chunk before storing its predictions.
    for chunk in chunker(frame, B.batch_size):
        input_data = normalize_ndarray(chunk.values)
        out = B.predict(input_data)

        new_index = np.concatenate((chunk.index.values, new_data.index.values))
        new_data = new_data.reindex(new_index)
        new_data.loc[chunk.index.values, 'biome_num'] = out

    # print(new_data['biome_num'].unique())

    draw(new_data)
# columns = ['biome_num']
# indices = ['longitude', 'latitude']
# new_df = pd.DataFrame(index=indices, columns=columns)
# new_df =