refactor(data): include latitude longitude in columns, not indices
parent 865cc775ed
commit 3dcafddb8c
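Note: the change in a nutshell is that latitude/longitude move out of the DataFrame index and become ordinary columns. A minimal sketch of the two layouts, assuming only the column names used in data.py; the values and variable names here are illustrative:

import pandas as pd

# Before: latitude/longitude lived in a MultiIndex.
before = pd.DataFrame(
    {'biome_num': [7], 'elevation': [120.0]},
    index=pd.MultiIndex.from_tuples([(-122, 37)], names=['longitude', 'latitude']),
)

# After: they are plain columns, so the values survive appends,
# reindexing and CSV round-trips without touching the index.
after = pd.DataFrame(
    {'longitude': [-122], 'latitude': [37], 'biome_num': [7], 'elevation': [120.0]}
)

print(before.index.get_level_values('latitude'))  # index-based access
print(after['latitude'])                          # column-based access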
.floydexpt (new file, 1 line)

@@ -0,0 +1 @@
+{"name": "world", "namespace": "mdibaiee", "family_id": "prj_HzeYYJXLyy2otH6W"}
.floydignore (new file, 19 lines)

@@ -0,0 +1,19 @@
+maps
+logs
+checkpoints.*
+geodata
+*.p
+
+# Directories and files to ignore when uploading code to floyd
+
+.git
+.eggs
+eggs
+lib
+lib64
+parts
+sdist
+var
+*.pyc
+*.swp
+.DS_Store
Binary file not shown.
data.py (19 changed lines)

@@ -46,9 +46,8 @@ for year in range(MIN_YEAR, MAX_YEAR + 1):
     for s in SEASONS:
         temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
 
-columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
-indices = ['longitude', 'latitude']
-final_data = pd.DataFrame(index=indices, columns=columns)
+columns = ['longitude', 'latitude', 'biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
+final_data = pd.DataFrame(columns=columns)
 
 
 def get_point_information(longitude, latitude):
     item = {}
@@ -57,6 +56,8 @@ def get_point_information(longitude, latitude):
     if ecoregion.empty:
         return False
 
+    item['longitude'] = longitude
+    item['latitude'] = latitude
     item['biome_num'] = ecoregion.BIOME_NUM.iloc[0]
     item['biome_name'] = ecoregion.BIOME_NAME.iloc[0]
 
@@ -100,18 +101,18 @@ def get_point_information(longitude, latitude):
 
     return item
 
-data_indices = []
-data_map = {}
+data = {}
 for col in columns:
-    data_map[col] = {}
+    data[col] = []
 
-i = 0
+# i = 0
 
 start_time = time.time()
 
 for longitude in range(-179, 179):
     print('-', end='')
     for latitude in range(-89, 89):
 
         # generate data and save to file
         d = get_point_information(longitude, latitude)
         if d == False:
@@ -119,7 +120,7 @@ for longitude in range(-179, 179):
             continue
 
         for key, value in d.items():
-            data_map[key][(longitude, latitude)] = value
+            data[key].append(value)
 
         print('+', end='')
 
@@ -128,7 +129,7 @@ for longitude in range(-179, 179):
 print("--- Calculations: %s seconds ---" % (time.time() - start_time))
 
 start_time = time.time()
-df = pd.DataFrame(data_map)
+df = pd.DataFrame(data)
 print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
 print(df)
 start_time = time.time()
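Note: the collection pattern data.py switches to is a plain dict of per-column lists, converted to a DataFrame once at the end, instead of a dict keyed by (longitude, latitude) tuples. A small self-contained sketch of that pattern with made-up points:

import pandas as pd

columns = ['longitude', 'latitude', 'biome_num']
data = {col: [] for col in columns}          # one list per column

for longitude, latitude, biome in [(-122, 37, 7), (2, 48, 4)]:  # stand-in points
    item = {'longitude': longitude, 'latitude': latitude, 'biome_num': biome}
    for key, value in item.items():
        data[key].append(value)              # append, as in the new loop body

df = pd.DataFrame(data)                      # columns, not a (lon, lat) index
print(df)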
demo.py (new file, 19 lines)

@@ -0,0 +1,19 @@
+import pandas as pd
+from utils import *
+
+df = pd.read_pickle('data_final.p')
+df.to_csv('data_final.csv')
+
+print('DataFrame:')
+print(df)
+
+dataset_size, features, output_size, _ = dataframe_to_dataset_biomes(df)
+print('Biomes dataset:\n - size: {}\n - inputs: {}\n - outputs: {}\n'.format(dataset_size, features, output_size))
+
+dataset_size, features, output_size, _ = dataframe_to_dataset_temp_precip(df)
+print('Temp/Precip dataset:\n - size: {}\n - inputs: {}\n - outputs: {}\n'.format(dataset_size, features, output_size))
+
+# print('Normalized Data:')
+# print(normalize_df(df))
+
+# normalize_df(df).to_csv('data_normalized.csv')
draw.py (6 changed lines)

@@ -10,8 +10,8 @@ def draw(df, path=None):
     biome_numbers = df['biome_num'].unique()
     # biome_names = df['biome_name'].unique()
 
-    for (longitude, latitude), row in df.iterrows():
-        p = Point(longitude, latitude)
+    for i, row in df.iterrows():
+        p = Point(row.longitude, row.latitude)
         if row.biome_num in biomes:
             biomes[row.biome_num].append(p)
         else:
@@ -55,5 +55,5 @@ def draw(df, path=None):
     plt.show()
 
 if __name__ == "__main__":
-    df = pd.read_pickle('data_final.p')
+    df = pd.read_pickle('data.p')
     draw(df)
floyd.yml (new file, 23 lines)

@@ -0,0 +1,23 @@
+# see: https://docs.floydhub.com/floyd_config
+# All supported configs:
+#
+#machine: cpu
+#env: tensorflow-1.8
+#input:
+#  - destination: input
+#    source: foo/datasets/yelp-food/1
+#  - foo/datasets/yelp-food-test/1:test
+#description: this is a test
+#max_runtime: 3600
+#command: python train.py
+
+# You can also define multiple tasks to use with --task argument:
+#
+#task:
+#  evaluate:
+#    machine: gpu
+#    command: python evaluate.py
+#
+#  serve:
+#    machine: cpu
+#    mode: serve
nn.py (25 changed lines)

@@ -14,12 +14,14 @@ from utils import *
 
 RANDOM_SEED = 1
 
-tf.enable_eager_execution()
+print(tf.__version__)
+
+# tf.enable_eager_execution()
 
 tf.set_random_seed(RANDOM_SEED)
 np.random.seed(RANDOM_SEED)
 
-df = pd.read_pickle('data_final.p')
+df = pd.read_pickle('data.p')
 
 class Model():
     def __init__(self, name, batch_size=16, shuffle_buffer_size=500, learning_rate=0.001, epochs=1):
@@ -40,17 +42,21 @@ class Model():
         (training, test) = (self.dataset.take(self.TRAIN_SIZE).batch(self.batch_size).repeat(),
                             self.dataset.skip(self.TRAIN_SIZE).batch(self.batch_size).repeat())
 
+        print('dataset: size={}, train={}, test={}'.format(dataset_size, self.TRAIN_SIZE, self.TEST_SIZE))
+        print('input_size={}'.format(features))
+
         self.dataset_size = dataset_size
         self.features = features
         self.output_size = output_size
         self.training = training
         self.test = test
 
-    def create_model(self, layers, out_activation):
+    def create_model(self, layers, out_activation=None):
         params = {
            'kernel_initializer': 'lecun_uniform',
            'bias_initializer': 'zeros',
         }
+        # dropout = keras.layersDropout(0.2, input_shape=[self.features])
         self.model = keras.Sequential([
             keras.layers.Dense(layers[0], activation=tf.nn.elu, input_shape=[self.features], **params)
         ] + [
@@ -60,7 +66,7 @@ class Model():
         ])
 
     def compile(self, loss='mse', metrics=['accuracy'], optimizer=tf.train.AdamOptimizer):
-        # self.model.load_weights(self.path)
+        self.model.load_weights(self.path)
         optimizer = optimizer(self.learning_rate)
 
         self.model.compile(loss=loss,
@@ -79,7 +85,7 @@ class Model():
         self.model.summary()
 
         checkpoint = keras.callbacks.ModelCheckpoint(self.path, monitor='acc', verbose=1, mode='max')
-        tensorboard = keras.callbacks.TensorBoard(log_dir='./logs')
+        tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', update_freq='epoch')
         # map_callback = keras.callbacks.LambdaCallback(on_epoch_end=self.map_callback)
 
         self.model.fit(
@@ -95,16 +101,17 @@ class Model():
         return np.argmax(self.model.predict(a), axis=1)
 
 A = Model('a', epochs=2)
-B = Model('b', learning_rate=0.005, epochs=100)
+B = Model('b', learning_rate=0.001, epochs=450)
 
 def compile_b():
     B.prepare_dataset(df, dataframe_to_dataset_biomes)
-    B.create_model([64, 128], tf.nn.softmax)
+    B.create_model([32], tf.nn.softmax)
     B.compile(loss='sparse_categorical_crossentropy')
 
 def compile_a():
     A.prepare_dataset(df, dataframe_to_dataset_temp_precip)
     A.create_model([(4, tf.nn.elu)])
+    # A.create_model([]) # linear model
     A.compile(metrics=['accuracy', 'mae'])
 
 if __name__ == "__main__":
@@ -118,5 +125,5 @@ if __name__ == "__main__":
     # print(np.unique(predictions))
     # print('loss: {}, evaluation: {}'.format(*B.evaluate()))
 
-    compile_a()
-    A.train()
+    # compile_a()
+    # A.train()
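Note: the train/test split in prepare_dataset relies on tf.data's take/skip. A toy sketch of that pattern, assuming the TensorFlow 1.13 API pinned later in this commit; the sizes here are made up:

import tensorflow as tf

dataset = tf.data.Dataset.range(10)
train_size = 7

training = dataset.take(train_size).batch(2).repeat()  # first 7 examples, batched
test = dataset.skip(train_size).batch(2).repeat()       # remaining 3 examples

# take/skip split in dataset order; shuffling before the split (as with
# shuffle_buffer_size in nn.py) keeps both sets representative.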
predict.py (30 changed lines)

@@ -1,7 +1,7 @@
 import numpy as np
 
 from utils import *
-from nn import B
+from nn import B, compile_b
 from draw import draw
 import time
 
@@ -10,16 +10,14 @@ def chunker(seq, size):
 
 year = MAX_YEAR - 1
 
-df = pd.read_pickle('data_final.p')
-latitude = np.array(df.index.get_level_values(1))
-df.loc[:, 'latitude'] = pd.Series(latitude, index=df.index)
+df = pd.read_pickle('data.p')
 
 compile_b()
 
 for change in range(0, 1):
     print('TEMPERATURE MODIFICATION OF {}'.format(change))
 
-    inputs = ['elevation', 'distance_to_water']
+    inputs = ['latitude', 'longitude', 'elevation', 'distance_to_water']
 
     for season in SEASONS:
         inputs += [
@@ -27,22 +25,28 @@ for change in range(0, 1):
             'precip_{}_{}'.format(season, year)
         ]
 
-    inputs += ['latitude']
+    # print(inputs)
 
     frame = df[inputs]
-    print(frame.head())
+    # print(frame.head())
 
     for season in SEASONS:
         frame.loc[:, 'temp_{}_{}'.format(season, year)] += change
 
-    columns = ['biome_num']
+    columns = ['latitude', 'longitude', 'biome_num']
     new_data = pd.DataFrame(columns=columns)
 
    for i, chunk in enumerate(chunker(frame, B.batch_size)):
-        input_data = normalize_ndarray(chunk.values)
+        if chunk.shape[0] < B.batch_size:
+            continue
+        input_data = normalize_ndarray(chunk.loc[:, chunk.columns != 'longitude'].values)
         out = B.predict(input_data)
-        new_index = np.concatenate((chunk.index.values, new_data.index.values))
 
-        new_data = new_data.reindex(new_index)
-        new_data.loc[chunk.index.values, 'biome_num'] = out
+        f = pd.DataFrame({
+            'longitude': chunk.loc[:, 'longitude'],
+            'latitude': chunk.loc[:, 'latitude'],
+            'biome_num': out
+        }, columns=columns)
+        new_data = new_data.append(f)
+
+    print(new_data)
     draw(new_data)
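Note: the new prediction loop batches the frame with chunker and drops the short trailing chunk so every batch matches B.batch_size. chunker's body is not part of this diff; the sketch below uses a typical slicing implementation as a stand-in, with toy data:

import pandas as pd

def chunker(seq, size):
    # assumed implementation; predict.py's own body is not shown in this hunk
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

frame = pd.DataFrame({'longitude': range(10), 'latitude': range(10)})
batch_size = 4

for chunk in chunker(frame, batch_size):
    if chunk.shape[0] < batch_size:  # the guard added in this commit
        continue                     # skip the short trailing chunk
    print(chunk.shape)               # (4, 2) for each full chunk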
@@ -4,6 +4,6 @@ matplotlib==3.0.2
 descartes==1.1.0
 pysal==2.0.0
 rasterio==1.0.15
-tensorflow==1.12.0
+tensorflow==1.13.1
 Cartopy==0.17.0
 numpy==1.16.1
tracks (new file, 89 lines)

@@ -0,0 +1,89 @@
+Layer (type)                 Output Shape              Param #
+=================================================================
+Group 1
+-----------------------------------------------------------------
+dense (Dense)                (None, 128)               1536
+_________________________________________________________________
+dense_1 (Dense)              (None, 256)               33024
+_________________________________________________________________
+dense_2 (Dense)              (None, 14)                3598
+-----------------------------------------------------------------
+Total params: 38,158
+1 Epoch: loss: 0.3822 - acc: 0.8684
+Learning rate: 0.005
+=================================================================
+
+Group 2
+-----------------------------------------------------------------
+dense (Dense)                (None, 32)                384
+_________________________________________________________________
+dense_1 (Dense)              (None, 64)                2112
+_________________________________________________________________
+dense_2 (Dense)              (None, 32)                2080
+_________________________________________________________________
+dense_3 (Dense)              (None, 14)                462
+-----------------------------------------------------------------
+Total params: 5,038
+1 Epoch: loss: 0.3760 - acc: 0.8678 @ 20minutes
+Stopped converging, loss increasing
+Learning rate: 0.005
+=================================================================
+
+Group 3
+-----------------------------------------------------------------
+dense (Dense)                (None, 16)                192
+_________________________________________________________________
+dense_1 (Dense)              (None, 32)                544
+_________________________________________________________________
+dense_2 (Dense)              (None, 16)                528
+_________________________________________________________________
+dense_3 (Dense)              (None, 14)                238
+-----------------------------------------------------------------
+Total params: 1,502
+1 Epoch: loss: 0.3702 - acc: 0.8671 @ 12minutes
+10 Epochs: loss: 0.3280 - acc: 0.8815
+Stopped converging after 5 epochs, was oscillating
+Learning rate: 0.005
+=================================================================
+
+Group 4
+_________________________________________________________________
+dense (Dense)                (None, 12)                144
+_________________________________________________________________
+dense_1 (Dense)              (None, 14)                182
+_________________________________________________________________
+Total params: 326
+1 Epoch: loss: 0.4412 - acc: 0.8457 @ 10m
+60 Epochs: loss: 0.4146 - acc: 0.8546
+Stopped converging
+Learning rate: 0.005
+=================================================================
+
+Group 5
+_________________________________________________________________
+dense (Dense)                (None, 12)                144
+_________________________________________________________________
+dense_1 (Dense)              (None, 14)                182
+_________________________________________________________________
+Total params: 326
+1 Epoch: loss: 0.5057 - acc: 0.8268 @ 10m
+15 epoch: loss: 0.4240 - acc: 0.8481
+Stopped converging
+Learning rate: 0.001
+=================================================================
+
+Group 6
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense (Dense)                (None, 24)                288
+_________________________________________________________________
+dense_1 (Dense)              (None, 14)                350
+_________________________________________________________________
+Total params: 638
+1 Epoch: loss: 0.4520 - acc: 0.8416 @ 12m
+30 epochs: loss: 0.3562 - acc: 0.8691, still converging
+stopped converging after 100 epochs
+Learning rate: 0.001
+
+
utils.py (29 changed lines)

@@ -3,7 +3,7 @@ import tensorflow as tf
 import pandas as pd
 from constants import *
 
-inputs = ['elevation', 'distance_to_water']
+inputs = ['elevation', 'distance_to_water', 'latitude']
 output = 'biome_num'
 
 def normalize(v):
@@ -18,7 +18,7 @@ def normalize_ndarray(ar):
 
 def normalize_df(df):
     for col in df.columns:
-        df.loc[col] = normalize(df[col])
+        df.loc[col] = normalize_ndarray(df[col])
 
     return df
 
@@ -29,9 +29,24 @@ def dataframe_to_dataset_biomes(df):
     # 3 for latitude, elevation and distance_to_water
     columns = 11
 
+    # make biomes uniformly distributed so each biome has enough data to avoid a biased dataset
+    biome_shares = df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] })
+    max_share = np.max(biome_shares['biome_num'])
+    dsize = df.shape[0]
+    max_share_count = int(max_share * dsize)
+
+    for biome_num in biome_shares.index:
+        share = biome_shares.values[biome_num][0]
+        share_count = int(share * dsize)
+        diff = max_share_count - share_count
+        rows = df.loc[df['biome_num'] == biome_num]
+        diff_ratio = int(diff / rows.shape[0])
+        df = pd.concat([df] + [rows] * diff_ratio, ignore_index=True)
+
+    # print(df.groupby(['biome_num']).agg({ 'biome_num': lambda x: x.count() / df.shape[0] }))
+
     tf_inputs = np.empty((0, columns))
     tf_output = np.empty((0))
-    latitude = np.array(df.index.get_level_values(1))
 
     for year in range(MIN_YEAR, MAX_YEAR + 1):
         local_inputs = list(inputs)
@@ -43,7 +58,6 @@ def dataframe_to_dataset_biomes(df):
 
 
         local_df = df[local_inputs]
-        local_df.loc[:, 'latitude'] = pd.Series(latitude, index=local_df.index)
 
         tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0)
         tf_output = np.concatenate((tf_output, df[output].values), axis=0)
@@ -62,7 +76,6 @@ def dataframe_to_dataset_temp_precip(df):
 
     tf_inputs = np.empty((0, columns))
     tf_output = np.empty((0, 2))
-    latitude = np.array(df.index.get_level_values(1))
 
     for year in range(MIN_YEAR, MAX_YEAR + 1):
         local_inputs = list(inputs)
@@ -70,7 +83,6 @@ def dataframe_to_dataset_temp_precip(df):
         for idx, season in enumerate(SEASONS):
             season_index = idx / len(season)
             local_df = df[local_inputs]
-            local_df.loc[:, 'latitude'] = pd.Series(latitude, index=local_df.index)
             local_df.loc[:, 'season'] = pd.Series(np.repeat(season_index, rows), index=local_df.index)
             local_df.loc[:, 'year'] = pd.Series(np.repeat(year, rows), index=local_df.index)
 
@@ -79,7 +91,10 @@ def dataframe_to_dataset_temp_precip(df):
     tf_output = np.concatenate((tf_output, df[output].values), axis=0)
 
     tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
-    tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32)
+    tf_output = tf.cast(tf_output, tf.float32)
 
     return int(tf_inputs.shape[0]), 5, 2, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
 
+
+# df = pd.read_pickle('data.p')
+# print(dataframe_to_dataset_biomes(df))
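Note: the block added to dataframe_to_dataset_biomes oversamples under-represented biomes by replicating their rows until every class approaches the largest class's share. A toy, self-contained version of that idea (made-up data; the real code works on the pickled frame's biome_num column):

import pandas as pd

df = pd.DataFrame({'biome_num': [0, 0, 0, 0, 1], 'elevation': [1, 2, 3, 4, 5]})

shares = df.groupby('biome_num').size() / len(df)   # share of rows per biome
max_count = int(shares.max() * len(df))             # size of the largest class

for biome_num, share in shares.items():
    rows = df.loc[df['biome_num'] == biome_num]
    diff_ratio = int((max_count - int(share * len(df))) / len(rows))
    # replicate minority-class rows so each biome approaches the largest share
    df = pd.concat([df] + [rows] * diff_ratio, ignore_index=True)

print(df['biome_num'].value_counts())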