diff --git a/constants.py b/constants.py
new file mode 100644
--- /dev/null
+++ b/constants.py
@@ -0,0 +1,22 @@
+"""Shared filesystem paths, year range and season/month names."""
+import os
+
+directory = os.path.dirname(os.path.abspath(__file__))
+
+# Geodata inputs, all located under the 'geodata' folder next to this file.
+GEODATA = os.path.join(directory, 'geodata')
+ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
+ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
+TEMP = os.path.join(GEODATA, 'air_temp')
+PRECIP = os.path.join(GEODATA, 'precipitation')
+
+# Year range; callers iterate range(MIN_YEAR, MAX_YEAR + 1), so both ends are included.
+MIN_YEAR = 1900
+MAX_YEAR = 2017
+
+# Season names and the month names that make up each season, in calendar order.
+SEASONS = ['winter', 'spring', 'summer', 'autumn']
+WINTER_MONTHS = ['december', 'january', 'february']
+SPRING_MONTHS = ['march', 'april', 'may']
+SUMMER_MONTHS = ['june', 'july', 'august']
+AUTUMN_MONTHS = ['september', 'october', 'november']
diff --git a/data.py b/data.py
index d3a543c..5e507e4 100644
--- a/data.py
+++ b/data.py
@@ -1,19 +1,12 @@
 import geopandas
-import os
 import rasterio
 import pandas as pd
 import numpy as np
 import time
 from matplotlib import pyplot
 from shapely.geometry import Point
+from constants import *
 
-directory = os.path.dirname(os.path.abspath(__file__))
-
-GEODATA = os.path.join(directory, 'geodata')
-ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
-ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
-TEMP = os.path.join(GEODATA, 'air_temp')
-PRECIP = os.path.join(GEODATA, 'precipitation')
 
 def read_temp_data(year):
     return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
@@ -38,8 +31,6 @@ elevation_data = elevation.read(1)
 
 temp = {}
 precip = {}
-MIN_YEAR = 1900
-MAX_YEAR = 2017
 for year in range(MIN_YEAR, MAX_YEAR + 1):
     temp[year] = read_temp_data(year)
     precip[year] = read_precip_data(year)
@@ -52,7 +43,7 @@ boundary = world.boundary
 
 temp_precip_columns = []
 for year in range(MIN_YEAR, MAX_YEAR + 1):
-    for s in ['winter', 'spring', 'summer', 'autumn']:
+    for s in SEASONS:
         temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
 
 columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
@@ -86,14 +77,14 @@ def get_point_information(longitude, latitude):
         winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
         winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
 
-        spring_temp = [yt[month] for month in ['march', 'april', 'may']]
-        spring_precip = [yp[month] for month in ['march', 'april', 'may']]
+        spring_temp = [yt[month] for month in SPRING_MONTHS]
+        spring_precip = [yp[month] for month in SPRING_MONTHS]
 
-        summer_temp = [yt[month] for month in ['june', 'july', 'august']]
-        summer_precip = [yp[month] for month in ['june', 'july', 'august']]
+        summer_temp = [yt[month] for month in SUMMER_MONTHS]
+        summer_precip = [yp[month] for month in SUMMER_MONTHS]
 
-        autumn_temp = [yt[month] for month in ['september', 'november', 'october']]
-        autumn_precip = [yp[month] for month in ['september', 'november', 'october']]
+        autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
+        autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
 
         item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
         item['precip_winter_{}'.format(year)] = np.mean(winter_temp)
diff --git a/nn.py b/nn.py
index b49999f..7e00fbc 100644
--- a/nn.py
+++ b/nn.py
@@ -7,7 +7,18 @@ from tensorflow import keras
 # Helper libraries
 import numpy as np
 import matplotlib.pyplot as plt
+import pandas as pd
 
-import data
+from utils import *
+
+tf.enable_eager_execution()
+
+df = pd.read_pickle('data_distance.p')
+# print(df.head())
+
+dataset = dataframe_to_dataset_biomes(df)
+
+for feature, target in dataset:
+    print('{} => {}'.format(feature, target))
 
 print(tf.__version__)
diff --git a/utils.py b/utils.py
new file mode 100644
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,60 @@
+"""Helpers for turning the biome DataFrame into a tf.data.Dataset."""
+import numpy as np
+import tensorflow as tf
+
+from constants import MAX_YEAR, MIN_YEAR, SEASONS
+
+# Year-independent feature columns read from the DataFrame.
+inputs = ['elevation', 'distance_to_water']
+# Column holding the target label.
+output = 'biome_num'
+
+
+def dataframe_to_dataset_biomes(df):
+    """Build a dataset with one (features, biome) sample per row and year.
+
+    For every year in MIN_YEAR..MAX_YEAR (inclusive) the feature vector of a
+    row is its `inputs` columns, the temp/precip columns of each season of
+    that year, and the row's latitude (index level 1), in that order. The
+    target is the row's `output` column, repeated for every year.
+
+    Args:
+        df: DataFrame whose index has latitude as level 1 and which contains
+            the `inputs`, `output` and 'temp_<season>_<year>' /
+            'precip_<season>_<year>' columns.
+
+    Returns:
+        A tf.data.Dataset of (float32 features, int32 target) pairs.
+
+    Raises:
+        KeyError: if a required column is missing from `df`.
+    """
+    # Static inputs + temp and precip per season + latitude (11 with defaults).
+    columns = len(inputs) + 2 * len(SEASONS) + 1
+
+    latitude = np.array(df.index.get_level_values(1))
+    target = df[output].values
+
+    # Seeded with empty arrays so an empty year range still concatenates.
+    input_chunks = [np.empty((0, columns))]
+    output_chunks = [np.empty(0)]
+
+    for year in range(MIN_YEAR, MAX_YEAR + 1):
+        local_inputs = list(inputs)
+        for season in SEASONS:
+            local_inputs += [
+                'temp_{}_{}'.format(season, year),
+                'precip_{}_{}'.format(season, year),
+            ]
+
+        # assign() returns a new frame, so nothing is written into a slice of df.
+        local_df = df[local_inputs].assign(latitude=latitude)
+
+        input_chunks.append(local_df.values)
+        output_chunks.append(target)
+
+    # Concatenate once instead of re-copying the growing arrays every year.
+    tf_inputs = tf.cast(np.concatenate(input_chunks, axis=0), tf.float32)
+    tf_output = tf.cast(np.concatenate(output_chunks, axis=0), tf.int32)
+
+    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))