feat(tf): transform dataframe to tensorflow dataset

This commit is contained in:
Mahdi Dibaiee 2019-02-12 08:41:33 +03:30
parent ef604661ca
commit 4318cf71be
4 changed files with 77 additions and 18 deletions

18
constants.py Normal file
View File

@ -0,0 +1,18 @@
import os
# Absolute path of the folder containing this module; the data paths below
# are all resolved relative to it.
directory = os.path.dirname(os.path.abspath(__file__))

# Root folder of the geographic input data.
GEODATA = os.path.join(directory, 'geodata')
# Shapefile with the ecoregion polygons.
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
# GeoTIFF with the elevation raster.
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
# Folders holding the air-temperature and precipitation data.
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')

# First and last year covered by the climate data (both inclusive).
MIN_YEAR = 1900
MAX_YEAR = 2017

# Season names, in the order used to build the seasonal column names.
SEASONS = ['winter', 'spring', 'summer', 'autumn']

# Months belonging to each season, listed chronologically within the season.
WINTER_MONTHS = ['december', 'january', 'february']
SPRING_MONTHS = ['march', 'april', 'may']
SUMMER_MONTHS = ['june', 'july', 'august']
# Previously listed as september, november, october; restored to calendar
# order so that any order-sensitive use sees the months in sequence.
AUTUMN_MONTHS = ['september', 'october', 'november']

25
data.py
View File

@ -1,19 +1,12 @@
import geopandas import geopandas
import os
import rasterio import rasterio
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import time import time
from matplotlib import pyplot from matplotlib import pyplot
from shapely.geometry import Point from shapely.geometry import Point
from constants import *
directory = os.path.dirname(os.path.abspath(__file__))
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')
def read_temp_data(year): def read_temp_data(year):
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None, return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
@ -38,8 +31,6 @@ elevation_data = elevation.read(1)
temp = {} temp = {}
precip = {} precip = {}
MIN_YEAR = 1900
MAX_YEAR = 2017
for year in range(MIN_YEAR, MAX_YEAR + 1): for year in range(MIN_YEAR, MAX_YEAR + 1):
temp[year] = read_temp_data(year) temp[year] = read_temp_data(year)
precip[year] = read_precip_data(year) precip[year] = read_precip_data(year)
@ -52,7 +43,7 @@ boundary = world.boundary
temp_precip_columns = [] temp_precip_columns = []
for year in range(MIN_YEAR, MAX_YEAR + 1): for year in range(MIN_YEAR, MAX_YEAR + 1):
for s in ['winter', 'spring', 'summer', 'autumn']: for s in SEASONS:
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)] temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
@ -86,14 +77,14 @@ def get_point_information(longitude, latitude):
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else []) winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else []) winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
spring_temp = [yt[month] for month in ['march', 'april', 'may']] spring_temp = [yt[month] for month in SPRING_MONTHS]
spring_precip = [yp[month] for month in ['march', 'april', 'may']] spring_precip = [yp[month] for month in SPRING_MONTHS]
summer_temp = [yt[month] for month in ['june', 'july', 'august']] summer_temp = [yt[month] for month in SUMMER_MONTHS]
summer_precip = [yp[month] for month in ['june', 'july', 'august']] summer_precip = [yp[month] for month in SUMMER_MONTHS]
autumn_temp = [yt[month] for month in ['september', 'november', 'october']] autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
autumn_precip = [yp[month] for month in ['september', 'november', 'october']] autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
item['temp_winter_{}'.format(year)] = np.mean(winter_temp) item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
item['precip_winter_{}'.format(year)] = np.mean(winter_temp) item['precip_winter_{}'.format(year)] = np.mean(winter_temp)

13
nn.py
View File

@ -7,7 +7,18 @@ from tensorflow import keras
# Helper libraries # Helper libraries
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pandas as pd
import data from utils import *
tf.enable_eager_execution()
df = pd.read_pickle('data_distance.p')
# print(df.head())
dataset = dataframe_to_dataset_biomes(df)
for feature, target in dataset:
print('{} => {}'.format(feature, target))
print(tf.__version__) print(tf.__version__)

39
utils.py Normal file
View File

@ -0,0 +1,39 @@
import numpy as np
import tensorflow as tf
import pandas as pd
from constants import *
# Feature columns that are the same for every year; the per-year seasonal
# temperature/precipitation columns are appended to a copy of this list.
inputs = ['elevation', 'distance_to_water']
# Column used as the prediction target.
output = 'biome_num'
def dataframe_to_dataset_biomes(df):
    """Convert the biome dataframe into a ``tf.data.Dataset`` of (features, target).

    For every year from ``MIN_YEAR`` to ``MAX_YEAR`` (inclusive) each row of
    ``df`` yields one sample, so the dataset holds ``len(df)`` samples per year.
    The feature vector of a sample is, in order: the columns named in
    ``inputs``, then ``temp_<season>_<year>`` and ``precip_<season>_<year>``
    for each season in ``SEASONS``, then the row's latitude.

    Args:
        df: DataFrame whose index has at least two levels, with latitude read
            from level 1 (level 0 is not used as a feature). It must contain
            the ``inputs`` columns, the ``output`` column and the seasonal
            temperature/precipitation columns for every year in range.

    Returns:
        A ``tf.data.Dataset`` built with ``from_tensor_slices`` from a float32
        feature tensor and an int32 target tensor.
    """
    # Plain array, so the latitude column is attached by position.
    latitude = np.array(df.index.get_level_values(1))
    # The target column is identical for every year; read it once.
    targets = df[output].values

    # Static inputs + (temp, precip) per season + latitude.
    # With the current constants this is 2 + 8 + 1 = 11.
    columns = len(inputs) + 2 * len(SEASONS) + 1

    # Seed both lists with empty float arrays so the final concatenation has
    # the same dtype promotion (and the same result for an empty year range)
    # as accumulating onto an initially empty array.
    feature_blocks = [np.empty((0, columns))]
    target_blocks = [np.empty((0))]

    for year in range(MIN_YEAR, MAX_YEAR + 1):
        local_inputs = list(inputs)
        for season in SEASONS:
            local_inputs += [
                'temp_{}_{}'.format(season, year),
                'precip_{}_{}'.format(season, year)
            ]

        # assign() returns a new frame, so nothing is written into a
        # selection of ``df`` (avoids pandas' SettingWithCopyWarning).
        local_df = df[local_inputs].assign(latitude=latitude)

        feature_blocks.append(local_df.values)
        target_blocks.append(targets)

    # Concatenate once at the end instead of re-copying the accumulated
    # arrays on every loop iteration.
    tf_inputs = tf.cast(np.concatenate(feature_blocks, axis=0), tf.float32)
    tf_output = tf.cast(np.concatenate(target_blocks, axis=0), tf.int32)

    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))