feat(tf): transform dataframe to tensorflow dataset

This commit is contained in:
Mahdi Dibaiee 2019-02-12 08:41:33 +03:30
parent ef604661ca
commit 4318cf71be
4 changed files with 77 additions and 18 deletions

18
constants.py Normal file
View File

@ -0,0 +1,18 @@
import os
# Absolute directory containing this module; every data path below is
# resolved relative to it so the code works from any working directory.
directory = os.path.dirname(os.path.abspath(__file__))

# Root folder of the geographic input data and the individual sources in it.
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')

# First and last year of climate data (callers treat both as inclusive).
MIN_YEAR = 1900
MAX_YEAR = 2017

# Season names, in the order used to build per-season column names.
SEASONS = ['winter', 'spring', 'summer', 'autumn']

# Month names making up each season, listed chronologically within the
# season (winter therefore starts with december).
WINTER_MONTHS = ['december', 'january', 'february']
SPRING_MONTHS = ['march', 'april', 'may']
SUMMER_MONTHS = ['june', 'july', 'august']
AUTUMN_MONTHS = ['september', 'october', 'november']

25
data.py
View File

@ -1,19 +1,12 @@
import geopandas
import os
import rasterio
import pandas as pd
import numpy as np
import time
from matplotlib import pyplot
from shapely.geometry import Point
from constants import *
directory = os.path.dirname(os.path.abspath(__file__))
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')
def read_temp_data(year):
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
@ -38,8 +31,6 @@ elevation_data = elevation.read(1)
temp = {}
precip = {}
MIN_YEAR = 1900
MAX_YEAR = 2017
for year in range(MIN_YEAR, MAX_YEAR + 1):
temp[year] = read_temp_data(year)
precip[year] = read_precip_data(year)
@ -52,7 +43,7 @@ boundary = world.boundary
temp_precip_columns = []
for year in range(MIN_YEAR, MAX_YEAR + 1):
for s in ['winter', 'spring', 'summer', 'autumn']:
for s in SEASONS:
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
@ -86,14 +77,14 @@ def get_point_information(longitude, latitude):
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
spring_temp = [yt[month] for month in ['march', 'april', 'may']]
spring_precip = [yp[month] for month in ['march', 'april', 'may']]
spring_temp = [yt[month] for month in SPRING_MONTHS]
spring_precip = [yp[month] for month in SPRING_MONTHS]
summer_temp = [yt[month] for month in ['june', 'july', 'august']]
summer_precip = [yp[month] for month in ['june', 'july', 'august']]
summer_temp = [yt[month] for month in SUMMER_MONTHS]
summer_precip = [yp[month] for month in SUMMER_MONTHS]
autumn_temp = [yt[month] for month in ['september', 'november', 'october']]
autumn_precip = [yp[month] for month in ['september', 'november', 'october']]
autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
# Store the winter means for this year. The precipitation column must be
# averaged from the precipitation samples, not the temperature ones.
item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
item['precip_winter_{}'.format(year)] = np.mean(winter_precip)

13
nn.py
View File

@ -7,7 +7,18 @@ from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import data
from utils import *
# The plain Python iteration over the dataset below relies on eager mode.
tf.enable_eager_execution()

# NOTE(review): read_pickle unpickles arbitrary objects; only load files
# that this project generated itself.
df = pd.read_pickle('data_distance.p')

# Convert the dataframe into a tf.data.Dataset of (features, biome) pairs.
dataset = dataframe_to_dataset_biomes(df)

# Print every sample so the conversion can be checked by eye.
for feature, target in dataset:
    print('{} => {}'.format(feature, target))

print(tf.__version__)

39
utils.py Normal file
View File

@ -0,0 +1,39 @@
import numpy as np
import tensorflow as tf
import pandas as pd
from constants import *
# Feature columns that are the same for every year; the per-year seasonal
# temperature/precipitation columns are appended to these for each sample.
inputs = ['elevation', 'distance_to_water']
# Column used as the dataset target.
output = 'biome_num'
def dataframe_to_dataset_biomes(df):
    """Flatten a per-location dataframe into a (features, biome) dataset.

    Every year in ``[MIN_YEAR, MAX_YEAR]`` contributes one sample per row of
    ``df``. The features of a sample are, in this order: the static
    ``inputs`` columns, the ``temp_<season>_<year>`` / ``precip_<season>_<year>``
    pair for each entry of ``SEASONS``, and finally the latitude read from
    level 1 of the dataframe index. The target is the row's ``output``
    column, repeated unchanged for every year.

    Args:
        df: DataFrame with a two-level index whose level 1 is used as the
            latitude, containing the ``inputs``, ``output`` and seasonal
            columns for every year in the range.

    Returns:
        A ``tf.data.Dataset`` of ``(float32 features, int32 target)``
        slices, ordered year by year.
    """
    years = range(MIN_YEAR, MAX_YEAR + 1)

    # Static inputs + one temp/precip pair per season + latitude.
    columns = len(inputs) + 2 * len(SEASONS) + 1

    latitude = np.array(df.index.get_level_values(1))

    # Collect one feature matrix per year and concatenate once at the end;
    # growing the array inside the loop re-copies all earlier rows each time.
    yearly_features = [np.empty((0, columns))]
    for year in years:
        local_inputs = list(inputs)
        for season in SEASONS:
            local_inputs += [
                'temp_{}_{}'.format(season, year),
                'precip_{}_{}'.format(season, year)
            ]

        # assign() returns a new frame with latitude as the last column, so
        # nothing is written into a frame derived from the caller's df.
        local_df = df[local_inputs].assign(latitude=latitude)
        yearly_features.append(local_df.values)

    tf_inputs = np.concatenate(yearly_features, axis=0)
    # The target does not depend on the year: repeat it once per year.
    tf_output = np.tile(df[output].values, len(years))

    tf_inputs = tf.cast(tf_inputs, tf.float32)
    tf_output = tf.cast(tf_output, tf.int32)

    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))