feat(tf): transform dataframe to tensorflow dataset
This commit is contained in:
parent
ef604661ca
commit
4318cf71be
18
constants.py
Normal file
18
constants.py
Normal file
@ -0,0 +1,18 @@
|
||||
import os
|
||||
|
||||
# Absolute path of the folder that contains this module; every data path
# below is resolved relative to it.
directory = os.path.dirname(os.path.abspath(__file__))

# Locations of the geographic input data.
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')

# First and last year of climate data; consumers iterate
# range(MIN_YEAR, MAX_YEAR + 1), i.e. both bounds are included.
MIN_YEAR = 1900
MAX_YEAR = 2017

# Season names, used to build the 'temp_<season>_<year>' and
# 'precip_<season>_<year>' column names.
SEASONS = ['winter', 'spring', 'summer', 'autumn']

# Months belonging to each season, listed chronologically within the season
# (winter starts with december of the previous calendar year).
WINTER_MONTHS = ['december', 'january', 'february']
SPRING_MONTHS = ['march', 'april', 'may']
SUMMER_MONTHS = ['june', 'july', 'august']
AUTUMN_MONTHS = ['september', 'october', 'november']
|
25
data.py
25
data.py
@ -1,19 +1,12 @@
|
||||
import geopandas
|
||||
import os
|
||||
import rasterio
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import time
|
||||
from matplotlib import pyplot
|
||||
from shapely.geometry import Point
|
||||
from constants import *
|
||||
|
||||
directory = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
GEODATA = os.path.join(directory, 'geodata')
|
||||
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
||||
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
||||
TEMP = os.path.join(GEODATA, 'air_temp')
|
||||
PRECIP = os.path.join(GEODATA, 'precipitation')
|
||||
|
||||
def read_temp_data(year):
|
||||
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
||||
@ -38,8 +31,6 @@ elevation_data = elevation.read(1)
|
||||
|
||||
temp = {}
|
||||
precip = {}
|
||||
MIN_YEAR = 1900
|
||||
MAX_YEAR = 2017
|
||||
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||
temp[year] = read_temp_data(year)
|
||||
precip[year] = read_precip_data(year)
|
||||
@ -52,7 +43,7 @@ boundary = world.boundary
|
||||
temp_precip_columns = []
|
||||
|
||||
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||
for s in ['winter', 'spring', 'summer', 'autumn']:
|
||||
for s in SEASONS:
|
||||
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
|
||||
|
||||
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
|
||||
@ -86,14 +77,14 @@ def get_point_information(longitude, latitude):
|
||||
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
|
||||
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
|
||||
|
||||
spring_temp = [yt[month] for month in ['march', 'april', 'may']]
|
||||
spring_precip = [yp[month] for month in ['march', 'april', 'may']]
|
||||
spring_temp = [yt[month] for month in SPRING_MONTHS]
|
||||
spring_precip = [yp[month] for month in SPRING_MONTHS]
|
||||
|
||||
summer_temp = [yt[month] for month in ['june', 'july', 'august']]
|
||||
summer_precip = [yp[month] for month in ['june', 'july', 'august']]
|
||||
summer_temp = [yt[month] for month in SUMMER_MONTHS]
|
||||
summer_precip = [yp[month] for month in SUMMER_MONTHS]
|
||||
|
||||
autumn_temp = [yt[month] for month in ['september', 'november', 'october']]
|
||||
autumn_precip = [yp[month] for month in ['september', 'november', 'october']]
|
||||
autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
|
||||
autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
|
||||
|
||||
# Seasonal winter means: each column is averaged from its own series
# (precipitation previously reused the temperature values by mistake).
item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
item['precip_winter_{}'.format(year)] = np.mean(winter_precip)
|
||||
|
13
nn.py
13
nn.py
@ -7,7 +7,18 @@ from tensorflow import keras
|
||||
# Helper libraries
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
import data
|
||||
from utils import *
|
||||
|
||||
# Switch TensorFlow to eager execution so the dataset built below can be
# walked with an ordinary Python for-loop.
tf.enable_eager_execution()

# Load the pickled dataframe and convert it to a (features, label) dataset.
df = pd.read_pickle('data_distance.p')
dataset = dataframe_to_dataset_biomes(df)

# Dump every sample as "<features> => <label>" for a quick visual check.
for feature, target in dataset:
    sample_text = '{} => {}'.format(feature, target)
    print(sample_text)

print(tf.__version__)
|
||||
|
39
utils.py
Normal file
39
utils.py
Normal file
@ -0,0 +1,39 @@
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import pandas as pd
|
||||
from constants import *
|
||||
|
||||
# Year-independent feature columns and the label column.
inputs = ['elevation', 'distance_to_water']
output = 'biome_num'


def dataframe_to_dataset_biomes(df):
    """Flatten the per-year climate columns of ``df`` into a TensorFlow dataset.

    For every year from MIN_YEAR to MAX_YEAR (both included) one block of
    samples is built containing, in this order: the ``inputs`` columns, the
    ``temp_<season>_<year>`` / ``precip_<season>_<year>`` pair for each entry
    of SEASONS, and a ``latitude`` column taken from level 1 of the index.
    The yearly blocks are stacked row-wise, so the dataset holds
    ``len(df) * number_of_years`` samples.

    Args:
        df: DataFrame whose index has at least two levels (level 1 is used
            as latitude) and which provides the ``inputs`` columns, the
            ``output`` column and the seasonal columns of every year.

    Returns:
        A ``tf.data.Dataset`` of ``(features, label)`` slices; features are
        cast to float32 and labels to int32.
    """
    # Static inputs + (temp, precip) per season + latitude; 11 columns with
    # the two inputs and four seasons defined in this project.
    width = len(inputs) + 2 * len(SEASONS) + 1

    latitude = np.array(df.index.get_level_values(1))
    labels = df[output].values  # identical for every year, so fetched once

    yearly_features = []
    for year in range(MIN_YEAR, MAX_YEAR + 1):
        year_columns = list(inputs)
        for season in SEASONS:
            year_columns += [
                'temp_{}_{}'.format(season, year),
                'precip_{}_{}'.format(season, year),
            ]

        # assign() returns a new frame with latitude appended as the last
        # column, avoiding a .loc write on a column-sliced frame.
        year_frame = df[year_columns].assign(latitude=latitude)
        yearly_features.append(year_frame.values)

    # One concatenation instead of growing the arrays inside the loop. The
    # leading empty float arrays keep the result well-defined (and float64)
    # even when there is nothing to stack.
    tf_inputs = np.concatenate([np.empty((0, width))] + yearly_features, axis=0)
    tf_output = np.concatenate([np.empty(0)] + [labels] * len(yearly_features), axis=0)

    tf_inputs = tf.cast(tf_inputs, tf.float32)
    tf_output = tf.cast(tf_output, tf.int32)

    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
|
Loading…
Reference in New Issue
Block a user