feat(tf): transform dataframe to tensorflow dataset
This commit is contained in:
parent
ef604661ca
commit
4318cf71be
18
constants.py
Normal file
18
constants.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
import os

# Absolute path of the directory containing this module; every data path
# below is resolved relative to it.
directory = os.path.dirname(os.path.abspath(__file__))

# Locations of the geographic source data.
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')

# Inclusive range of years iterated over by the data-processing code.
MIN_YEAR = 1900
MAX_YEAR = 2017

# Season names, used to build the 'temp_<season>_<year>' and
# 'precip_<season>_<year>' column names.
SEASONS = ['winter', 'spring', 'summer', 'autumn']

# Month names belonging to each season, listed in chronological order.
WINTER_MONTHS = ['december', 'january', 'february']
SPRING_MONTHS = ['march', 'april', 'may']
SUMMER_MONTHS = ['june', 'july', 'august']
AUTUMN_MONTHS = ['september', 'october', 'november']
|
25
data.py
25
data.py
@ -1,19 +1,12 @@
|
|||||||
import geopandas
|
import geopandas
|
||||||
import os
|
|
||||||
import rasterio
|
import rasterio
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time
|
import time
|
||||||
from matplotlib import pyplot
|
from matplotlib import pyplot
|
||||||
from shapely.geometry import Point
|
from shapely.geometry import Point
|
||||||
|
from constants import *
|
||||||
|
|
||||||
directory = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
|
|
||||||
GEODATA = os.path.join(directory, 'geodata')
|
|
||||||
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
|
||||||
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
|
||||||
TEMP = os.path.join(GEODATA, 'air_temp')
|
|
||||||
PRECIP = os.path.join(GEODATA, 'precipitation')
|
|
||||||
|
|
||||||
def read_temp_data(year):
|
def read_temp_data(year):
|
||||||
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
||||||
@ -38,8 +31,6 @@ elevation_data = elevation.read(1)
|
|||||||
|
|
||||||
temp = {}
|
temp = {}
|
||||||
precip = {}
|
precip = {}
|
||||||
MIN_YEAR = 1900
|
|
||||||
MAX_YEAR = 2017
|
|
||||||
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||||
temp[year] = read_temp_data(year)
|
temp[year] = read_temp_data(year)
|
||||||
precip[year] = read_precip_data(year)
|
precip[year] = read_precip_data(year)
|
||||||
@ -52,7 +43,7 @@ boundary = world.boundary
|
|||||||
temp_precip_columns = []
|
temp_precip_columns = []
|
||||||
|
|
||||||
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||||
for s in ['winter', 'spring', 'summer', 'autumn']:
|
for s in SEASONS:
|
||||||
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
|
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
|
||||||
|
|
||||||
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
|
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
|
||||||
@ -86,14 +77,14 @@ def get_point_information(longitude, latitude):
|
|||||||
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
|
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
|
||||||
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
|
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
|
||||||
|
|
||||||
spring_temp = [yt[month] for month in ['march', 'april', 'may']]
|
spring_temp = [yt[month] for month in SPRING_MONTHS]
|
||||||
spring_precip = [yp[month] for month in ['march', 'april', 'may']]
|
spring_precip = [yp[month] for month in SPRING_MONTHS]
|
||||||
|
|
||||||
summer_temp = [yt[month] for month in ['june', 'july', 'august']]
|
summer_temp = [yt[month] for month in SUMMER_MONTHS]
|
||||||
summer_precip = [yp[month] for month in ['june', 'july', 'august']]
|
summer_precip = [yp[month] for month in SUMMER_MONTHS]
|
||||||
|
|
||||||
autumn_temp = [yt[month] for month in ['september', 'november', 'october']]
|
autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
|
||||||
autumn_precip = [yp[month] for month in ['september', 'november', 'october']]
|
autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
|
||||||
|
|
||||||
item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
|
item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
|
||||||
item['precip_winter_{}'.format(year)] = np.mean(winter_temp)
|
item['precip_winter_{}'.format(year)] = np.mean(winter_temp)
|
||||||
|
13
nn.py
13
nn.py
@ -7,7 +7,18 @@ from tensorflow import keras
|
|||||||
# Helper libraries
|
# Helper libraries
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
import data
|
from utils import *
|
||||||
|
|
||||||
|
tf.enable_eager_execution()
|
||||||
|
|
||||||
|
df = pd.read_pickle('data_distance.p')
|
||||||
|
# print(df.head())
|
||||||
|
|
||||||
|
dataset = dataframe_to_dataset_biomes(df)
|
||||||
|
|
||||||
|
for feature, target in dataset:
|
||||||
|
print('{} => {}'.format(feature, target))
|
||||||
|
|
||||||
print(tf.__version__)
|
print(tf.__version__)
|
||||||
|
39
utils.py
Normal file
39
utils.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
import pandas as pd
|
||||||
|
from constants import *
|
||||||
|
|
||||||
|
# Feature columns that are the same for every year.
inputs = ['elevation', 'distance_to_water']
# Column used as the prediction target.
output = 'biome_num'


def dataframe_to_dataset_biomes(df):
    """Build a ``tf.data.Dataset`` of (features, biome) pairs from ``df``.

    For every year in ``MIN_YEAR..MAX_YEAR`` (inclusive) one block of rows is
    produced, containing for each row of ``df``: the ``inputs`` columns, the
    ``temp_<season>_<year>`` / ``precip_<season>_<year>`` columns for each
    season in ``SEASONS``, and a trailing ``latitude`` column. The yearly
    blocks are stacked vertically in year order, and the ``output`` column is
    repeated once per year so the targets line up with the features.

    Args:
        df: DataFrame with a multi-level index whose level 1 is used as the
            latitude (assumed to be a (longitude, latitude) index -- confirm
            against the code that pickles the frame). Must contain the
            ``inputs`` columns, the ``output`` column and the seasonal
            temperature/precipitation columns for every year.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, target)`` slices, with
        features cast to ``tf.float32`` and targets cast to ``tf.int32``.
    """
    # Static inputs + one temp and one precip column per season + latitude.
    n_columns = len(inputs) + 2 * len(SEASONS) + 1

    latitude = np.asarray(df.index.get_level_values(1))
    targets = df[output].values

    # Seed both lists with empty arrays so the result keeps the same shape
    # and dtype promotion as before, even if the year range were empty.
    feature_blocks = [np.empty((0, n_columns))]
    target_blocks = [np.empty(0)]

    for year in range(MIN_YEAR, MAX_YEAR + 1):
        year_columns = list(inputs)
        for season in SEASONS:
            year_columns += [
                'temp_{}_{}'.format(season, year),
                'precip_{}_{}'.format(season, year),
            ]

        # assign() returns a new frame, so adding the latitude column never
        # writes into a slice of ``df`` (no SettingWithCopyWarning).
        year_df = df[year_columns].assign(latitude=latitude)

        feature_blocks.append(year_df.values)
        target_blocks.append(targets)

    # Concatenate once instead of re-copying the accumulated array per year.
    tf_inputs = tf.cast(np.concatenate(feature_blocks, axis=0), tf.float32)
    tf_output = tf.cast(np.concatenate(target_blocks, axis=0), tf.int32)

    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
|
Loading…
Reference in New Issue
Block a user