import numpy as np
import tensorflow as tf
import pandas as pd

from constants import *

# Feature columns that do not depend on the year.
inputs = ['elevation', 'distance_to_water']
# Label column.
output = 'biome_num'


def dataframe_to_dataset_biomes(df: pd.DataFrame) -> tf.data.Dataset:
    """Flatten a per-location, multi-year DataFrame into a TensorFlow dataset.

    For every year in ``[MIN_YEAR, MAX_YEAR]`` one feature row is produced per
    row of ``df``. The feature columns are, in order:

    1. the static columns listed in ``inputs``,
    2. ``temp_<season>_<year>`` and ``precip_<season>_<year>`` for each
       season in ``SEASONS``,
    3. the latitude, taken from level 1 of ``df``'s index.

    The label of every produced row is the ``output`` column of the
    corresponding ``df`` row (repeated once per year).

    Args:
        df: DataFrame whose index has at least two levels (level 1 is used as
            latitude) and which contains the ``inputs`` columns, the
            ``output`` column and the seasonal temperature/precipitation
            columns for every year in range.

    Returns:
        A ``tf.data.Dataset`` of ``(features, label)`` pairs, with features
        cast to ``float32`` and labels cast to ``int32``.

    Raises:
        KeyError: if one of the expected columns is missing from ``df``.
    """
    # Static inputs + (temp, precip) per season + latitude.
    columns = len(inputs) + 2 * len(SEASONS) + 1

    latitude = np.array(df.index.get_level_values(1))
    labels = df[output].values

    # Seed with empty float arrays so the result keeps the expected shape and
    # a float64 intermediate dtype even when the year range is empty.
    input_chunks = [np.empty((0, columns))]
    output_chunks = [np.empty((0))]

    for year in range(MIN_YEAR, MAX_YEAR + 1):
        local_inputs = list(inputs)
        for season in SEASONS:
            local_inputs += [
                'temp_{}_{}'.format(season, year),
                'precip_{}_{}'.format(season, year)
            ]

        # assign() returns a new frame, so the latitude column is never
        # written into a slice of ``df`` (avoids SettingWithCopyWarning).
        local_df = df[local_inputs].assign(latitude=latitude)

        input_chunks.append(local_df.values)
        output_chunks.append(labels)

    # Concatenate once instead of re-copying the accumulated array per year.
    tf_inputs = np.concatenate(input_chunks, axis=0)
    tf_output = np.concatenate(output_chunks, axis=0)

    tf_inputs = tf.cast(tf_inputs, tf.float32)
    tf_output = tf.cast(tf_output, tf.int32)

    return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))