From a2ff08b195dd329ebf565ccafe232544e9d310b1 Mon Sep 17 00:00:00 2001 From: Mahdi Dibaiee Date: Thu, 14 Feb 2019 12:36:09 +0330 Subject: [PATCH] fix(data.py): precipitation value was same as temp --- data.py | 10 +++++----- nn.py | 9 +++++++-- utils.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/data.py b/data.py index 5e507e4..aa86898 100644 --- a/data.py +++ b/data.py @@ -35,7 +35,7 @@ for year in range(MIN_YEAR, MAX_YEAR + 1): temp[year] = read_temp_data(year) precip[year] = read_precip_data(year) precip[year]['yearly_avg'] = precip[year].mean(axis=1) - + world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))[['geometry']].unary_union boundary = world.boundary @@ -87,16 +87,16 @@ def get_point_information(longitude, latitude): autumn_precip = [yp[month] for month in AUTUMN_MONTHS] item['temp_winter_{}'.format(year)] = np.mean(winter_temp) - item['precip_winter_{}'.format(year)] = np.mean(winter_temp) + item['precip_winter_{}'.format(year)] = np.mean(winter_precip) item['temp_spring_{}'.format(year)] = np.mean(spring_temp) - item['precip_spring_{}'.format(year)] = np.mean(spring_temp) + item['precip_spring_{}'.format(year)] = np.mean(spring_precip) item['temp_summer_{}'.format(year)] = np.mean(summer_temp) - item['precip_summer_{}'.format(year)] = np.mean(summer_temp) + item['precip_summer_{}'.format(year)] = np.mean(summer_precip) item['temp_autumn_{}'.format(year)] = np.mean(autumn_temp) - item['precip_autumn_{}'.format(year)] = np.mean(autumn_temp) + item['precip_autumn_{}'.format(year)] = np.mean(autumn_precip) return item diff --git a/nn.py b/nn.py index 7e00fbc..7793afc 100644 --- a/nn.py +++ b/nn.py @@ -13,12 +13,17 @@ from utils import * tf.enable_eager_execution() -df = pd.read_pickle('data_distance.p') +df = pd.read_pickle('data_final.p') # print(df.head()) -dataset = dataframe_to_dataset_biomes(df) +# dataset = dataframe_to_dataset_biomes(df) 
+dataset = dataframe_to_dataset_temp_precip(df) +i = 0 for feature, target in dataset: + i += 1 + if i > 10: + break print('{} => {}'.format(feature, target)) print(tf.__version__) diff --git a/utils.py b/utils.py index 84615c1..000e571 100644 --- a/utils.py +++ b/utils.py @@ -6,6 +6,22 @@ from constants import * inputs = ['elevation', 'distance_to_water'] output = 'biome_num' +def normalize(v): + return (v - np.min(v)) / (np.max(v) - np.min(v)) + +def normalize_ndarray(ar): + tr = np.transpose(ar) + for i in range(tr.shape[0]): + tr[i] = normalize(tr[i]) + + return np.transpose(tr) + +def normalize_df(df): + for col in df.columns: + df[col] = normalize(df[col]) + + return df + def dataframe_to_dataset_biomes(df): rows = df.shape[0] @@ -16,7 +32,6 @@ def dataframe_to_dataset_biomes(df): tf_inputs = np.empty((0, columns)) tf_output = np.empty((0)) latitude = np.array(df.index.get_level_values(1)) - longitude = np.array(df.index.get_level_values(0)) for year in range(MIN_YEAR, MAX_YEAR + 1): local_inputs = list(inputs) @@ -33,7 +48,38 @@ def dataframe_to_dataset_biomes(df): tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0) tf_output = np.concatenate((tf_output, df[output].values), axis=0) - tf_inputs = tf.cast(tf_inputs, tf.float32) - tf_output = tf.cast(tf_output, tf.int32) + tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32) + tf_output = tf.cast(normalize_ndarray(tf_output), tf.int32) return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output)) + +def dataframe_to_dataset_temp_precip(df): + rows = df.shape[0] + + # elevation, distance_to_water, latitude + # season, year + columns = 5 + + tf_inputs = np.empty((0, columns)) + tf_output = np.empty((0, 2)) + latitude = np.array(df.index.get_level_values(1)) + + for year in range(MIN_YEAR, MAX_YEAR + 1): + local_inputs = list(inputs) + + for idx, season in enumerate(SEASONS): + season_index = idx / len(season) + local_df = df[local_inputs] + local_df.loc[:, 'latitude'] = 
pd.Series(latitude, index=local_df.index) + local_df.loc[:, 'season'] = pd.Series(np.repeat(season_index, rows), index=local_df.index) + local_df.loc[:, 'year'] = pd.Series(np.repeat(year, rows), index=local_df.index) + + output = ['temp_{}_{}'.format(season, year), 'precip_{}_{}'.format(season, year)] + tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0) + tf_output = np.concatenate((tf_output, df[output].values), axis=0) + + tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32) + tf_output = tf.cast(normalize_ndarray(tf_output), tf.float32) + + return tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output)) +