From ef604661caa31e33cf73c856353125c545c64c26 Mon Sep 17 00:00:00 2001
From: Mahdi Dibaiee
Date: Mon, 11 Feb 2019 14:49:14 +0330
Subject: [PATCH] feat(data): seasonal temp/precip data + distance to water

---
Reviewer notes (this section is not part of the commit message):

* Fixed a copy-paste error in get_point_information(): every
  precip_<season>_<year> value was computed from the temperature list
  (e.g. np.mean(winter_temp)); each one now averages its own *_precip list.
* This copy of the patch was re-wrapped from a whitespace-collapsed source,
  so indentation and blank context lines are reconstructed. Check them
  against commit ef60466 before applying; the blob id on the "index" line
  still refers to the original, uncorrected content.

 data.py | 89 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 30 deletions(-)

diff --git a/data.py b/data.py
index f6feaa4..d3a543c 100644
--- a/data.py
+++ b/data.py
@@ -14,7 +14,6 @@ ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
 ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
 TEMP = os.path.join(GEODATA, 'air_temp')
 PRECIP = os.path.join(GEODATA, 'precipitation')
-YEAR = 2014
 
 def read_temp_data(year):
     return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
@@ -37,54 +36,83 @@ eco = geopandas.read_file(ECOREGIONS)
 elevation = rasterio.open(ELEVATION)
 elevation_data = elevation.read(1)
 
-temp = read_temp_data(YEAR)
+temp = {}
+precip = {}
+MIN_YEAR = 1900
+MAX_YEAR = 2017
+for year in range(MIN_YEAR, MAX_YEAR + 1):
+    temp[year] = read_temp_data(year)
+    precip[year] = read_precip_data(year)
+    precip[year]['yearly_avg'] = precip[year].mean(axis=1)
 
-precip = read_precip_data(YEAR)
-precip['yearly_avg'] = precip.mean(axis=1)
-print('# Elevation')
-print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
-print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
+world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))[['geometry']].unary_union
+boundary = world.boundary
 
-print('# Temperature ({})'.format(YEAR))
-print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
+temp_precip_columns = []
 
-print('# Precipitation ({})'.format(YEAR))
-print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
+for year in range(MIN_YEAR, MAX_YEAR + 1):
+    for s in ['winter', 'spring', 'summer', 'autumn']:
+        temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
 
-columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
+columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
 indices = ['longitude', 'latitude']
 final_data = pd.DataFrame(index=indices, columns=columns)
 
 def get_point_information(longitude, latitude):
-    start_time = time.time()
+    item = {}
     p = Point(longitude, latitude)
-    # print('({},{})'.format(longitude, latitude))
     ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
-    print("er%ss" % (time.time() - start_time))
     if ecoregion.empty:
         return False
-    start_time = time.time()
+
+    item['biome_num'] = ecoregion.BIOME_NUM.iloc[0]
+    item['biome_name'] = ecoregion.BIOME_NAME.iloc[0]
+
     elev = elevation_data[elevation.index(longitude, latitude)]
-    start_time = time.time()
-    t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2))
-    start_time = time.time()
-    p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2))
+    item['elevation'] = elev
 
-    return {
-        'biome_num': ecoregion.BIOME_NUM.iloc[0],
-        'biome_name': ecoregion.BIOME_NAME.iloc[0],
-        'elevation': elev,
-        'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg,
-        'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg
-    }
+    distance_to_sea = p.distance(boundary)
+    item['distance_to_water'] = distance_to_sea
+    t = np.argmin(np.array((temp[MIN_YEAR].longitude - longitude)**2 + (temp[MIN_YEAR].latitude - latitude)**2))
+    p = np.argmin(np.array((precip[MIN_YEAR].longitude - longitude)**2 + (precip[MIN_YEAR].latitude - latitude)**2))
+    # t/p come from the MIN_YEAR frames and are reused below; assumes every year lists grid points in the same row order (TODO confirm).
+    yearly_temp = {}
+    yearly_precip = {}
+    for year in range(MIN_YEAR, MAX_YEAR + 1):
+        yearly_temp[year] = yt = temp[year].iloc[t, 2:]
+        yearly_precip[year] = yp = precip[year].iloc[p, 2:]
+        winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])  # no previous December for MIN_YEAR
+        winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
+
+        spring_temp = [yt[month] for month in ['march', 'april', 'may']]
+        spring_precip = [yp[month] for month in ['march', 'april', 'may']]
+
+        summer_temp = [yt[month] for month in ['june', 'july', 'august']]
+        summer_precip = [yp[month] for month in ['june', 'july', 'august']]
+
+        autumn_temp = [yt[month] for month in ['september', 'november', 'october']]
+        autumn_precip = [yp[month] for month in ['september', 'november', 'october']]
+        # Store one temperature mean and one precipitation mean per season.
+        item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
+        item['precip_winter_{}'.format(year)] = np.mean(winter_precip)
+
+        item['temp_spring_{}'.format(year)] = np.mean(spring_temp)
+        item['precip_spring_{}'.format(year)] = np.mean(spring_precip)
+
+        item['temp_summer_{}'.format(year)] = np.mean(summer_temp)
+        item['precip_summer_{}'.format(year)] = np.mean(summer_precip)
+
+        item['temp_autumn_{}'.format(year)] = np.mean(autumn_temp)
+        item['precip_autumn_{}'.format(year)] = np.mean(autumn_precip)
+
+    return item
 
 
 data_indices = []
-
 data_map = {}
 
 for col in columns:
-  data_map[col] = {}
+    data_map[col] = {}
 
 i = 0
 
@@ -103,6 +131,7 @@ for longitude in range(-179, 179):
             data_map[key][(longitude, latitude)] = value
 
         print('+', end='')
+
     print('')
 
 print("--- Calculations: %s seconds ---" % (time.time() - start_time))
@@ -110,7 +139,7 @@ print("--- Calculations: %s seconds ---" % (time.time() - start_time))
 start_time = time.time()
 df = pd.DataFrame(data_map)
 print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
-print(df.head())
+print(df)
 start_time = time.time()
 df.to_pickle('data.p')
 print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))