2019-02-03 05:34:28 +00:00
|
|
|
import geopandas
|
|
|
|
import os
|
|
|
|
import rasterio
|
|
|
|
import pandas as pd
|
2019-02-08 14:44:57 +00:00
|
|
|
import numpy as np
|
|
|
|
import time
|
2019-02-03 05:34:28 +00:00
|
|
|
from matplotlib import pyplot
|
|
|
|
from shapely.geometry import Point
|
|
|
|
|
|
|
|
directory = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
|
|
GEODATA = os.path.join(directory, 'geodata')
|
2019-02-08 14:44:57 +00:00
|
|
|
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
2019-02-03 05:34:28 +00:00
|
|
|
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
|
|
|
TEMP = os.path.join(GEODATA, 'air_temp')
|
|
|
|
PRECIP = os.path.join(GEODATA, 'precipitation')
|
2019-02-08 14:44:57 +00:00
|
|
|
YEAR = 2014
|
2019-02-03 05:34:28 +00:00
|
|
|
|
|
|
|
def read_temp_data(year):
|
|
|
|
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
|
|
|
names=['longitude', 'latitude', 'january',
|
|
|
|
'february', 'march', 'april',
|
|
|
|
'may', 'june', 'july', 'august',
|
|
|
|
'september', 'november', 'october',
|
|
|
|
'december', 'yearly_avg'])
|
|
|
|
|
|
|
|
def read_precip_data(year):
|
|
|
|
return pd.read_csv(os.path.join(PRECIP, 'precip.{}'.format(year)), sep='\s+', header=None,
|
|
|
|
names=['longitude', 'latitude', 'january',
|
|
|
|
'february', 'march', 'april',
|
|
|
|
'may', 'june', 'july', 'august',
|
|
|
|
'september', 'november', 'october',
|
|
|
|
'december', 'yearly_avg'])
|
|
|
|
|
|
|
|
eco = geopandas.read_file(ECOREGIONS)
|
2019-02-08 14:44:57 +00:00
|
|
|
|
2019-02-03 05:34:28 +00:00
|
|
|
elevation = rasterio.open(ELEVATION)
|
2019-02-08 14:44:57 +00:00
|
|
|
elevation_data = elevation.read(1)
|
|
|
|
|
|
|
|
temp = read_temp_data(YEAR)
|
2019-02-03 05:34:28 +00:00
|
|
|
|
2019-02-08 14:44:57 +00:00
|
|
|
precip = read_precip_data(YEAR)
|
|
|
|
precip['yearly_avg'] = precip.mean(axis=1)
|
2019-02-03 05:34:28 +00:00
|
|
|
|
2019-02-08 14:44:57 +00:00
|
|
|
print('# Elevation')
|
|
|
|
print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
|
|
|
|
print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
|
2019-02-03 05:34:28 +00:00
|
|
|
|
2019-02-08 14:44:57 +00:00
|
|
|
print('# Temperature ({})'.format(YEAR))
|
|
|
|
print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
|
2019-02-03 05:34:28 +00:00
|
|
|
|
2019-02-08 14:44:57 +00:00
|
|
|
print('# Precipitation ({})'.format(YEAR))
|
|
|
|
print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
|
|
|
|
|
|
|
|
columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
|
|
|
|
indices = ['longitude', 'latitude']
|
|
|
|
final_data = pd.DataFrame(index=indices, columns=columns)
|
2019-02-03 05:34:28 +00:00
|
|
|
|
|
|
|
def get_point_information(longitude, latitude):
|
2019-02-08 14:44:57 +00:00
|
|
|
start_time = time.time()
|
2019-02-03 05:34:28 +00:00
|
|
|
p = Point(longitude, latitude)
|
2019-02-08 14:44:57 +00:00
|
|
|
# print('({},{})'.format(longitude, latitude))
|
2019-02-03 05:34:28 +00:00
|
|
|
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
|
2019-02-08 14:44:57 +00:00
|
|
|
print("er%ss" % (time.time() - start_time))
|
|
|
|
if ecoregion.empty:
|
|
|
|
return False
|
|
|
|
start_time = time.time()
|
|
|
|
elev = elevation_data[elevation.index(longitude, latitude)]
|
|
|
|
start_time = time.time()
|
|
|
|
t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2))
|
|
|
|
start_time = time.time()
|
|
|
|
p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2))
|
|
|
|
|
2019-02-03 05:34:28 +00:00
|
|
|
return {
|
2019-02-08 14:44:57 +00:00
|
|
|
'biome_num': ecoregion.BIOME_NUM.iloc[0],
|
|
|
|
'biome_name': ecoregion.BIOME_NAME.iloc[0],
|
|
|
|
'elevation': elev,
|
|
|
|
'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg,
|
|
|
|
'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg
|
2019-02-03 05:34:28 +00:00
|
|
|
}
|
2019-02-08 14:44:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
data_indices = []
|
|
|
|
|
|
|
|
data_map = {}
|
|
|
|
for col in columns:
|
|
|
|
data_map[col] = {}
|
|
|
|
|
|
|
|
i = 0
|
|
|
|
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
for longitude in range(-179, 179):
|
|
|
|
print('-', end='')
|
|
|
|
for latitude in range(-89, 89):
|
|
|
|
# generate data and save to file
|
|
|
|
d = get_point_information(longitude, latitude)
|
|
|
|
if d == False:
|
|
|
|
print('.', end='')
|
|
|
|
continue
|
|
|
|
|
|
|
|
for key, value in d.items():
|
|
|
|
data_map[key][(longitude, latitude)] = value
|
|
|
|
|
|
|
|
print('+', end='')
|
|
|
|
print('')
|
|
|
|
|
|
|
|
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
|
|
|
|
2019-02-03 05:34:28 +00:00
|
|
|
start_time = time.time()
|
2019-02-08 14:44:57 +00:00
|
|
|
df = pd.DataFrame(data_map)
|
|
|
|
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
|
|
|
|
print(df.head())
|
2019-02-03 05:34:28 +00:00
|
|
|
start_time = time.time()
|
2019-02-08 14:44:57 +00:00
|
|
|
df.to_pickle('data.p')
|
|
|
|
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))
|