world-ecoregion/data.py

117 lines
4.0 KiB
Python
Raw Normal View History

2019-02-03 05:34:28 +00:00
import geopandas
import os
import rasterio
import pandas as pd
import numpy as np
import time
2019-02-03 05:34:28 +00:00
from matplotlib import pyplot
from shapely.geometry import Point
directory = os.path.dirname(os.path.abspath(__file__))
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
2019-02-03 05:34:28 +00:00
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')
YEAR = 2014
2019-02-03 05:34:28 +00:00
def read_temp_data(year):
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
names=['longitude', 'latitude', 'january',
'february', 'march', 'april',
'may', 'june', 'july', 'august',
'september', 'november', 'october',
'december', 'yearly_avg'])
def read_precip_data(year):
return pd.read_csv(os.path.join(PRECIP, 'precip.{}'.format(year)), sep='\s+', header=None,
names=['longitude', 'latitude', 'january',
'february', 'march', 'april',
'may', 'june', 'july', 'august',
'september', 'november', 'october',
'december', 'yearly_avg'])
eco = geopandas.read_file(ECOREGIONS)
2019-02-03 05:34:28 +00:00
elevation = rasterio.open(ELEVATION)
elevation_data = elevation.read(1)
temp = read_temp_data(YEAR)
2019-02-03 05:34:28 +00:00
precip = read_precip_data(YEAR)
precip['yearly_avg'] = precip.mean(axis=1)
2019-02-03 05:34:28 +00:00
print('# Elevation')
print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
2019-02-03 05:34:28 +00:00
print('# Temperature ({})'.format(YEAR))
print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
2019-02-03 05:34:28 +00:00
print('# Precipitation ({})'.format(YEAR))
print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
indices = ['longitude', 'latitude']
final_data = pd.DataFrame(index=indices, columns=columns)
2019-02-03 05:34:28 +00:00
def get_point_information(longitude, latitude):
start_time = time.time()
2019-02-03 05:34:28 +00:00
p = Point(longitude, latitude)
# print('({},{})'.format(longitude, latitude))
2019-02-03 05:34:28 +00:00
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
print("er%ss" % (time.time() - start_time))
if ecoregion.empty:
return False
start_time = time.time()
elev = elevation_data[elevation.index(longitude, latitude)]
start_time = time.time()
t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2))
start_time = time.time()
p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2))
2019-02-03 05:34:28 +00:00
return {
'biome_num': ecoregion.BIOME_NUM.iloc[0],
'biome_name': ecoregion.BIOME_NAME.iloc[0],
'elevation': elev,
'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg,
'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg
2019-02-03 05:34:28 +00:00
}
data_indices = []
data_map = {}
for col in columns:
data_map[col] = {}
i = 0
start_time = time.time()
for longitude in range(-179, 179):
print('-', end='')
for latitude in range(-89, 89):
# generate data and save to file
d = get_point_information(longitude, latitude)
if d == False:
print('.', end='')
continue
for key, value in d.items():
data_map[key][(longitude, latitude)] = value
print('+', end='')
print('')
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
2019-02-03 05:34:28 +00:00
start_time = time.time()
df = pd.DataFrame(data_map)
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
print(df.head())
2019-02-03 05:34:28 +00:00
start_time = time.time()
df.to_pickle('data.p')
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))