2019-02-03 05:34:28 +00:00
|
|
|
import geopandas
|
|
|
|
import rasterio
|
|
|
|
import pandas as pd
|
2019-02-08 14:44:57 +00:00
|
|
|
import numpy as np
|
|
|
|
import time
|
2019-02-03 05:34:28 +00:00
|
|
|
from matplotlib import pyplot
|
|
|
|
from shapely.geometry import Point
|
2019-02-12 05:11:33 +00:00
|
|
|
from constants import *
|
2019-02-03 05:34:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
def read_temp_data(year):
    """Load the gridded air-temperature table for one year.

    Reads the whitespace-delimited, header-less file ``air_temp.<year>``
    located in the ``TEMP`` directory.

    Args:
        year: Value substituted into the file name (``air_temp.<year>``).

    Returns:
        A ``pandas.DataFrame`` whose columns are ``longitude``, ``latitude``,
        the twelve month names in calendar order, and ``yearly_avg``.
    """
    # `os` is not imported at the top of this file (presumably it arrives via
    # `from constants import *`); import it here so the function does not
    # depend on that side effect.
    import os

    # NOTE(review): assumes the file stores the monthly values in calendar
    # order (January..December) followed by the annual value -- confirm
    # against the data set's format description. The previous label list had
    # 'november' before 'october', which swapped those two columns.
    return pd.read_csv(
        os.path.join(TEMP, 'air_temp.{}'.format(year)),
        sep=r'\s+',  # raw string: '\s' is not a valid escape in a plain literal
        header=None,
        names=[
            'longitude', 'latitude',
            'january', 'february', 'march', 'april',
            'may', 'june', 'july', 'august',
            'september', 'october', 'november', 'december',
            'yearly_avg',
        ],
    )
|
|
|
|
|
|
|
|
def read_precip_data(year):
    """Load the gridded precipitation table for one year.

    Reads the whitespace-delimited, header-less file ``precip.<year>``
    located in the ``PRECIP`` directory.

    Args:
        year: Value substituted into the file name (``precip.<year>``).

    Returns:
        A ``pandas.DataFrame`` whose columns are ``longitude``, ``latitude``,
        the twelve month names in calendar order, and ``yearly_avg``.
    """
    # `os` is not imported at the top of this file (presumably it arrives via
    # `from constants import *`); import it here so the function does not
    # depend on that side effect.
    import os

    # NOTE(review): assumes the file stores the monthly values in calendar
    # order (January..December) followed by the annual value -- confirm
    # against the data set's format description. The previous label list had
    # 'november' before 'october', which swapped those two columns.
    return pd.read_csv(
        os.path.join(PRECIP, 'precip.{}'.format(year)),
        sep=r'\s+',  # raw string: '\s' is not a valid escape in a plain literal
        header=None,
        names=[
            'longitude', 'latitude',
            'january', 'february', 'march', 'april',
            'may', 'june', 'july', 'august',
            'september', 'october', 'november', 'december',
            'yearly_avg',
        ],
    )
|
|
|
|
|
|
|
|
# Ecoregion geometries; get_point_information() looks up the BIOME_NUM and
# BIOME_NAME of the geometry containing each point.
eco = geopandas.read_file(ECOREGIONS)

# Elevation raster. The dataset handle is deliberately left open:
# get_point_information() calls elevation.index() to turn coordinates into
# indices into elevation_data.
elevation = rasterio.open(ELEVATION)
elevation_data = elevation.read(1)  # band 1, read fully into memory

# Per-year climate tables, keyed by year.
temp = {}
precip = {}
for year in range(MIN_YEAR, MAX_YEAR + 1):
    temp[year] = read_temp_data(year)
    precip[year] = read_precip_data(year)
    # Average only the twelve monthly columns (positions 2..13). A plain
    # DataFrame.mean(axis=1) would also fold longitude, latitude and the
    # file's own last column into the "yearly average".
    precip[year]['yearly_avg'] = precip[year].iloc[:, 2:14].mean(axis=1)
|
2019-02-14 09:06:09 +00:00
|
|
|
|
2019-02-03 05:34:28 +00:00
|
|
|
|
2019-02-11 11:19:14 +00:00
|
|
|
# Union of all geometries in geopandas' 'naturalearth_lowres' sample layer;
# get_point_information() measures distances to the boundary of that union.
world = geopandas.read_file(
    geopandas.datasets.get_path('naturalearth_lowres')
)[['geometry']].unary_union
boundary = world.boundary

# One temperature and one precipitation column per (year, season) pair,
# grouped by year and, within a year, in SEASONS order.
temp_precip_columns = []
for year in range(MIN_YEAR, MAX_YEAR + 1):
    for s in SEASONS:
        temp_precip_columns.append('temp_{}_{}'.format(s, year))
        temp_precip_columns.append('precip_{}_{}'.format(s, year))

# Fixed per-point fields first, then the seasonal climate columns.
columns = [
    'longitude',
    'latitude',
    'biome_num',
    'biome_name',
    'elevation',
    'distance_to_water',
] + temp_precip_columns
final_data = pd.DataFrame(columns=columns)
|
2019-02-03 05:34:28 +00:00
|
|
|
|
|
|
|
def get_point_information(longitude, latitude):
    """Collect biome, terrain and seasonal climate values for one coordinate.

    Args:
        longitude: X coordinate of the point.
        latitude: Y coordinate of the point.

    Returns:
        ``False`` when no geometry in ``eco`` contains the point. Otherwise a
        dict with ``longitude``, ``latitude``, ``biome_num``, ``biome_name``,
        ``elevation``, ``distance_to_water`` and, for every year from
        ``MIN_YEAR`` to ``MAX_YEAR``, the keys ``temp_<season>_<year>`` and
        ``precip_<season>_<year>`` for winter, spring, summer and autumn.
    """
    location = Point(longitude, latitude)
    matches = eco.loc[lambda frame: frame.geometry.contains(location)]
    if matches.empty:
        return False

    # Fixed fields; only the first matching ecoregion is used.
    item = {
        'longitude': longitude,
        'latitude': latitude,
        'biome_num': matches.BIOME_NUM.iloc[0],
        'biome_name': matches.BIOME_NAME.iloc[0],
        'elevation': elevation_data[elevation.index(longitude, latitude)],
        # Distance is in the coordinate units of `boundary`
        # (presumably degrees -- TODO confirm).
        'distance_to_water': location.distance(boundary),
    }

    # Row position of the grid cell closest to the point (smallest squared
    # longitude/latitude difference), looked up once in the MIN_YEAR tables.
    # NOTE(review): the same positions are reused for every year, which
    # assumes all yearly tables list their cells in the same order -- confirm.
    temp_grid = temp[MIN_YEAR]
    precip_grid = precip[MIN_YEAR]
    temp_row = np.argmin(np.array(
        (temp_grid.longitude - longitude) ** 2
        + (temp_grid.latitude - latitude) ** 2
    ))
    precip_row = np.argmin(np.array(
        (precip_grid.longitude - longitude) ** 2
        + (precip_grid.latitude - latitude) ** 2
    ))

    previous_temp = None
    previous_precip = None
    for year in range(MIN_YEAR, MAX_YEAR + 1):
        # Drop the two coordinate columns; the rest is addressed by label.
        yt = temp[year].iloc[temp_row, 2:]
        yp = precip[year].iloc[precip_row, 2:]

        # Winter is January and February of this year plus December of the
        # previous year; the first year has no previous December.
        winter_temp = [yt.january, yt.february]
        winter_precip = [yp.january, yp.february]
        if year > MIN_YEAR:
            winter_temp.append(previous_temp.december)
            winter_precip.append(previous_precip.december)

        seasonal = (
            ('temp_winter_{}', winter_temp,
             'precip_winter_{}', winter_precip),
            ('temp_spring_{}', [yt[month] for month in SPRING_MONTHS],
             'precip_spring_{}', [yp[month] for month in SPRING_MONTHS]),
            ('temp_summer_{}', [yt[month] for month in SUMMER_MONTHS],
             'precip_summer_{}', [yp[month] for month in SUMMER_MONTHS]),
            ('temp_autumn_{}', [yt[month] for month in AUTUMN_MONTHS],
             'precip_autumn_{}', [yp[month] for month in AUTUMN_MONTHS]),
        )
        for temp_key, temp_values, precip_key, precip_values in seasonal:
            item[temp_key.format(year)] = np.mean(temp_values)
            item[precip_key.format(year)] = np.mean(precip_values)

        previous_temp = yt
        previous_precip = yp

    return item
|
|
|
|
|
2019-03-05 07:59:30 +00:00
|
|
|
# Column-oriented accumulator: one list per output column.
data = {col: [] for col in columns}

start_time = time.time()

# Sweep a one-degree grid. Progress markers written to stdout: '-' at the
# start of each longitude, '.' for a point without an ecoregion, '+' for a
# point whose values were stored.
# NOTE(review): range() excludes its stop value, so longitude 179 and
# latitude 89 are never visited -- confirm that this is intended.
for longitude in range(-179, 179):
    print('-', end='')
    for latitude in range(-89, 89):
        d = get_point_information(longitude, latitude)
        if d is False:
            print('.', end='')
            continue

        for key, value in d.items():
            data[key].append(value)
        print('+', end='')

# Terminate the progress line before reporting the elapsed time.
print('')
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
|
|
|
|
2019-02-03 05:34:28 +00:00
|
|
|
# Turn the column lists into a DataFrame, report the time taken, and print it.
start_time = time.time()
df = pd.DataFrame(data=data)
elapsed = time.time() - start_time
print("--- Generating DataFrame: %s seconds ---" % (elapsed,))
print(df)

# Write the frame to 'data.p'; the timer is restarted so that only the
# pickling step is measured.
start_time = time.time()
df.to_pickle('data.p')
elapsed = time.time() - start_time
print("--- Pickling DataFrame: %s seconds ---" % (elapsed,))
|