import geopandas import os import rasterio import pandas as pd import numpy as np import time from matplotlib import pyplot from shapely.geometry import Point directory = os.path.dirname(os.path.abspath(__file__)) GEODATA = os.path.join(directory, 'geodata') ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp') ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif') TEMP = os.path.join(GEODATA, 'air_temp') PRECIP = os.path.join(GEODATA, 'precipitation') YEAR = 2014 def read_temp_data(year): return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None, names=['longitude', 'latitude', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'november', 'october', 'december', 'yearly_avg']) def read_precip_data(year): return pd.read_csv(os.path.join(PRECIP, 'precip.{}'.format(year)), sep='\s+', header=None, names=['longitude', 'latitude', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'november', 'october', 'december', 'yearly_avg']) eco = geopandas.read_file(ECOREGIONS) elevation = rasterio.open(ELEVATION) elevation_data = elevation.read(1) temp = read_temp_data(YEAR) precip = read_precip_data(YEAR) precip['yearly_avg'] = precip.mean(axis=1) print('# Elevation') print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right)) print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max())) print('# Temperature ({})'.format(YEAR)) print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max())) print('# Precipitation ({})'.format(YEAR)) print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max())) columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg'] indices = ['longitude', 'latitude'] final_data = pd.DataFrame(index=indices, columns=columns) def get_point_information(longitude, latitude): start_time = time.time() p = Point(longitude, latitude) # print('({},{})'.format(longitude, latitude)) ecoregion = eco.loc[lambda c: c.geometry.contains(p)] print("er%ss" % (time.time() - start_time)) if ecoregion.empty: return False start_time = time.time() elev = elevation_data[elevation.index(longitude, latitude)] start_time = time.time() t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2)) start_time = time.time() p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2)) return { 'biome_num': ecoregion.BIOME_NUM.iloc[0], 'biome_name': ecoregion.BIOME_NAME.iloc[0], 'elevation': elev, 'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg, 'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg } data_indices = [] data_map = {} for col in columns: data_map[col] = {} i = 0 start_time = time.time() for longitude in range(-179, 179): print('-', end='') for latitude in range(-89, 89): # generate data and save to file d = get_point_information(longitude, latitude) if d == False: print('.', end='') continue for key, value in d.items(): data_map[key][(longitude, latitude)] = value print('+', end='') print('') print("--- Calculations: %s seconds ---" % (time.time() - start_time)) start_time = time.time() df = pd.DataFrame(data_map) print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time)) print(df.head()) start_time = time.time() df.to_pickle('data.p') print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))