"""Build a per-grid-point table of biome, elevation, coast distance and climate.

For every integer (longitude, latitude) pair on the grid the script looks up
the containing ecoregion, the elevation raster value, the distance to the
land boundary and per-season temperature/precipitation means for every year
in ``MIN_YEAR..MAX_YEAR``. The result is pickled to ``data.p``.

All paths, year bounds and season definitions come from ``constants``.
"""
import os
import time

import geopandas
import numpy as np
import pandas as pd
import rasterio
from matplotlib import pyplot
from shapely.geometry import Point

from constants import *

# Month columns in calendar order, as they are labelled in the climate tables.
_MONTH_COLUMNS = [
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
]
_CLIMATE_COLUMNS = ['longitude', 'latitude'] + _MONTH_COLUMNS + ['yearly_avg']


def _read_climate_table(directory, prefix, year):
    """Read the whitespace-separated table ``<directory>/<prefix>.<year>``."""
    path = os.path.join(directory, '{}.{}'.format(prefix, year))
    return pd.read_csv(path, sep=r'\s+', header=None, names=_CLIMATE_COLUMNS)


def read_temp_data(year):
    """Return the air-temperature table for ``year`` as a DataFrame."""
    return _read_climate_table(TEMP, 'air_temp', year)


def read_precip_data(year):
    """Return the precipitation table for ``year`` as a DataFrame."""
    return _read_climate_table(PRECIP, 'precip', year)


eco = geopandas.read_file(ECOREGIONS)
elevation = rasterio.open(ELEVATION)
elevation_data = elevation.read(1)

temp = {}
precip = {}
for year in range(MIN_YEAR, MAX_YEAR + 1):
    temp[year] = read_temp_data(year)
    precip[year] = read_precip_data(year)
    # Average over the month columns only; a row-wise mean over the whole
    # frame would also fold in longitude, latitude and the old yearly_avg.
    precip[year]['yearly_avg'] = precip[year][_MONTH_COLUMNS].mean(axis=1)

# NOTE(review): geopandas.datasets and unary_union are deprecated in recent
# geopandas releases -- confirm the pinned version before upgrading.
world = geopandas.read_file(
    geopandas.datasets.get_path('naturalearth_lowres')
)[['geometry']].unary_union
boundary = world.boundary

temp_precip_columns = []
for year in range(MIN_YEAR, MAX_YEAR + 1):
    for s in SEASONS:
        temp_precip_columns += ['temp_{}_{}'.format(s, year),
                                'precip_{}_{}'.format(s, year)]

columns = ['biome_num', 'biome_name', 'elevation',
           'distance_to_water'] + temp_precip_columns


def _nearest_row(table, longitude, latitude):
    """Return the positional index of the row of ``table`` closest to the point."""
    squared_distance = ((table.longitude - longitude) ** 2
                        + (table.latitude - latitude) ** 2)
    return np.argmin(np.array(squared_distance))


def _seasonal_values(current, previous):
    """Group one year's monthly values by season.

    ``current`` is the row of monthly values for the year; ``previous`` is the
    row for the preceding year, or ``None`` when there is none. Winter is
    January and February plus the preceding December when available.
    """
    winter = [current.january, current.february]
    if previous is not None:
        winter.append(previous.december)
    return {
        'winter': winter,
        'spring': [current[month] for month in SPRING_MONTHS],
        'summer': [current[month] for month in SUMMER_MONTHS],
        'autumn': [current[month] for month in AUTUMN_MONTHS],
    }


def get_point_information(longitude, latitude):
    """Collect every output column for one grid point.

    Returns a dict keyed by column name (``biome_num``, ``biome_name``,
    ``elevation``, ``distance_to_water`` and ``temp_<season>_<year>`` /
    ``precip_<season>_<year>`` for each year), or ``False`` when no ecoregion
    contains the point.
    """
    item = {}
    point = Point(longitude, latitude)

    ecoregion = eco.loc[lambda c: c.geometry.contains(point)]
    if ecoregion.empty:
        return False
    item['biome_num'] = ecoregion.BIOME_NUM.iloc[0]
    item['biome_name'] = ecoregion.BIOME_NAME.iloc[0]

    # elevation.index() yields the raster cell for the coordinates; the
    # resulting tuple indexes the band array directly.
    item['elevation'] = elevation_data[elevation.index(longitude, latitude)]

    # Planar distance in the coordinate units of the geometries
    # (presumably degrees, since the point is built from lon/lat).
    item['distance_to_water'] = point.distance(boundary)

    # The nearest row is located once in the MIN_YEAR tables and reused for
    # every year -- assumes all years share the same row order.
    temp_row = _nearest_row(temp[MIN_YEAR], longitude, latitude)
    precip_row = _nearest_row(precip[MIN_YEAR], longitude, latitude)

    previous_temp = None
    previous_precip = None
    for year in range(MIN_YEAR, MAX_YEAR + 1):
        # Drop the two leading coordinate columns; keep the monthly values.
        year_temp = temp[year].iloc[temp_row, 2:]
        year_precip = precip[year].iloc[precip_row, 2:]

        per_kind = (
            ('temp', _seasonal_values(year_temp, previous_temp)),
            ('precip', _seasonal_values(year_precip, previous_precip)),
        )
        for kind, seasons in per_kind:
            for season, values in seasons.items():
                item['{}_{}_{}'.format(kind, season, year)] = np.mean(values)

        previous_temp = year_temp
        previous_precip = year_precip

    return item


# One {(longitude, latitude): value} mapping per output column.
data_map = {col: {} for col in columns}

start_time = time.time()
# NOTE(review): both ranges are half-open, so longitude 179 and latitude 89
# are never visited -- confirm this is intended.
for longitude in range(-179, 179):
    print('-', end='')
    for latitude in range(-89, 89):
        # generate data and save to file
        d = get_point_information(longitude, latitude)
        if d is False:
            print('.', end='')
            continue
        for key, value in d.items():
            data_map[key][(longitude, latitude)] = value
        print('+', end='')
    print('')
print("--- Calculations: %s seconds ---" % (time.time() - start_time))

start_time = time.time()
df = pd.DataFrame(data_map)
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
print(df)

start_time = time.time()
df.to_pickle('data.p')
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))