feat(data): seasonal temp/precip data + distance to water
This commit is contained in:
parent
caa1b0443c
commit
ef604661ca
89
data.py
89
data.py
@ -14,7 +14,6 @@ ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
|||||||
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
||||||
TEMP = os.path.join(GEODATA, 'air_temp')
|
TEMP = os.path.join(GEODATA, 'air_temp')
|
||||||
PRECIP = os.path.join(GEODATA, 'precipitation')
|
PRECIP = os.path.join(GEODATA, 'precipitation')
|
||||||
YEAR = 2014
|
|
||||||
|
|
||||||
def read_temp_data(year):
|
def read_temp_data(year):
|
||||||
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
||||||
@ -37,54 +36,83 @@ eco = geopandas.read_file(ECOREGIONS)
|
|||||||
elevation = rasterio.open(ELEVATION)
|
elevation = rasterio.open(ELEVATION)
|
||||||
elevation_data = elevation.read(1)
|
elevation_data = elevation.read(1)
|
||||||
|
|
||||||
temp = read_temp_data(YEAR)
|
temp = {}
|
||||||
|
precip = {}
|
||||||
|
MIN_YEAR = 1900
|
||||||
|
MAX_YEAR = 2017
|
||||||
|
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||||
|
temp[year] = read_temp_data(year)
|
||||||
|
precip[year] = read_precip_data(year)
|
||||||
|
precip[year]['yearly_avg'] = precip[year].mean(axis=1)
|
||||||
|
|
||||||
precip = read_precip_data(YEAR)
|
|
||||||
precip['yearly_avg'] = precip.mean(axis=1)
|
|
||||||
|
|
||||||
print('# Elevation')
|
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))[['geometry']].unary_union
|
||||||
print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
|
boundary = world.boundary
|
||||||
print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
|
|
||||||
|
|
||||||
print('# Temperature ({})'.format(YEAR))
|
temp_precip_columns = []
|
||||||
print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
|
|
||||||
|
|
||||||
print('# Precipitation ({})'.format(YEAR))
|
for year in range(MIN_YEAR, MAX_YEAR + 1):
|
||||||
print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
|
for s in ['winter', 'spring', 'summer', 'autumn']:
|
||||||
|
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
|
||||||
|
|
||||||
columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
|
columns = ['biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
|
||||||
indices = ['longitude', 'latitude']
|
indices = ['longitude', 'latitude']
|
||||||
final_data = pd.DataFrame(index=indices, columns=columns)
|
final_data = pd.DataFrame(index=indices, columns=columns)
|
||||||
|
|
||||||
# Months that make up each non-winter season, in calendar order. Winter is
# handled separately because it borrows December from the previous year.
_SEASON_MONTHS = (
    ('spring', ('march', 'april', 'may')),
    ('summer', ('june', 'july', 'august')),
    ('autumn', ('september', 'october', 'november')),
)


def _seasonal_means(current, previous=None):
    """Return a dict mapping season name to the mean of its monthly values.

    `current` is one year's row of monthly values, indexable by lowercase
    month name. `previous` is the preceding year's row, or None when there
    is none; when given, its December value is included in winter, otherwise
    winter is averaged over January and February only.
    """
    winter = [current['january'], current['february']]
    if previous is not None:
        winter.append(previous['december'])

    means = {'winter': np.mean(winter)}
    for season, months in _SEASON_MONTHS:
        means[season] = np.mean([current[month] for month in months])
    return means


def get_point_information(longitude, latitude):
    """Collect biome, elevation, coastline distance and seasonal climate for a point.

    Returns False when no ecoregion polygon contains the point. Otherwise
    returns a dict with the keys 'biome_num', 'biome_name', 'elevation',
    'distance_to_water', and, for every year in MIN_YEAR..MAX_YEAR and every
    season, 'temp_<season>_<year>' and 'precip_<season>_<year>'.
    """
    point = Point(longitude, latitude)
    ecoregion = eco.loc[lambda c: c.geometry.contains(point)]
    if ecoregion.empty:
        return False

    item = {
        'biome_num': ecoregion.BIOME_NUM.iloc[0],
        'biome_name': ecoregion.BIOME_NAME.iloc[0],
        'elevation': elevation_data[elevation.index(longitude, latitude)],
        # Distance to the boundary of the merged land polygons.
        # NOTE(review): this is a planar distance in coordinate units
        # (presumably degrees), not kilometres - confirm that is intended.
        'distance_to_water': point.distance(boundary),
    }

    # Locate the nearest grid row once, using the first year's frames, and
    # reuse that positional index for every year.
    # NOTE(review): assumes all yearly frames list grid cells in the same
    # row order - verify against the input files.
    temp_row = np.argmin(np.array((temp[MIN_YEAR].longitude - longitude)**2 + (temp[MIN_YEAR].latitude - latitude)**2))
    precip_row = np.argmin(np.array((precip[MIN_YEAR].longitude - longitude)**2 + (precip[MIN_YEAR].latitude - latitude)**2))

    previous_temp = None
    previous_precip = None
    for year in range(MIN_YEAR, MAX_YEAR + 1):
        # Skip the first two columns (presumably longitude/latitude).
        year_temp = temp[year].iloc[temp_row, 2:]
        year_precip = precip[year].iloc[precip_row, 2:]

        temp_means = _seasonal_means(year_temp, previous_temp)
        # Precipitation is averaged from the precipitation row; the earlier
        # code reused the temperature lists here.
        precip_means = _seasonal_means(year_precip, previous_precip)

        for season in ('winter', 'spring', 'summer', 'autumn'):
            item['temp_{}_{}'.format(season, year)] = temp_means[season]
            item['precip_{}_{}'.format(season, year)] = precip_means[season]

        previous_temp = year_temp
        previous_precip = year_precip

    return item
|
||||||
|
|
||||||
data_indices = []
|
data_indices = []
|
||||||
|
|
||||||
data_map = {}
|
data_map = {}
|
||||||
for col in columns:
|
for col in columns:
|
||||||
data_map[col] = {}
|
data_map[col] = {}
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
@ -103,6 +131,7 @@ for longitude in range(-179, 179):
|
|||||||
data_map[key][(longitude, latitude)] = value
|
data_map[key][(longitude, latitude)] = value
|
||||||
|
|
||||||
print('+', end='')
|
print('+', end='')
|
||||||
|
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
||||||
@ -110,7 +139,7 @@ print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
df = pd.DataFrame(data_map)
|
df = pd.DataFrame(data_map)
|
||||||
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
|
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||||
print(df.head())
|
print(df)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
df.to_pickle('data.p')
|
df.to_pickle('data.p')
|
||||||
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))
|
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user