fix(data.py): optimize for optimal performance and generate data
This commit is contained in:
parent
902be97332
commit
caa1b0443c
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
geodata
|
||||
*.p
|
||||
#### joe made this: http://goel.io/joe
|
||||
#### python ####
|
||||
# Byte-compiled / optimized / DLL files
|
||||
|
102
data.py
102
data.py
@ -2,16 +2,19 @@ import geopandas
|
||||
import os
|
||||
import rasterio
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import time
|
||||
from matplotlib import pyplot
|
||||
from shapely.geometry import Point
|
||||
|
||||
directory = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
GEODATA = os.path.join(directory, 'geodata')
|
||||
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'Ecoregions2017.shp')
|
||||
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
||||
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
||||
TEMP = os.path.join(GEODATA, 'air_temp')
|
||||
PRECIP = os.path.join(GEODATA, 'precipitation')
|
||||
YEAR = 2014
|
||||
|
||||
def read_temp_data(year):
|
||||
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
||||
@ -30,45 +33,84 @@ def read_precip_data(year):
|
||||
'december', 'yearly_avg'])
|
||||
|
||||
eco = geopandas.read_file(ECOREGIONS)
|
||||
|
||||
elevation = rasterio.open(ELEVATION)
|
||||
elevation_data = elevation.read(1)
|
||||
|
||||
# world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
|
||||
# world['geometry'] = world['geometry'].unary_union
|
||||
temp = read_temp_data(YEAR)
|
||||
|
||||
# world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
|
||||
# print(world.head())
|
||||
# world = world[['continent', 'geometry']]
|
||||
# continents = world.dissolve(level=1)
|
||||
# continents.plot();
|
||||
precip = read_precip_data(YEAR)
|
||||
precip['yearly_avg'] = precip.mean(axis=1)
|
||||
|
||||
# print(eco.head())
|
||||
# print(elevation)
|
||||
print('# Elevation')
|
||||
print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
|
||||
print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
|
||||
|
||||
# eco['geometry'].boundary.plot()
|
||||
# eco.dissolve()
|
||||
print('# Temperature ({})'.format(YEAR))
|
||||
print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
|
||||
|
||||
print('# Precipitation ({})'.format(YEAR))
|
||||
print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
|
||||
|
||||
columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
|
||||
indices = ['longitude', 'latitude']
|
||||
final_data = pd.DataFrame(index=indices, columns=columns)
|
||||
|
||||
# eco.plot()
|
||||
# # rasterio.plot.show(src)
|
||||
# # pyplot.imshow(elevation.read(1))
|
||||
# 51.42
|
||||
# 35.69
|
||||
# tehran = eco.geometry.contains()
|
||||
def get_point_information(longitude, latitude):
|
||||
start_time = time.time()
|
||||
p = Point(longitude, latitude)
|
||||
# print('({},{})'.format(longitude, latitude))
|
||||
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
|
||||
print("er%ss" % (time.time() - start_time))
|
||||
if ecoregion.empty:
|
||||
return False
|
||||
start_time = time.time()
|
||||
elev = elevation_data[elevation.index(longitude, latitude)]
|
||||
start_time = time.time()
|
||||
t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2))
|
||||
start_time = time.time()
|
||||
p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2))
|
||||
|
||||
return {
|
||||
'biome_num': ecoregion.loc['BIOME_NUM'].iloc[0],
|
||||
'biome_name': ecoregion.loc['BIOME_NAME'].iloc[0],
|
||||
'biome_num': ecoregion.BIOME_NUM.iloc[0],
|
||||
'biome_name': ecoregion.BIOME_NAME.iloc[0],
|
||||
'elevation': elev,
|
||||
'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg,
|
||||
'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg
|
||||
}
|
||||
import time
|
||||
|
||||
|
||||
data_indices = []
|
||||
|
||||
data_map = {}
|
||||
for col in columns:
|
||||
data_map[col] = {}
|
||||
|
||||
i = 0
|
||||
|
||||
start_time = time.time()
|
||||
print('Before call')
|
||||
print('Tehran', get_point_information(51.42, 35.69))
|
||||
print("--- %s seconds ---" % (time.time() - start_time))
|
||||
|
||||
for longitude in range(-179, 179):
|
||||
print('-', end='')
|
||||
for latitude in range(-89, 89):
|
||||
# generate data and save to file
|
||||
d = get_point_information(longitude, latitude)
|
||||
if d == False:
|
||||
print('.', end='')
|
||||
continue
|
||||
|
||||
for key, value in d.items():
|
||||
data_map[key][(longitude, latitude)] = value
|
||||
|
||||
print('+', end='')
|
||||
print('')
|
||||
|
||||
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
||||
|
||||
start_time = time.time()
|
||||
print('Amazon', get_point_information(-59.78, -5.5))
|
||||
print("--- %s seconds ---" % (time.time() - start_time))
|
||||
# print(eco.geometry)
|
||||
# print(tehran.distance(world.boundary))
|
||||
# world.boundary.plot()
|
||||
pyplot.show()
|
||||
df = pd.DataFrame(data_map)
|
||||
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||
print(df.head())
|
||||
start_time = time.time()
|
||||
df.to_pickle('data.p')
|
||||
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||
|
Loading…
Reference in New Issue
Block a user