fix(data.py): optimize for optimal performance and generate data
This commit is contained in:
parent
902be97332
commit
caa1b0443c
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
geodata
|
geodata
|
||||||
|
*.p
|
||||||
#### joe made this: http://goel.io/joe
|
#### joe made this: http://goel.io/joe
|
||||||
#### python ####
|
#### python ####
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
102
data.py
102
data.py
@@ -2,16 +2,19 @@ import geopandas
|
|||||||
import os
|
import os
|
||||||
import rasterio
|
import rasterio
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
from matplotlib import pyplot
|
from matplotlib import pyplot
|
||||||
from shapely.geometry import Point
|
from shapely.geometry import Point
|
||||||
|
|
||||||
directory = os.path.dirname(os.path.abspath(__file__))
|
directory = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
GEODATA = os.path.join(directory, 'geodata')
|
GEODATA = os.path.join(directory, 'geodata')
|
||||||
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'Ecoregions2017.shp')
|
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
|
||||||
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
|
||||||
TEMP = os.path.join(GEODATA, 'air_temp')
|
TEMP = os.path.join(GEODATA, 'air_temp')
|
||||||
PRECIP = os.path.join(GEODATA, 'precipitation')
|
PRECIP = os.path.join(GEODATA, 'precipitation')
|
||||||
|
YEAR = 2014
|
||||||
|
|
||||||
def read_temp_data(year):
|
def read_temp_data(year):
|
||||||
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
|
||||||
@@ -30,45 +33,84 @@ def read_precip_data(year):
|
|||||||
'december', 'yearly_avg'])
|
'december', 'yearly_avg'])
|
||||||
|
|
||||||
eco = geopandas.read_file(ECOREGIONS)
|
eco = geopandas.read_file(ECOREGIONS)
|
||||||
|
|
||||||
elevation = rasterio.open(ELEVATION)
|
elevation = rasterio.open(ELEVATION)
|
||||||
|
elevation_data = elevation.read(1)
|
||||||
|
|
||||||
# world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
|
temp = read_temp_data(YEAR)
|
||||||
# world['geometry'] = world['geometry'].unary_union
|
|
||||||
|
|
||||||
# world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
|
precip = read_precip_data(YEAR)
|
||||||
# print(world.head())
|
precip['yearly_avg'] = precip.mean(axis=1)
|
||||||
# world = world[['continent', 'geometry']]
|
|
||||||
# continents = world.dissolve(level=1)
|
|
||||||
# continents.plot();
|
|
||||||
|
|
||||||
# print(eco.head())
|
print('# Elevation')
|
||||||
# print(elevation)
|
print('bounds: left={} bottom={} top={} right={}'.format(elevation.bounds.left, elevation.bounds.bottom, elevation.bounds.top, elevation.bounds.right))
|
||||||
|
print('min: {}, max: {}\n'.format(elevation_data.min(), elevation_data.max()))
|
||||||
|
|
||||||
# eco['geometry'].boundary.plot()
|
print('# Temperature ({})'.format(YEAR))
|
||||||
# eco.dissolve()
|
print('Yearly average min: {}, max: {}\n'.format(temp.yearly_avg.min(), temp.yearly_avg.max()))
|
||||||
|
|
||||||
|
print('# Precipitation ({})'.format(YEAR))
|
||||||
|
print('Yearly average min: {}, max: {}\n'.format(precip.yearly_avg.min(), precip.yearly_avg.max()))
|
||||||
|
|
||||||
|
columns = ['biome_num', 'biome_name', 'elevation', 'temp_yearly_avg', 'precip_yearly_avg']
|
||||||
|
indices = ['longitude', 'latitude']
|
||||||
|
final_data = pd.DataFrame(index=indices, columns=columns)
|
||||||
|
|
||||||
# eco.plot()
|
|
||||||
# # rasterio.plot.show(src)
|
|
||||||
# # pyplot.imshow(elevation.read(1))
|
|
||||||
# 51.42
|
|
||||||
# 35.69
|
|
||||||
# tehran = eco.geometry.contains()
|
|
||||||
def get_point_information(longitude, latitude):
|
def get_point_information(longitude, latitude):
|
||||||
|
start_time = time.time()
|
||||||
p = Point(longitude, latitude)
|
p = Point(longitude, latitude)
|
||||||
|
# print('({},{})'.format(longitude, latitude))
|
||||||
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
|
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
|
||||||
|
print("er%ss" % (time.time() - start_time))
|
||||||
|
if ecoregion.empty:
|
||||||
|
return False
|
||||||
|
start_time = time.time()
|
||||||
|
elev = elevation_data[elevation.index(longitude, latitude)]
|
||||||
|
start_time = time.time()
|
||||||
|
t = np.argmin(np.array((temp.longitude - longitude)**2 + (temp.latitude - latitude)**2))
|
||||||
|
start_time = time.time()
|
||||||
|
p = np.argmin(np.array((precip.longitude - longitude)**2 + (precip.latitude - latitude)**2))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'biome_num': ecoregion.loc['BIOME_NUM'].iloc[0],
|
'biome_num': ecoregion.BIOME_NUM.iloc[0],
|
||||||
'biome_name': ecoregion.loc['BIOME_NAME'].iloc[0],
|
'biome_name': ecoregion.BIOME_NAME.iloc[0],
|
||||||
|
'elevation': elev,
|
||||||
|
'temp_yearly_avg': temp.iloc[t, 2:].yearly_avg,
|
||||||
|
'precip_yearly_avg': precip.iloc[p, 2:].yearly_avg
|
||||||
}
|
}
|
||||||
import time
|
|
||||||
|
|
||||||
|
data_indices = []
|
||||||
|
|
||||||
|
data_map = {}
|
||||||
|
for col in columns:
|
||||||
|
data_map[col] = {}
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
print('Before call')
|
|
||||||
print('Tehran', get_point_information(51.42, 35.69))
|
for longitude in range(-179, 179):
|
||||||
print("--- %s seconds ---" % (time.time() - start_time))
|
print('-', end='')
|
||||||
|
for latitude in range(-89, 89):
|
||||||
|
# generate data and save to file
|
||||||
|
d = get_point_information(longitude, latitude)
|
||||||
|
if d == False:
|
||||||
|
print('.', end='')
|
||||||
|
continue
|
||||||
|
|
||||||
|
for key, value in d.items():
|
||||||
|
data_map[key][(longitude, latitude)] = value
|
||||||
|
|
||||||
|
print('+', end='')
|
||||||
|
print('')
|
||||||
|
|
||||||
|
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
print('Amazon', get_point_information(-59.78, -5.5))
|
df = pd.DataFrame(data_map)
|
||||||
print("--- %s seconds ---" % (time.time() - start_time))
|
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||||
# print(eco.geometry)
|
print(df.head())
|
||||||
# print(tehran.distance(world.boundary))
|
start_time = time.time()
|
||||||
# world.boundary.plot()
|
df.to_pickle('data.p')
|
||||||
pyplot.show()
|
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))
|
||||||
|
Loading…
Reference in New Issue
Block a user