chore(biomes): move biomes to /biomes

Author: Mahdi Dibaiee
Date: 2019-04-09 08:20:32 +04:30
parent e977239027
commit e29d461319
9 changed files with 0 additions and 0 deletions

biomes/INSTALL.md Normal file

@@ -0,0 +1,8 @@
```
pyenv install $(cat .python-version)
pyenv local
pip install -r requirements.txt
apt install proj-bin libproj-dev # https://proj4.org/install.html#install
apt install libgeos-3.6.2 libgeos-dev libgeos++-dev # https://packages.ubuntu.com/search?keywords=geos&searchon=sourcenames&suite=all&section=all
```
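A quick way to verify the geospatial stack after the steps above (a hedged sketch; the package names come from requirements.txt in this commit):

```python
# sanity check, assuming the dependencies installed cleanly
import cartopy
import geopandas
import rasterio

print(cartopy.__version__, geopandas.__version__, rasterio.__version__)
```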

biomes/constants.py Normal file

@@ -0,0 +1,81 @@
import os
directory = os.path.dirname(os.path.abspath(__file__))
GEODATA = os.path.join(directory, 'geodata')
ECOREGIONS = os.path.join(GEODATA, 'ecoregions', 'single-parts.shp')
ELEVATION = os.path.join(GEODATA, 'srtm', 'topo30-180.tif')
TEMP = os.path.join(GEODATA, 'air_temp')
PRECIP = os.path.join(GEODATA, 'precipitation')
MIN_YEAR = 1900
MAX_YEAR = 2017
SEASONS = ['winter', 'spring', 'summer', 'autumn']
WINTER_MONTHS = ['december', 'january', 'february']
SPRING_MONTHS = ['march', 'april', 'may']
SUMMER_MONTHS = ['june', 'july', 'august']
AUTUMN_MONTHS = ['september', 'october', 'november']
INPUTS = ['elevation', 'distance_to_water', 'latitude']
OUTPUT = 'biome_num'
BIOMES = [
{
'name': 'Tropical & Subtropical Moist Broadleaf Forests',
'color': '#016936',
},
{
'name': 'Tropical & Subtropical Dry Broadleaf Forests',
'color': '#B2D127',
},
{
'name': 'Tropical & Subtropical Coniferous Forests',
'color': '#77CC00',
},
{
'name': 'Temperate Broadleaf & Mixed Forests',
'color': '#99C500',
},
{
'name': 'Temperate Conifer Forests',
'color': '#B6CC00',
},
{
'name': 'Boreal Forests/Taiga',
'color': '#00C5B5',
},
{
'name': 'Tropical & Subtropical Grasslands, Savannas & Shrublands',
'color': '#EFFF00',
},
{
'name': 'Temperate Grasslands, Savannas & Shrublands',
'color': '#FFEE00',
},
{
'name': 'Flooded Grasslands & Savannas',
'color': '#009BFF',
},
{
'name': 'Montane Grasslands & Shrublands',
'color': '#A0ADBA',
},
{
'name': 'Tundra',
'color': '#5C62FF',
},
{
'name': 'Mediterranean Forests, Woodlands & Scrub',
'color': '#00850F',
},
{
'name': 'Deserts & Xeric Shrublands',
'color': '#FF9E1F',
},
{
'name': 'Mangroves',
'color': '#FF1F97'
}
]
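BIOMES is an index-based lookup: draw.py below reads each entry's name and color to build the map legend. A minimal sketch of using it directly:

```python
# a minimal sketch: map a biome index to its display colour
from constants import BIOMES

colors = {i: b['color'] for i, b in enumerate(BIOMES)}
print(colors[0])  # '#016936', Tropical & Subtropical Moist Broadleaf Forests
```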

biomes/data.py Normal file

@@ -0,0 +1,137 @@
import geopandas
import rasterio
import pandas as pd
import numpy as np
import time
from matplotlib import pyplot
from shapely.geometry import Point
from constants import *
def read_temp_data(year):
return pd.read_csv(os.path.join(TEMP, 'air_temp.{}'.format(year)), sep='\s+', header=None,
names=['longitude', 'latitude', 'january',
'february', 'march', 'april',
'may', 'june', 'july', 'august',
'september', 'october', 'november',
'december', 'yearly_avg'])
def read_precip_data(year):
return pd.read_csv(os.path.join(PRECIP, 'precip.{}'.format(year)), sep='\s+', header=None,
names=['longitude', 'latitude', 'january',
'february', 'march', 'april',
'may', 'june', 'july', 'august',
'september', 'october', 'november',
'december', 'yearly_avg'])
eco = geopandas.read_file(ECOREGIONS)
elevation = rasterio.open(ELEVATION)
elevation_data = elevation.read(1)
temp = {}
precip = {}
for year in range(MIN_YEAR, MAX_YEAR + 1):
temp[year] = read_temp_data(year)
precip[year] = read_precip_data(year)
# average over the month columns only, excluding longitude/latitude and the existing yearly_avg column
precip[year]['yearly_avg'] = precip[year].loc[:, 'january':'december'].mean(axis=1)
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))[['geometry']].unary_union
boundary = world.boundary
temp_precip_columns = []
for year in range(MIN_YEAR, MAX_YEAR + 1):
for s in SEASONS:
temp_precip_columns += ['temp_{}_{}'.format(s, year), 'precip_{}_{}'.format(s, year)]
columns = ['longitude', 'latitude', 'biome_num', 'biome_name', 'elevation', 'distance_to_water'] + temp_precip_columns
final_data = pd.DataFrame(columns=columns)
def get_point_information(longitude, latitude):
item = {}
p = Point(longitude, latitude)
ecoregion = eco.loc[lambda c: c.geometry.contains(p)]
if ecoregion.empty:
return False
item['longitude'] = longitude
item['latitude'] = latitude
item['biome_num'] = ecoregion.BIOME_NUM.iloc[0]
item['biome_name'] = ecoregion.BIOME_NAME.iloc[0]
elev = elevation_data[elevation.index(longitude, latitude)]
item['elevation'] = elev
distance_to_sea = p.distance(boundary)
item['distance_to_water'] = distance_to_sea
# indices of the nearest grid cells in the temperature and precipitation tables
temp_idx = np.argmin(np.array((temp[MIN_YEAR].longitude - longitude)**2 + (temp[MIN_YEAR].latitude - latitude)**2))
precip_idx = np.argmin(np.array((precip[MIN_YEAR].longitude - longitude)**2 + (precip[MIN_YEAR].latitude - latitude)**2))
yearly_temp = {}
yearly_precip = {}
for year in range(MIN_YEAR, MAX_YEAR + 1):
yearly_temp[year] = yt = temp[year].iloc[temp_idx, 2:]
yearly_precip[year] = yp = precip[year].iloc[precip_idx, 2:]
winter_temp = [yt.january, yt.february] + ([yearly_temp[year - 1].december] if year > MIN_YEAR else [])
winter_precip = [yp.january, yp.february] + ([yearly_precip[year - 1].december] if year > MIN_YEAR else [])
spring_temp = [yt[month] for month in SPRING_MONTHS]
spring_precip = [yp[month] for month in SPRING_MONTHS]
summer_temp = [yt[month] for month in SUMMER_MONTHS]
summer_precip = [yp[month] for month in SUMMER_MONTHS]
autumn_temp = [yt[month] for month in AUTUMN_MONTHS]
autumn_precip = [yp[month] for month in AUTUMN_MONTHS]
item['temp_winter_{}'.format(year)] = np.mean(winter_temp)
item['precip_winter_{}'.format(year)] = np.mean(winter_precip)
item['temp_spring_{}'.format(year)] = np.mean(spring_temp)
item['precip_spring_{}'.format(year)] = np.mean(spring_precip)
item['temp_summer_{}'.format(year)] = np.mean(summer_temp)
item['precip_summer_{}'.format(year)] = np.mean(summer_precip)
item['temp_autumn_{}'.format(year)] = np.mean(autumn_temp)
item['precip_autumn_{}'.format(year)] = np.mean(autumn_precip)
return item
data = {}
for col in columns:
data[col] = []
# i = 0
start_time = time.time()
for longitude in range(-179, 179):
print('-', end='')
for latitude in range(-89, 89):
# generate data and save to file
d = get_point_information(longitude, latitude)
if d is False:
print('.', end='')
continue
for key, value in d.items():
data[key].append(value)
print('+', end='')
print('')
print("--- Calculations: %s seconds ---" % (time.time() - start_time))
start_time = time.time()
df = pd.DataFrame(data)
print("--- Generating DataFrame: %s seconds ---" % (time.time() - start_time))
print(df)
start_time = time.time()
df.to_pickle('data.p')
print("--- Pickling DataFrame: %s seconds ---" % (time.time() - start_time))

biomes/draw.py Normal file

@@ -0,0 +1,51 @@
import fire
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Circle, Patch
from utils import logger
from constants import BIOMES
import pandas as pd
import cartopy.crs as ccrs
def draw(df, path=None):
logger.debug('draw(df, %s)', path)
biomes = {}
biome_numbers = df['biome_num'].unique()
for i, row in df.iterrows():
p = (row.longitude, row.latitude)
if row.biome_num in biomes:
biomes[row.biome_num].append(p)
else:
biomes[row.biome_num] = [p]
ax = plt.axes(projection=ccrs.PlateCarree())
ax.stock_img()
legend_handles = []
for n in biome_numbers:
color = BIOMES[n]['color']
patches = [Circle(p, radius=0.4) for p in biomes[n]]
collection = PatchCollection(patches, color=color)
legend_handles.append(Patch(color=color, label=BIOMES[n]['name']))
ax.add_collection(collection)
ax.legend(handles=legend_handles, loc='center left', bbox_to_anchor=(1, 0.5), markerscale=4)
ax.autoscale_view()
figure = plt.gcf()
figure.set_size_inches(23.22, 13)
figure.subplots_adjust(left=0.02, right=0.79)
if path:
plt.savefig(path)
else:
plt.show()
def draw_cmd(path=None):
draw(pd.read_pickle('data.p'), path=path)
if __name__ == "__main__":
fire.Fire(draw_cmd)
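draw_cmd is exposed through fire, so the module can be run as `python draw.py --path=map.png`; the equivalent call from Python, as a sketch:

```python
# equivalent to `python draw.py --path=map.png`, assuming data.p exists
import pandas as pd
from draw import draw

draw(pd.read_pickle('data.p'), path='map.png')  # omit path to open an interactive window
```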

biomes/model.py Normal file

@@ -0,0 +1,144 @@
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import pandas as pd
from utils import *
RANDOM_SEED = 1
logger.debug('Tensorflow version: %s', tf.__version__)
logger.debug('Random Seed: %s', RANDOM_SEED)
tf.set_random_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
DEFAULT_BATCH_SIZE = 256
DEFAULT_LAYERS = [512, 512]
DEFAULT_BUFFER_SIZE = 500
DEFAULT_OUT_ACTIVATION = tf.nn.softmax
DEFAULT_LOSS = 'sparse_categorical_crossentropy'
DEFAULT_OPTIMIZER = tf.keras.optimizers.Adam(lr=0.001)
class Model():
def __init__(self, name, epochs=1):
self.name = name
self.path = "checkpoints/{}.hdf5".format(name)
self.epochs = epochs
def prepare_dataset(self, df, fn, **kwargs):
self.dataset_fn = fn
self.set_dataset(*fn(df), **kwargs)
def set_dataset(self, dataset_size, features, output_size, class_weight, dataset, shuffle_buffer_size=DEFAULT_BUFFER_SIZE, batch_size=DEFAULT_BATCH_SIZE):
self.shuffle_buffer_size = shuffle_buffer_size
self.class_weight = class_weight
self.dataset = dataset.shuffle(self.shuffle_buffer_size)
self.TRAIN_SIZE = int(dataset_size * 0.85)
self.TEST_SIZE = dataset_size - self.TRAIN_SIZE
(training, test) = (self.dataset.take(self.TRAIN_SIZE),
self.dataset.skip(self.TRAIN_SIZE))
logger.debug('Model dataset info: size=%s, train=%s, test=%s', dataset_size, self.TRAIN_SIZE, self.TEST_SIZE)
self.dataset_size = dataset_size
self.features = features
self.output_size = output_size
self.training = training
self.test = test
logger.debug('Model input size: %s', self.features)
logger.debug('Model output size: %s', self.output_size)
self.batch_size = batch_size
self.training_batched = self.training.batch(self.batch_size).repeat()
self.test_batched = self.test.batch(self.batch_size).repeat()
def create_model(self, layers=DEFAULT_LAYERS, out_activation=DEFAULT_OUT_ACTIVATION):
params = {
'kernel_initializer': 'lecun_uniform',
'bias_initializer': 'zeros',
# 'kernel_regularizer': keras.regularizers.l2(l=0.01)
'input_shape': [self.features]
}
activation = tf.nn.elu
logger.debug('Model layer parameters: %s', params)
logger.debug('Model layer sizes: %s', layers)
logger.debug('Model layer activation function: %s', activation)
logger.debug('Model out activation function: %s', out_activation)
self.model = keras.Sequential([
keras.layers.Dense(n, activation=activation, **params) for n in layers
] + [
keras.layers.Dense(self.output_size, activation=out_activation, **params)
])
def compile(self, loss=DEFAULT_LOSS, metrics=['accuracy'], optimizer=DEFAULT_OPTIMIZER):
logger.debug('Model loss function: %s', loss)
logger.debug('Model optimizer: %s', optimizer)
logger.debug('Model metrics: %s', metrics)
self.model.compile(loss=loss,
optimizer=optimizer,
metrics=metrics)
def restore(self, path):
logger.debug('Restoring model weights from path: %s', path)
return self.model.load_weights(path)
def save(self, path):
logger.debug('Saving model weights to path: %s', path)
self.model.save_weights(path)
return path
def evaluate(self):
# evaluate on the held-out, batched test split
return self.model.evaluate(
self.test_batched,
steps=int(self.TEST_SIZE / self.batch_size),
verbose=1
)
def evaluate_print(self):
loss, accuracy = self.evaluate()
print('Test evaluation: loss: {}, accuracy: {}'.format(loss, accuracy))
def train(self, config):
self.model.summary()
# map_callback = MapHistory()
out = self.model.fit(
self.training_batched,
batch_size=self.batch_size,
epochs=self.epochs,
steps_per_epoch=int(self.TRAIN_SIZE / self.batch_size),
class_weight=self.class_weight,
validation_data=self.test_batched,
validation_steps=int(self.TEST_SIZE / self.batch_size),
verbose=1
)
return out
def predict(self, a):
return np.argmax(self.model.predict(a), axis=1)
def prepare_for_use(self, df=None, batch_size=DEFAULT_BATCH_SIZE, layers=DEFAULT_LAYERS, out_activation=DEFAULT_OUT_ACTIVATION, loss=DEFAULT_LOSS, optimizer=DEFAULT_OPTIMIZER):
if df is None:
df = pd.read_pickle('data.p')
self.prepare_dataset(df, dataframe_to_dataset_biomes, batch_size=batch_size)
self.create_model(layers=layers, out_activation=out_activation)
self.compile(loss=loss, optimizer=optimizer)
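A hedged sketch of driving Model directly, outside Ray Tune (names and defaults as defined above; data.p must already exist):

```python
# train and evaluate the biome classifier by hand
import pandas as pd
from model import Model

m = Model('b', epochs=1)
m.prepare_for_use(df=pd.read_pickle('data.p'))  # dataset, network and compile step
m.train(None)                                   # the config argument is unused by Model.train
m.evaluate_print()
m.save('checkpoints/b.hdf5')
```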

biomes/predict.py Normal file

@@ -0,0 +1,58 @@
import fire
import numpy as np
from utils import *
#from nn import compile_b
from constants import INPUTS
from model import Model
from draw import draw
def predicted_map(B, change=0, path=None):
year = MAX_YEAR - 1
df = pd.read_pickle('data.p')
logger.info('temperature change of %s', change)
inputs = list(INPUTS)
for season in SEASONS:
inputs += [
'temp_{}_{}'.format(season, year),
'precip_{}_{}'.format(season, year)
]
# copy so the temperature shift below does not modify df or the normalization reference
frame = df[inputs + ['longitude']].copy()
frame_cp = df[inputs + ['longitude']].copy()
for season in SEASONS:
frame.loc[:, 'temp_{}_{}'.format(season, year)] += change
columns = ['latitude', 'longitude', 'biome_num']
new_data = pd.DataFrame(columns=columns)
nframe = pd.DataFrame(columns=frame.columns, data=normalize_ndarray(frame.to_numpy(), frame_cp.to_numpy()))
for i, (chunk, chunk_original) in enumerate(zip(chunker(nframe, B.batch_size), chunker(frame, B.batch_size))):
if chunk.shape[0] < B.batch_size:
continue
input_data = chunk.loc[:, inputs].values
out = B.predict(input_data)
f = pd.DataFrame({
'longitude': chunk_original.loc[:, 'longitude'],
'latitude': chunk_original.loc[:, 'latitude'],
'biome_num': out
}, columns=columns)
new_data = new_data.append(f)
draw(new_data, path=path)
def predicted_map_cmd(checkpoint='checkpoints/save.h5', change=0, path=None):
B = Model('b', epochs=1)
B.prepare_for_use()
B.restore(checkpoint)
predicted_map(B, change=change, path=path)
if __name__ == "__main__":
fire.Fire(predicted_map_cmd)
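predicted_map_cmd is exposed through fire, e.g. `python predict.py --checkpoint=checkpoints/save.h5 --change=2 --path=plus2.png`; the change value is added to every seasonal temperature before prediction, so the map shows the biomes the model expects under that warming. The same from Python, as a sketch:

```python
# equivalent to the fire CLI call above; the output path is just an example
from predict import predicted_map_cmd

predicted_map_cmd(checkpoint='checkpoints/save.h5', change=2, path='plus2.png')
```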

biomes/requirements.txt Normal file

@@ -0,0 +1,13 @@
geopandas==0.4.0
geopy==0.99
matplotlib==3.0.2
descartes==1.1.0
pysal==2.0.0
rasterio==1.0.15
tensorflow==1.13.1
Cartopy==0.17.0
numpy==1.16.1
scikit-learn==0.20.3
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.7.0.dev1-cp36-cp36m-manylinux1_x86_64.whl
fire==0.1.3
psutil==5.6.1

biomes/train.py Normal file

@@ -0,0 +1,67 @@
import fire
import ray
import pandas as pd
import tensorflow as tf
from ray import tune
from tensorflow import keras
from utils import logger
from model import Model
B_params = {
'batch_size': tune.grid_search([256]),
'layers': tune.grid_search([[512, 512]]),
'lr': tune.grid_search([1e-4]),
'optimizer': tune.grid_search([tf.keras.optimizers.Adam]),
}
df = pd.read_pickle('data.p')
class TuneB(tune.Trainable):
def _setup(self, config):
logger.debug('Ray Tune model configuration %s', config)
self.model = Model('b', epochs=1)
optimizer = config['optimizer'](lr=config['lr'])
self.model.prepare_for_use(df=df, batch_size=config['batch_size'], layers=config['layers'], optimizer=optimizer)
def _train(self):
logs = self.model.train(self.config)
metrics = {
'mean_accuracy': logs.history['acc'][0],
'loss': logs.history['loss'][0],
'val_accuracy': logs.history['val_acc'][0],
'val_loss': logs.history['val_loss'][0],
}
return metrics
def _save(self, checkpoint_dir):
return self.model.save(checkpoint_dir)
def _restore(self, path):
return self.model.restore(path)
def start_tuning(cpu=1, gpu=2, checkpoint_freq=1, checkpoint_at_end=True, resume=False, restore=None, stop=500):
ray.init()
tune.run(TuneB,
config=B_params,
resources_per_trial={
"cpu": cpu,
"gpu": gpu
},
resume=resume,
checkpoint_at_end=checkpoint_at_end,
checkpoint_freq=checkpoint_freq,
restore=restore,
stop={
'training_iteration': stop
})
if __name__ == "__main__":
fire.Fire(start_tuning)
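start_tuning is likewise a fire command, so a search can be launched as `python train.py --cpu=4 --gpu=1 --stop=100` or from Python; the resource numbers below are examples and depend on the machine:

```python
# a minimal sketch of launching the Ray Tune grid search defined in B_params
from train import start_tuning

start_tuning(cpu=4, gpu=1, checkpoint_freq=1, stop=100)
```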

biomes/utils.py Normal file

@@ -0,0 +1,101 @@
import numpy as np
import tensorflow as tf
import pandas as pd
from collections import Counter
from sklearn.utils import class_weight
from constants import *
import logging
import os
logger = logging.getLogger('main')
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
def normalize(v, o=None):
if o is None:
o = v
return (v - np.mean(o)) / np.std(o)
def normalize_ndarray(ar, o=None):
if o is None:
o = ar
# transpose: operate over columns
tr = np.transpose(ar)
to = np.transpose(o)
for i in range(tr.shape[0]):
tr[i] = normalize(tr[i], to[i])
# transpose back
return np.transpose(tr)
def dataframe_to_dataset_biomes(df):
rows = df.shape[0]
# 8 for seasonal temp and precipitation
# 3 for latitude, elevation and distance_to_water
input_columns = 11
tf_inputs = np.empty((0, input_columns))
tf_output = np.empty((0))
for year in range(MIN_YEAR, MAX_YEAR + 1):
local_inputs = list(INPUTS)
for season in SEASONS:
local_inputs += [
'temp_{}_{}'.format(season, year),
'precip_{}_{}'.format(season, year)
]
local_df = df[local_inputs]
tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0)
tf_output = np.concatenate((tf_output, df[OUTPUT].values), axis=0)
# balance class weights for the loss function, since the data is highly unbalanced
num_classes = len(np.unique(tf_output))
class_weights = class_weight.compute_class_weight('balanced', np.unique(tf_output), tf_output)
logger.debug('class_weights %s', class_weights)
tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
tf_output = tf.cast(tf_output, tf.int64)
logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes)
return int(tf_inputs.shape[0]), input_columns, num_classes, class_weights, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
def dataframe_to_dataset_temp_precip(df):
rows = df.shape[0]
# elevation, distance_to_water, latitude
# season, year
input_columns = 5
num_classes = 2
tf_inputs = np.empty((0, input_columns))
tf_output = np.empty((0, num_classes))
for year in range(MIN_YEAR, MAX_YEAR + 1):
local_inputs = list(INPUTS)
for idx, season in enumerate(SEASONS):
season_index = idx / len(SEASONS)
local_df = df[local_inputs]
local_df.loc[:, 'season'] = pd.Series(np.repeat(season_index, rows), index=local_df.index)
local_df.loc[:, 'year'] = pd.Series(np.repeat(year, rows), index=local_df.index)
output = ['temp_{}_{}'.format(season, year), 'precip_{}_{}'.format(season, year)]
tf_inputs = np.concatenate((tf_inputs, local_df.values), axis=0)
tf_output = np.concatenate((tf_output, df[output].values), axis=0)
tf_inputs = tf.cast(normalize_ndarray(tf_inputs), tf.float32)
tf_output = tf.cast(tf_output, tf.float32)
logger.debug('dataset size: rows=%d, input_columns=%d, num_classes=%d', int(tf_inputs.shape[0]), input_columns, num_classes)
return int(tf_inputs.shape[0]), input_columns, num_classes, tf.data.Dataset.from_tensor_slices((tf_inputs, tf_output))
flatten = lambda l: [item for sublist in l for item in sublist]
def chunker(seq, size):
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
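A short sketch of the two standalone helpers above; note that normalize_ndarray writes its result back into the array it is given, since it assigns through a transposed view:

```python
# quick check of normalize_ndarray and chunker
import numpy as np
from utils import normalize_ndarray, chunker

a = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
print(normalize_ndarray(a))              # each column scaled to zero mean, unit variance
print(list(chunker(list(range(7)), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]
```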