minigrida/protocols/jurse.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file jurse.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 07 sept. 2018
#
# TODO details

import hashlib
import importlib
from collections import OrderedDict
import numpy as np
import pandas as pd
from sklearn import metrics
# TODO: create package, use dev
import sys
sys.path.append('../triskele/python')
import triskele
from .protocol import Protocol, TestError


class Jurse(Protocol):
    """First JURSE test protocol for LiDAR classification with 2D maps.

    This first protocol compute attribute profiles on the whole scene then
    split in train and test for a random forest classifier.

    """

    def __init__(self, expe):
        super().__init__(expe, self.__class__.__name__)

    def _get_hashes(self):
        hashes = OrderedDict()
        glob = hashlib.sha1()

        for k in ['ground_truth', 'descriptors_script',
                  'cross_validation', 'classifier']:
            val = str(self._expe[k]).encode('utf-8')
            hashes[k] = hashlib.sha1(val).hexdigest()
            glob.update(val)
        hashes['global'] = glob.hexdigest()

        return hashes

    def _run(self):
        self._log.info('Compute descriptors')
        try:
            descriptors = self._compute_descriptors()
        except Exception:
            raise TestError('Error occured during description')

        self._log.info('Classify data')
        try:
            classification = self._compute_classificatin(descriptors)
        except Exception:
            raise TestError('Error occured during classification')

        self._log.info('Run metrics')
        self._metrics = self._run_metrics(classification, descriptors)

    def _compute_descriptors(self):
        script = self._expe['descriptors_script']

        desc = importlib.import_module(script['name'])
        att = desc.run(**script['parameters'])

        return att

    def _compute_classification(self, descriptors):
        # Ground truth
        gt = self._get_ground_truth()

        # CrossVal and ML
        cv = self._expe['cross_validation']
        cl = self._expe['classifier']

        cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
        classifier = getattr(importlib.import_module(cl['package']), cl['name'])

        prediction = np.zeros_like(gt)

        for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
            rfc = classifier(**cl['parameters'])
            rfc.fit(xt, yt)

            ypred = rfc.predict(xv)

            prediction[ti] = ypred

        return prediction

    def _get_ground_truth(self):
        gt_expe = self._expe['ground_truth']
        gt = triskele.read(gt_expe['raster'])

        # Meta labeling
        idx_map = np.arange(gt.max() + 1)

        if 'meta_labels' in self._expe:
            meta_idx = pd.read_csv(gt_expe['meta_labels'])
            idx = np.array(meta_idx['index'])
            midx = np.array(meta_idx['metaclass_index'])
            idx_map[idx] = midx

        return idx_map[gt]

    def _run_metrics(self, classification, descriptors):
        gt = self._get_ground_truth()

        f = np.nonzero(classification)
        pred = classification[f].ravel()
        gt = gt[f].ravel()

        results = OrderedDict()
        results['dimension'] = descriptors.shape[-1]
        results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
        results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

        return results