From 2d6c399acd3ea4f4ccce09f7dc68a5c4b9cf3c77 Mon Sep 17 00:00:00 2001 From: Karamaz0V1 Date: Thu, 13 Sep 2018 12:17:20 +0200 Subject: [PATCH] Abort JurseSF, upgrade scripts and new cvgenerator --- cvgenerators/__init__.py | 0 cvgenerators/jurse.py | 150 +++++++++++++++++++++++++++++++++++++++ descriptors/dfc_aps.py | 60 ++++++++++++---- protocols/jurse.py | 88 ----------------------- test_mockup.yml | 7 +- 5 files changed, 202 insertions(+), 103 deletions(-) create mode 100644 cvgenerators/__init__.py create mode 100644 cvgenerators/jurse.py diff --git a/cvgenerators/__init__.py b/cvgenerators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cvgenerators/jurse.py b/cvgenerators/jurse.py new file mode 100644 index 0000000..1f6948e --- /dev/null +++ b/cvgenerators/jurse.py @@ -0,0 +1,150 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# \file CrossValidationGenerator.py +# \brief TODO +# \author Florent Guiotte +# \version 0.1 +# \date 28 Mar 2018 +# +# TODO details + +import numpy as np +import ipdb + +class Split: + """Geographic split cross validation generator. + + Split `n_test` times along given dimension. One split is for test, the + others are used in training. + + If used with a split first description, make sure you use compatible + settings. + + """ + + def __init__(self, ground_truth, attributes, n_test=2, order_dim=0, remove_unclassified=True): + self._att = attributes + self._gt = ground_truth + self._n = n_test + self._d = order_dim + self._s = 0 + self._r = remove_unclassified + + self._size = ground_truth.shape[order_dim] + self._step = int(ground_truth.shape[order_dim] / n_test) + + def __iter__(self): + return self + + def __next__(self): + if self._s == self._n: + raise StopIteration + + cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step + + test_index = np.zeros_like(self._gt, dtype=np.bool) + view = np.moveaxis(test_index, self._d, 0) + view[cfilter] = True + + unclassified = self._gt == 0 + train_index = ~test_index & ~unclassified + + if self._r: + test_index &= ~unclassified + + #ipdb.set_trace() + xtrain = self._att[train_index] + xtest = self._att[test_index] + ytrain = self._gt[train_index] + ytest = self._gt[test_index] + + self._s += 1 + + return xtrain, xtest, ytrain, ytest, test_index + + +class CVG_legacy: + def __init__(self, attributes, ground_truth, n_test=2, order_dim=0): + self._order = order_dim + self._ntests = n_test + self._actual_ntest = 0 + self._size = attributes.shape[order_dim] + self._att = attributes + self._gt = ground_truth + + if attributes.shape[0] != ground_truth.shape[0] or \ + attributes.shape[1] != ground_truth.shape[1] : + raise ValueError('attributes and ground_truth must have the same 2D shape') + + def __iter__(self): + return self + + def __next__(self): + if self._actual_ntest == self._ntests: + raise StopIteration + + step = self._size / self._ntests + train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step + + if self._order == 0: + Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2]) + Xtest = self._att[train_filter == False].reshape(-1, self._att.shape[2]) + Ytrain = self._gt[train_filter].reshape(-1) + Ytest = self._gt[train_filter == False].reshape(-1) + else: + Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2]) + Xtest = self._att[:,train_filter == False].reshape(-1, self._att.shape[2]) + Ytrain = self._gt[:,train_filter].reshape(-1) + Ytest = self._gt[:,train_filter == False].reshape(-1) + + + self._actual_ntest += 1 + + return (Xtrain, Xtest, Ytrain, Ytest, train_filter) + +class APsCVG: + """Cross Validation Generator for Attribute Profiles Descriptors""" + def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None): + self._gt = ground_truth + self._att = attributes + self._cv_count = n_test + self._actual_count = 0 + + if attributes.shape[0] != ground_truth.shape[0] or \ + attributes.shape[1] != ground_truth.shape[1] : + raise ValueError('attributes and ground_truth must have the same 2D shape') + + def __iter__(self): + return self + + def __next__(self): + if self._cv_count == self._actual_count: + raise StopIteration + + split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count) + xtrain = self._att[split_map == 1].reshape(-1, self._att.shape[2]) + xtest = self._att[split_map == 2].reshape(-1, self._att.shape[2]) + ytrain = self._gt[split_map == 1].reshape(-1) + ytest = self._gt[split_map == 2].reshape(-1) + test_index = split_map == 2 + + self._actual_count += 1 + + return xtrain, xtest, ytrain, ytest, test_index + +def semantic_cvg(gt, nb_split, step=0): + count = np.unique(gt, return_counts=True) + + test_part = 1 / nb_split + + split = np.zeros_like(gt) + + for lbli, lblc in zip(count[0][1:], count[1][1:]): + treshold = int(lblc * test_part) + #print('lbli:{}, count:{}, train:{}'.format(lbli, lblc, treshold)) + f = np.nonzero(gt == lbli) + t_int, t_ext = treshold * step, treshold * (step + 1) + split[f[0], f[1]] = 1 + split[f[0][t_int:t_ext], f[1][t_int:t_ext]] = 2 + + return split diff --git a/descriptors/dfc_aps.py b/descriptors/dfc_aps.py index c632f9a..860c81c 100644 --- a/descriptors/dfc_aps.py +++ b/descriptors/dfc_aps.py @@ -5,26 +5,62 @@ import sys sys.path.append('..') import ld2dap -def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): + """DFC Attribute Profiles + + Compute description vectors for parameters. Rasters can be splitted along + `split_dim` before description proceeds. + + """ + # Parse attribute type treshold = float(treshold) areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) sd = None if sd is None else np.array(sd).astype(np.float) moi = None if moi is None else np.array(moi).astype(np.float) - # APs Pipelines + # Load and filter loader = ld2dap.LoadTIFF(rasters) dfc_filter = ld2dap.Treshold(treshold) - dfc_filter.input = loader - aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi) - aps.input = dfc_filter - out_vectors = ld2dap.RawOutput() - out_vectors.input = aps + normalize = ld2dap.Normalize(dtype=np.uint8) + raw_out = ld2dap.RawOutput() - # Compute vectors - out_vectors.run() - - return out_vectors.data + raw_out.input = normalize + normalize.input = dfc_filter + dfc_filter.input = loader + raw_out.run() + + # Split + n = split; d = split_dim + + step = int(raw_out.data.shape[d] / n) + view = np.moveaxis(raw_out.data, d, 0) + cuts = list() + for i in range(n): + cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d) + cuts.append(cut) + + # Describe + dcuts = list() + for cut in cuts: + rinp = ld2dap.RawInput(cut, raw_out.metadata) + aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False) + vout = ld2dap.RawOutput() + + vout.input = aps + aps.input = rinp + vout.run() + + dcuts.append(vout.data) + + # Merge + descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],)) + view = np.moveaxis(descriptors, d, 0) + + for i, cut in enumerate(dcuts): + view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d) + + return descriptors def version(): - return 'v0.0' \ No newline at end of file + return 'v0.0' diff --git a/protocols/jurse.py b/protocols/jurse.py index 2c6dfbc..1f41485 100644 --- a/protocols/jurse.py +++ b/protocols/jurse.py @@ -18,94 +18,6 @@ import triskele from .protocol import Protocol, TestError -class JurseSF(Jurse): - """Second JURSE "split first" protocol for LiDAR classification with 2D maps. - - This second protocol split the data set before computing the attribute - profiles to assure the classification is unbiased. - - """ - - def __init__(self, expe): - super().__init__(expe, self.__class__.__name__) - - def _run(self): - self._log.info('Compute descriptors') - try: - descriptors = self._compute_descriptors() - except Exception: - raise TestError('Error occured during description') - self._time('description') - - self._log.info('Classify data') - try: - classification = self._compute_classification(descriptors) - except Exception: - raise TestError('Error occured during classification') - self._time('classification') - - self._log.info('Run metrics') - metrics = self._run_metrics(classification, descriptors) - self._time('metrics') - - cmap = str(self._results_base_name) + '.tif' - self._log.info('Saving classification map {}'.format(cmap)) - triskele.write(cmap, classification) - - results = OrderedDict() - results['classification'] = cmap - results['metrics'] = metrics - self._results = results - - def _compute_descriptors(self): - script = self._expe['descriptors_script'] - - desc = importlib.import_module(script['name']) - att = desc.run(**script['parameters']) - - return att - - def _compute_classification(self, descriptors): - # Ground truth - gt = self._get_ground_truth() - - # CrossVal and ML - cv = self._expe['cross_validation'] - cl = self._expe['classifier'] - - cross_val = getattr(importlib.import_module(cv['package']), cv['name']) - classifier = getattr(importlib.import_module(cl['package']), cl['name']) - - prediction = np.zeros_like(gt, dtype=np.uint8) - - for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']): - rfc = classifier(**cl['parameters']) - rfc.fit(xt, yt) - - ypred = rfc.predict(xv) - - prediction[ti] = ypred - - return prediction - - def _get_results(self): - return self._results - - def _run_metrics(self, classification, descriptors): - gt = self._get_ground_truth() - - f = np.nonzero(classification) - pred = classification[f].ravel() - gt = gt[f].ravel() - - results = OrderedDict() - results['dimensions'] = descriptors.shape[-1] - results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred)) - results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred)) - - return results - - class Jurse(Protocol): """First JURSE test protocol for LiDAR classification with 2D maps. diff --git a/test_mockup.yml b/test_mockup.yml index 503caee..4f9a518 100644 --- a/test_mockup.yml +++ b/test_mockup.yml @@ -17,6 +17,7 @@ expe: descriptors_script: name: descriptors.dfc_aps parameters: + split: 5 areas: - 100 - 1000 @@ -28,10 +29,10 @@ expe: - ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif treshold: 1e4 cross_validation: - name: APsCVG - package: CVGenerators + name: Split + package: cvgenerators.jurse parameters: - n_test: 2 + n_test: 5 classifier: name: RandomForestClassifier package: sklearn.ensemble