From 80bea9972a0fe7748933be739533bc9173335a27 Mon Sep 17 00:00:00 2001 From: Karamaz0V1 Date: Wed, 12 Sep 2018 20:30:01 +0200 Subject: [PATCH 1/3] Add protocol JurseSF --- protocols/jurse.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/protocols/jurse.py b/protocols/jurse.py index 1f41485..2c6dfbc 100644 --- a/protocols/jurse.py +++ b/protocols/jurse.py @@ -18,6 +18,94 @@ import triskele from .protocol import Protocol, TestError +class JurseSF(Jurse): + """Second JURSE "split first" protocol for LiDAR classification with 2D maps. + + This second protocol split the data set before computing the attribute + profiles to assure the classification is unbiased. + + """ + + def __init__(self, expe): + super().__init__(expe, self.__class__.__name__) + + def _run(self): + self._log.info('Compute descriptors') + try: + descriptors = self._compute_descriptors() + except Exception: + raise TestError('Error occured during description') + self._time('description') + + self._log.info('Classify data') + try: + classification = self._compute_classification(descriptors) + except Exception: + raise TestError('Error occured during classification') + self._time('classification') + + self._log.info('Run metrics') + metrics = self._run_metrics(classification, descriptors) + self._time('metrics') + + cmap = str(self._results_base_name) + '.tif' + self._log.info('Saving classification map {}'.format(cmap)) + triskele.write(cmap, classification) + + results = OrderedDict() + results['classification'] = cmap + results['metrics'] = metrics + self._results = results + + def _compute_descriptors(self): + script = self._expe['descriptors_script'] + + desc = importlib.import_module(script['name']) + att = desc.run(**script['parameters']) + + return att + + def _compute_classification(self, descriptors): + # Ground truth + gt = self._get_ground_truth() + + # CrossVal and ML + cv = self._expe['cross_validation'] + cl = self._expe['classifier'] + + cross_val = getattr(importlib.import_module(cv['package']), cv['name']) + classifier = getattr(importlib.import_module(cl['package']), cl['name']) + + prediction = np.zeros_like(gt, dtype=np.uint8) + + for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']): + rfc = classifier(**cl['parameters']) + rfc.fit(xt, yt) + + ypred = rfc.predict(xv) + + prediction[ti] = ypred + + return prediction + + def _get_results(self): + return self._results + + def _run_metrics(self, classification, descriptors): + gt = self._get_ground_truth() + + f = np.nonzero(classification) + pred = classification[f].ravel() + gt = gt[f].ravel() + + results = OrderedDict() + results['dimensions'] = descriptors.shape[-1] + results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred)) + results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred)) + + return results + + class Jurse(Protocol): """First JURSE test protocol for LiDAR classification with 2D maps. From 2d6c399acd3ea4f4ccce09f7dc68a5c4b9cf3c77 Mon Sep 17 00:00:00 2001 From: Karamaz0V1 Date: Thu, 13 Sep 2018 12:17:20 +0200 Subject: [PATCH 2/3] Abort JurseSF, upgrade scripts and new cvgenerator --- cvgenerators/__init__.py | 0 cvgenerators/jurse.py | 150 +++++++++++++++++++++++++++++++++++++++ descriptors/dfc_aps.py | 60 ++++++++++++---- protocols/jurse.py | 88 ----------------------- test_mockup.yml | 7 +- 5 files changed, 202 insertions(+), 103 deletions(-) create mode 100644 cvgenerators/__init__.py create mode 100644 cvgenerators/jurse.py diff --git a/cvgenerators/__init__.py b/cvgenerators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cvgenerators/jurse.py b/cvgenerators/jurse.py new file mode 100644 index 0000000..1f6948e --- /dev/null +++ b/cvgenerators/jurse.py @@ -0,0 +1,150 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# \file CrossValidationGenerator.py +# \brief TODO +# \author Florent Guiotte +# \version 0.1 +# \date 28 Mar 2018 +# +# TODO details + +import numpy as np +import ipdb + +class Split: + """Geographic split cross validation generator. + + Split `n_test` times along given dimension. One split is for test, the + others are used in training. + + If used with a split first description, make sure you use compatible + settings. + + """ + + def __init__(self, ground_truth, attributes, n_test=2, order_dim=0, remove_unclassified=True): + self._att = attributes + self._gt = ground_truth + self._n = n_test + self._d = order_dim + self._s = 0 + self._r = remove_unclassified + + self._size = ground_truth.shape[order_dim] + self._step = int(ground_truth.shape[order_dim] / n_test) + + def __iter__(self): + return self + + def __next__(self): + if self._s == self._n: + raise StopIteration + + cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step + + test_index = np.zeros_like(self._gt, dtype=np.bool) + view = np.moveaxis(test_index, self._d, 0) + view[cfilter] = True + + unclassified = self._gt == 0 + train_index = ~test_index & ~unclassified + + if self._r: + test_index &= ~unclassified + + #ipdb.set_trace() + xtrain = self._att[train_index] + xtest = self._att[test_index] + ytrain = self._gt[train_index] + ytest = self._gt[test_index] + + self._s += 1 + + return xtrain, xtest, ytrain, ytest, test_index + + +class CVG_legacy: + def __init__(self, attributes, ground_truth, n_test=2, order_dim=0): + self._order = order_dim + self._ntests = n_test + self._actual_ntest = 0 + self._size = attributes.shape[order_dim] + self._att = attributes + self._gt = ground_truth + + if attributes.shape[0] != ground_truth.shape[0] or \ + attributes.shape[1] != ground_truth.shape[1] : + raise ValueError('attributes and ground_truth must have the same 2D shape') + + def __iter__(self): + return self + + def __next__(self): + if self._actual_ntest == self._ntests: + raise StopIteration + + step = self._size / self._ntests + train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step + + if self._order == 0: + Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2]) + Xtest = self._att[train_filter == False].reshape(-1, self._att.shape[2]) + Ytrain = self._gt[train_filter].reshape(-1) + Ytest = self._gt[train_filter == False].reshape(-1) + else: + Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2]) + Xtest = self._att[:,train_filter == False].reshape(-1, self._att.shape[2]) + Ytrain = self._gt[:,train_filter].reshape(-1) + Ytest = self._gt[:,train_filter == False].reshape(-1) + + + self._actual_ntest += 1 + + return (Xtrain, Xtest, Ytrain, Ytest, train_filter) + +class APsCVG: + """Cross Validation Generator for Attribute Profiles Descriptors""" + def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None): + self._gt = ground_truth + self._att = attributes + self._cv_count = n_test + self._actual_count = 0 + + if attributes.shape[0] != ground_truth.shape[0] or \ + attributes.shape[1] != ground_truth.shape[1] : + raise ValueError('attributes and ground_truth must have the same 2D shape') + + def __iter__(self): + return self + + def __next__(self): + if self._cv_count == self._actual_count: + raise StopIteration + + split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count) + xtrain = self._att[split_map == 1].reshape(-1, self._att.shape[2]) + xtest = self._att[split_map == 2].reshape(-1, self._att.shape[2]) + ytrain = self._gt[split_map == 1].reshape(-1) + ytest = self._gt[split_map == 2].reshape(-1) + test_index = split_map == 2 + + self._actual_count += 1 + + return xtrain, xtest, ytrain, ytest, test_index + +def semantic_cvg(gt, nb_split, step=0): + count = np.unique(gt, return_counts=True) + + test_part = 1 / nb_split + + split = np.zeros_like(gt) + + for lbli, lblc in zip(count[0][1:], count[1][1:]): + treshold = int(lblc * test_part) + #print('lbli:{}, count:{}, train:{}'.format(lbli, lblc, treshold)) + f = np.nonzero(gt == lbli) + t_int, t_ext = treshold * step, treshold * (step + 1) + split[f[0], f[1]] = 1 + split[f[0][t_int:t_ext], f[1][t_int:t_ext]] = 2 + + return split diff --git a/descriptors/dfc_aps.py b/descriptors/dfc_aps.py index c632f9a..860c81c 100644 --- a/descriptors/dfc_aps.py +++ b/descriptors/dfc_aps.py @@ -5,26 +5,62 @@ import sys sys.path.append('..') import ld2dap -def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): + """DFC Attribute Profiles + + Compute description vectors for parameters. Rasters can be splitted along + `split_dim` before description proceeds. + + """ + # Parse attribute type treshold = float(treshold) areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) sd = None if sd is None else np.array(sd).astype(np.float) moi = None if moi is None else np.array(moi).astype(np.float) - # APs Pipelines + # Load and filter loader = ld2dap.LoadTIFF(rasters) dfc_filter = ld2dap.Treshold(treshold) - dfc_filter.input = loader - aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi) - aps.input = dfc_filter - out_vectors = ld2dap.RawOutput() - out_vectors.input = aps + normalize = ld2dap.Normalize(dtype=np.uint8) + raw_out = ld2dap.RawOutput() - # Compute vectors - out_vectors.run() - - return out_vectors.data + raw_out.input = normalize + normalize.input = dfc_filter + dfc_filter.input = loader + raw_out.run() + + # Split + n = split; d = split_dim + + step = int(raw_out.data.shape[d] / n) + view = np.moveaxis(raw_out.data, d, 0) + cuts = list() + for i in range(n): + cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d) + cuts.append(cut) + + # Describe + dcuts = list() + for cut in cuts: + rinp = ld2dap.RawInput(cut, raw_out.metadata) + aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False) + vout = ld2dap.RawOutput() + + vout.input = aps + aps.input = rinp + vout.run() + + dcuts.append(vout.data) + + # Merge + descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],)) + view = np.moveaxis(descriptors, d, 0) + + for i, cut in enumerate(dcuts): + view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d) + + return descriptors def version(): - return 'v0.0' \ No newline at end of file + return 'v0.0' diff --git a/protocols/jurse.py b/protocols/jurse.py index 2c6dfbc..1f41485 100644 --- a/protocols/jurse.py +++ b/protocols/jurse.py @@ -18,94 +18,6 @@ import triskele from .protocol import Protocol, TestError -class JurseSF(Jurse): - """Second JURSE "split first" protocol for LiDAR classification with 2D maps. - - This second protocol split the data set before computing the attribute - profiles to assure the classification is unbiased. - - """ - - def __init__(self, expe): - super().__init__(expe, self.__class__.__name__) - - def _run(self): - self._log.info('Compute descriptors') - try: - descriptors = self._compute_descriptors() - except Exception: - raise TestError('Error occured during description') - self._time('description') - - self._log.info('Classify data') - try: - classification = self._compute_classification(descriptors) - except Exception: - raise TestError('Error occured during classification') - self._time('classification') - - self._log.info('Run metrics') - metrics = self._run_metrics(classification, descriptors) - self._time('metrics') - - cmap = str(self._results_base_name) + '.tif' - self._log.info('Saving classification map {}'.format(cmap)) - triskele.write(cmap, classification) - - results = OrderedDict() - results['classification'] = cmap - results['metrics'] = metrics - self._results = results - - def _compute_descriptors(self): - script = self._expe['descriptors_script'] - - desc = importlib.import_module(script['name']) - att = desc.run(**script['parameters']) - - return att - - def _compute_classification(self, descriptors): - # Ground truth - gt = self._get_ground_truth() - - # CrossVal and ML - cv = self._expe['cross_validation'] - cl = self._expe['classifier'] - - cross_val = getattr(importlib.import_module(cv['package']), cv['name']) - classifier = getattr(importlib.import_module(cl['package']), cl['name']) - - prediction = np.zeros_like(gt, dtype=np.uint8) - - for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']): - rfc = classifier(**cl['parameters']) - rfc.fit(xt, yt) - - ypred = rfc.predict(xv) - - prediction[ti] = ypred - - return prediction - - def _get_results(self): - return self._results - - def _run_metrics(self, classification, descriptors): - gt = self._get_ground_truth() - - f = np.nonzero(classification) - pred = classification[f].ravel() - gt = gt[f].ravel() - - results = OrderedDict() - results['dimensions'] = descriptors.shape[-1] - results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred)) - results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred)) - - return results - - class Jurse(Protocol): """First JURSE test protocol for LiDAR classification with 2D maps. diff --git a/test_mockup.yml b/test_mockup.yml index 503caee..4f9a518 100644 --- a/test_mockup.yml +++ b/test_mockup.yml @@ -17,6 +17,7 @@ expe: descriptors_script: name: descriptors.dfc_aps parameters: + split: 5 areas: - 100 - 1000 @@ -28,10 +29,10 @@ expe: - ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif treshold: 1e4 cross_validation: - name: APsCVG - package: CVGenerators + name: Split + package: cvgenerators.jurse parameters: - n_test: 2 + n_test: 5 classifier: name: RandomForestClassifier package: sklearn.ensemble From 14e1d92c68ca35927b390336c13f7fe803b2c9ac Mon Sep 17 00:00:00 2001 From: Karamaz0V1 Date: Fri, 14 Sep 2018 16:30:05 +0200 Subject: [PATCH 3/3] New DFC scripts with Split First --- descriptors/dfc_aps.py | 4 --- descriptors/dfc_daps.py | 63 +++++++++++++++++++++++++++--------- descriptors/dfc_dsdaps.py | 67 +++++++++++++++++++++++++++++---------- descriptors/dfc_sdaps.py | 60 +++++++++++++++++++++++++++-------- test_mockup.yml | 10 +++--- 5 files changed, 150 insertions(+), 54 deletions(-) diff --git a/descriptors/dfc_aps.py b/descriptors/dfc_aps.py index 860c81c..1c4de33 100644 --- a/descriptors/dfc_aps.py +++ b/descriptors/dfc_aps.py @@ -1,8 +1,4 @@ import numpy as np -import yaml - -import sys -sys.path.append('..') import ld2dap def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): diff --git a/descriptors/dfc_daps.py b/descriptors/dfc_daps.py index 12c59fb..2791439 100644 --- a/descriptors/dfc_daps.py +++ b/descriptors/dfc_daps.py @@ -9,33 +9,66 @@ # TODO details import numpy as np - -import sys -sys.path.append('..') import ld2dap -def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): - # Parse parameters type +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): + """DFC Differential Attribute Profiles + + Compute description vectors for parameters. Rasters can be splitted along + `split_dim` before description proceeds. + + """ + + # Parse attribute type treshold = float(treshold) areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) sd = None if sd is None else np.array(sd).astype(np.float) moi = None if moi is None else np.array(moi).astype(np.float) - # Pipelines + # Load and filter loader = ld2dap.LoadTIFF(rasters) dfc_filter = ld2dap.Treshold(treshold) + normalize = ld2dap.Normalize(dtype=np.uint8) + raw_out = ld2dap.RawOutput() + + raw_out.input = normalize + normalize.input = dfc_filter dfc_filter.input = loader - aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi) - aps.input = dfc_filter - differential = ld2dap.Differential() - differential.input = aps - out_vectors = ld2dap.RawOutput() - out_vectors.input = differential + raw_out.run() - # Compute vectors - out_vectors.run() + # Split + n = split; d = split_dim - return out_vectors.data + step = int(raw_out.data.shape[d] / n) + view = np.moveaxis(raw_out.data, d, 0) + cuts = list() + for i in range(n): + cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d) + cuts.append(cut) + + # Describe + dcuts = list() + for cut in cuts: + rinp = ld2dap.RawInput(cut, raw_out.metadata) + aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False) + diff = ld2dap.Differential() + vout = ld2dap.RawOutput() + + vout.input = diff + diff.input = aps + aps.input = rinp + vout.run() + + dcuts.append(vout.data) + + # Merge + descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],)) + view = np.moveaxis(descriptors, d, 0) + + for i, cut in enumerate(dcuts): + view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d) + + return descriptors def version(): return 'v0.0' diff --git a/descriptors/dfc_dsdaps.py b/descriptors/dfc_dsdaps.py index dca083d..16f335b 100644 --- a/descriptors/dfc_dsdaps.py +++ b/descriptors/dfc_dsdaps.py @@ -1,41 +1,74 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# \file dfc_dsdaps.py +# \file dfc_daps.py # \brief TODO # \author Florent Guiotte # \version 0.1 -# \date 28 août 2018 +# \date 27 août 2018 # # TODO details import numpy as np - -import sys -sys.path.append('..') import ld2dap -def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): - # Parse parameters type +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): + """DFC Differential Self Dual Attribute Profiles + + Compute description vectors for parameters. Rasters can be splitted along + `split_dim` before description proceeds. + + """ + + # Parse attribute type treshold = float(treshold) areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) sd = None if sd is None else np.array(sd).astype(np.float) moi = None if moi is None else np.array(moi).astype(np.float) - # Pipelines + # Load and filter loader = ld2dap.LoadTIFF(rasters) dfc_filter = ld2dap.Treshold(treshold) + normalize = ld2dap.Normalize(dtype=np.uint8) + raw_out = ld2dap.RawOutput() + + raw_out.input = normalize + normalize.input = dfc_filter dfc_filter.input = loader - sdaps = ld2dap.SelfDualAttributeProfiles(area=areas, sd=sd, moi=moi) - sdaps.input = dfc_filter - differential = ld2dap.Differential() - differential.input = sdaps - out_vectors = ld2dap.RawOutput() - out_vectors.input = differential + raw_out.run() - # Compute vectors - out_vectors.run() + # Split + n = split; d = split_dim - return out_vectors.data + step = int(raw_out.data.shape[d] / n) + view = np.moveaxis(raw_out.data, d, 0) + cuts = list() + for i in range(n): + cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d) + cuts.append(cut) + + # Describe + dcuts = list() + for cut in cuts: + rinp = ld2dap.RawInput(cut, raw_out.metadata) + aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False) + diff = ld2dap.Differential() + vout = ld2dap.RawOutput() + + vout.input = diff + diff.input = aps + aps.input = rinp + vout.run() + + dcuts.append(vout.data) + + # Merge + descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],)) + view = np.moveaxis(descriptors, d, 0) + + for i, cut in enumerate(dcuts): + view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d) + + return descriptors def version(): return 'v0.0' diff --git a/descriptors/dfc_sdaps.py b/descriptors/dfc_sdaps.py index ab67dba..21f62ee 100644 --- a/descriptors/dfc_sdaps.py +++ b/descriptors/dfc_sdaps.py @@ -9,32 +9,64 @@ # TODO details import numpy as np - -import sys -sys.path.append('..') import ld2dap -def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): - # Parse parameters type +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0): + """DFC Self Dual Attribute Profiles + + Compute description vectors for parameters. Rasters can be splitted along + `split_dim` before description proceeds. + + """ + + # Parse attribute type treshold = float(treshold) areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) sd = None if sd is None else np.array(sd).astype(np.float) moi = None if moi is None else np.array(moi).astype(np.float) - # Pipelines + # Load and filter loader = ld2dap.LoadTIFF(rasters) dfc_filter = ld2dap.Treshold(treshold) + normalize = ld2dap.Normalize(dtype=np.uint8) + raw_out = ld2dap.RawOutput() + + raw_out.input = normalize + normalize.input = dfc_filter dfc_filter.input = loader - sdaps = ld2dap.SelfDualAttributeProfiles(area=areas, sd=sd, moi=moi) - sdaps.input = dfc_filter - out_vectors = ld2dap.RawOutput() - out_vectors.input = sdaps + raw_out.run() - # Compute vectors - out_vectors.run() + # Split + n = split; d = split_dim - return out_vectors.data + step = int(raw_out.data.shape[d] / n) + view = np.moveaxis(raw_out.data, d, 0) + cuts = list() + for i in range(n): + cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d) + cuts.append(cut) + + # Describe + dcuts = list() + for cut in cuts: + rinp = ld2dap.RawInput(cut, raw_out.metadata) + aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False) + vout = ld2dap.RawOutput() + + vout.input = aps + aps.input = rinp + vout.run() + + dcuts.append(vout.data) + + # Merge + descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],)) + view = np.moveaxis(descriptors, d, 0) + + for i, cut in enumerate(dcuts): + view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d) + + return descriptors def version(): return 'v0.0' - diff --git a/test_mockup.yml b/test_mockup.yml index 4f9a518..b6b5a1b 100644 --- a/test_mockup.yml +++ b/test_mockup.yml @@ -15,14 +15,16 @@ expe: raster: ./Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif meta_labels: ./Data/ground_truth/jurse_meta_idx.csv descriptors_script: - name: descriptors.dfc_aps + name: descriptors.dfc_sdaps parameters: - split: 5 + split: 4 areas: - 100 - 1000 + - 1e4 moi: - 0.5 + - 0.7 - 0.9 rasters: - ./Data/dfc_rasters/DEM+B_C123/UH17_GEM051_TR.tif @@ -32,12 +34,12 @@ expe: name: Split package: cvgenerators.jurse parameters: - n_test: 5 + n_test: 4 classifier: name: RandomForestClassifier package: sklearn.ensemble parameters: min_samples_leaf: 10 - n_estimators: 10 + n_estimators: 100 n_jobs: -1 random_state: 0