class Split:
    """Geographic split cross validation generator.

    The ground truth is cut into `n_test` contiguous bands of equal size
    along `order_dim`. Each iteration uses one band as the test set and every
    classified pixel outside that band as the training set.

    When the size along `order_dim` is not a multiple of `n_test`, the
    trailing remainder never belongs to a test band; it is therefore part of
    every training set.

    If used with a split-first description, make sure both use compatible
    settings.

    Parameters
    ----------
    ground_truth : numpy.ndarray
        Label map. Pixels equal to 0 are treated as unclassified.
    attributes : numpy.ndarray
        Descriptors, indexed with boolean masks shaped like `ground_truth`.
    n_test : int
        Number of bands, which is also the number of folds yielded.
    order_dim : int
        Axis of `ground_truth` along which the bands are cut.
    remove_unclassified : bool
        When true, unclassified pixels are removed from the test set as well
        (they are always removed from the training set).

    Raises
    ------
    ValueError
        If `n_test` is smaller than 1 or larger than the size of
        `ground_truth` along `order_dim`.
    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0,
                 remove_unclassified=True):
        n_test = int(n_test)
        size = ground_truth.shape[order_dim]
        if n_test < 1 or n_test > size:
            raise ValueError(
                'n_test must be in [1, {}], got {}'.format(size, n_test))

        self._att = attributes
        self._gt = ground_truth
        self._n = n_test
        self._d = order_dim
        self._s = 0  # index of the next fold to yield
        self._r = remove_unclassified

        self._size = size
        self._step = size // n_test

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(xtrain, xtest, ytrain, ytest, test_index)`` for the next fold."""
        if self._s >= self._n:
            raise StopIteration

        start = self._step * self._s

        # `np.bool` was removed from NumPy; the builtin `bool` is the same dtype.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # moveaxis returns a view, so writing the band through it fills
        # `test_index` along the requested dimension.
        np.moveaxis(test_index, self._d, 0)[start:start + self._step] = True

        classified = self._gt != 0
        train_index = ~test_index & classified

        if self._r:
            test_index &= classified

        xtrain = self._att[train_index]
        xtest = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest = self._gt[test_index]

        self._s += 1

        return xtrain, xtest, ytrain, ytest, test_index
class CVG_legacy:
    """Legacy band-wise cross validation generator.

    The `order_dim` axis (0 or anything else, meaning axis 1) is partitioned
    into `n_test` contiguous bands. At each iteration the current band is
    returned as the *train* samples and everything else as the *test*
    samples; this naming is kept from the original implementation.

    Parameters
    ----------
    attributes : numpy.ndarray
        Array indexed as ``attributes[row, col, feature]``.
    ground_truth : numpy.ndarray
        Label map sharing the first two dimensions of `attributes`.
    n_test : int
        Number of bands / folds.
    order_dim : int
        Axis along which bands are cut.

    Raises
    ------
    ValueError
        If the first two dimensions of both arrays differ.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        if attributes.shape[:2] != ground_truth.shape[:2]:
            raise ValueError('attributes and ground_truth must have the same 2D shape')

        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)`` for the next fold."""
        fold = self._actual_ntest
        if fold == self._ntests:
            raise StopIteration

        # Index i belongs to fold k when k*size/n <= i < (k+1)*size/n.
        # Integer arithmetic is used on purpose: the float modulo formulation
        # can be off by one ulp and make an index wrap into a second fold
        # (e.g. size=10, n_test=3 puts index 0 in both fold 0 and fold 2).
        train_filter = (np.arange(self._size) * self._ntests) // self._size == fold
        test_filter = ~train_filter

        n_features = self._att.shape[2]
        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, n_features)
            Xtest = self._att[test_filter].reshape(-1, n_features)
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest = self._gt[test_filter].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, n_features)
            Xtest = self._att[:, test_filter].reshape(-1, n_features)
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest = self._gt[:, test_filter].reshape(-1)

        self._actual_ntest += 1

        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors.

    Each fold is built by `semantic_cvg`: for every label, one slice of that
    label's pixels is used for test and the other pixels of the label for
    training.

    Parameters
    ----------
    ground_truth : numpy.ndarray
        Label map.
    attributes : numpy.ndarray
        Array indexed as ``attributes[row, col, feature]``.
    n_test : int
        Number of folds.
    label_ignore : scalar, optional
        Label excluded from both train and test. ``None`` (the default)
        ignores label 0.

    Raises
    ------
    ValueError
        If the first two dimensions of both arrays differ.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        if attributes.shape[:2] != ground_truth.shape[:2]:
            raise ValueError('attributes and ground_truth must have the same 2D shape')

        self._gt = ground_truth
        self._att = attributes
        self._cv_count = n_test
        self._actual_count = 0
        self._ignore = 0 if label_ignore is None else label_ignore

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(xtrain, xtest, ytrain, ytest, test_index)`` for the next fold."""
        if self._cv_count == self._actual_count:
            raise StopIteration

        split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count,
                                 label_ignore=self._ignore)
        train_index = split_map == 1
        test_index = split_map == 2

        n_features = self._att.shape[2]
        xtrain = self._att[train_index].reshape(-1, n_features)
        xtest = self._att[test_index].reshape(-1, n_features)
        ytrain = self._gt[train_index].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)

        self._actual_count += 1

        return xtrain, xtest, ytrain, ytest, test_index


def semantic_cvg(gt, nb_split, step=0, label_ignore=0):
    """Build a per-label train/test map for one cross validation fold.

    Parameters
    ----------
    gt : numpy.ndarray
        Label map.
    nb_split : int
        Number of folds; each label contributes ``count // nb_split`` test
        pixels per fold.
    step : int
        Index of the fold to build.
    label_ignore : scalar
        Label left out of the split (its pixels stay 0 in the result).

    Returns
    -------
    numpy.ndarray
        Array shaped like `gt` holding 0 for ignored pixels, 1 for training
        pixels and 2 for test pixels.
    """
    labels, counts = np.unique(gt, return_counts=True)

    split = np.zeros_like(gt)

    for label, count in zip(labels, counts):
        # Skip the ignored label explicitly instead of dropping whichever
        # label happens to be the smallest one.
        if label == label_ignore:
            continue

        # Integer division: `int(count * (1 / nb_split))` can lose a pixel to
        # float rounding (e.g. 49 * (1 / 49) < 1).
        fold_size = int(count // nb_split)
        where = np.nonzero(gt == label)
        lo, hi = fold_size * step, fold_size * (step + 1)

        split[where] = 1
        split[tuple(axis[lo:hi] for axis in where)] = 2

    return split
def _cut_bounds(size, n):
    """Return the ``(start, stop)`` pairs of `n` contiguous, disjoint cuts of ``range(size)``."""
    if n < 1 or n > size:
        raise ValueError('split must be in [1, {}], got {}'.format(size, n))

    step = size // n
    bounds = [(i * step, (i + 1) * step) for i in range(n)]
    # The last cut absorbs the remainder so that no row is left undescribed.
    bounds[-1] = (bounds[-1][0], size)
    return bounds


def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Attribute Profiles

    Compute description vectors for the given parameters. The rasters can be
    split into `split` disjoint cuts along `split_dim` (0 or 1) before the
    description proceeds; each cut is then described independently and the
    results are merged back into a single array.

    Parameters
    ----------
    rasters
        Passed as is to ``ld2dap.LoadTIFF``.
    treshold
        Converted to float and passed to ``ld2dap.Treshold``.
    areas, sd, moi
        Attribute thresholds (or ``None``) forwarded to
        ``ld2dap.AttributeProfiles``; `areas` is cast to int, `sd` and `moi`
        to float.
    split : int
        Number of cuts.
    split_dim : int
        Axis along which the rasters are cut.

    Returns
    -------
    numpy.ndarray
        Array with the first two dimensions of the loaded rasters and the
        descriptors as last dimension.

    Raises
    ------
    ValueError
        If `split` is smaller than 1 or larger than the raster size along
        `split_dim`.
    """
    # Parse attribute type. The builtin types replace the `np.float` /
    # `np.int` aliases, which were removed from NumPy.
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    d = int(split_dim)
    data = raw_out.data
    bounds = _cut_bounds(data.shape[d], int(split))
    view = np.moveaxis(data, d, 0)

    # Describe each cut on its own
    dcuts = []
    for start, stop in bounds:
        cut = np.moveaxis(view[start:stop], 0, d)

        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        vout = ld2dap.RawOutput()

        vout.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(data.shape[:2] + (dcuts[0].shape[-1],))
    out_view = np.moveaxis(descriptors, d, 0)

    for (start, stop), dcut in zip(bounds, dcuts):
        out_view[start:stop] = np.moveaxis(dcut, d, 0)

    return descriptors
def _cut_bounds(size, n):
    """Return the ``(start, stop)`` pairs of `n` contiguous, disjoint cuts of ``range(size)``."""
    if n < 1 or n > size:
        raise ValueError('split must be in [1, {}], got {}'.format(size, n))

    step = size // n
    bounds = [(i * step, (i + 1) * step) for i in range(n)]
    # The last cut absorbs the remainder so that no row is left undescribed.
    bounds[-1] = (bounds[-1][0], size)
    return bounds


def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Differential Attribute Profiles

    Compute description vectors for the given parameters. The rasters can be
    split into `split` disjoint cuts along `split_dim` (0 or 1) before the
    description proceeds; each cut is then described independently and the
    results are merged back into a single array.

    Parameters
    ----------
    rasters
        Passed as is to ``ld2dap.LoadTIFF``.
    treshold
        Converted to float and passed to ``ld2dap.Treshold``.
    areas, sd, moi
        Attribute thresholds (or ``None``) forwarded to
        ``ld2dap.AttributeProfiles``; `areas` is cast to int, `sd` and `moi`
        to float.
    split : int
        Number of cuts.
    split_dim : int
        Axis along which the rasters are cut.

    Returns
    -------
    numpy.ndarray
        Array with the first two dimensions of the loaded rasters and the
        descriptors as last dimension.

    Raises
    ------
    ValueError
        If `split` is smaller than 1 or larger than the raster size along
        `split_dim`.
    """
    # Parse attribute type. The builtin types replace the `np.float` /
    # `np.int` aliases, which were removed from NumPy.
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    d = int(split_dim)
    data = raw_out.data
    bounds = _cut_bounds(data.shape[d], int(split))
    view = np.moveaxis(data, d, 0)

    # Describe each cut on its own
    dcuts = []
    for start, stop in bounds:
        cut = np.moveaxis(view[start:stop], 0, d)

        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        diff = ld2dap.Differential()
        vout = ld2dap.RawOutput()

        vout.input = diff
        diff.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(data.shape[:2] + (dcuts[0].shape[-1],))
    out_view = np.moveaxis(descriptors, d, 0)

    for (start, stop), dcut in zip(bounds, dcuts):
        out_view[start:stop] = np.moveaxis(dcut, d, 0)

    return descriptors
def _cut_bounds(size, n):
    """Return the ``(start, stop)`` pairs of `n` contiguous, disjoint cuts of ``range(size)``."""
    if n < 1 or n > size:
        raise ValueError('split must be in [1, {}], got {}'.format(size, n))

    step = size // n
    bounds = [(i * step, (i + 1) * step) for i in range(n)]
    # The last cut absorbs the remainder so that no row is left undescribed.
    bounds[-1] = (bounds[-1][0], size)
    return bounds


def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Differential Self Dual Attribute Profiles

    Compute description vectors for the given parameters. The rasters can be
    split into `split` disjoint cuts along `split_dim` (0 or 1) before the
    description proceeds; each cut is then described independently and the
    results are merged back into a single array.

    Parameters
    ----------
    rasters
        Passed as is to ``ld2dap.LoadTIFF``.
    treshold
        Converted to float and passed to ``ld2dap.Treshold``.
    areas, sd, moi
        Attribute thresholds (or ``None``) forwarded to
        ``ld2dap.SelfDualAttributeProfiles``; `areas` is cast to int, `sd`
        and `moi` to float.
    split : int
        Number of cuts.
    split_dim : int
        Axis along which the rasters are cut.

    Returns
    -------
    numpy.ndarray
        Array with the first two dimensions of the loaded rasters and the
        descriptors as last dimension.

    Raises
    ------
    ValueError
        If `split` is smaller than 1 or larger than the raster size along
        `split_dim`.
    """
    # Parse attribute type. The builtin types replace the `np.float` /
    # `np.int` aliases, which were removed from NumPy.
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    d = int(split_dim)
    data = raw_out.data
    bounds = _cut_bounds(data.shape[d], int(split))
    view = np.moveaxis(data, d, 0)

    # Describe each cut on its own
    dcuts = []
    for start, stop in bounds:
        cut = np.moveaxis(view[start:stop], 0, d)

        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        diff = ld2dap.Differential()
        vout = ld2dap.RawOutput()

        vout.input = diff
        diff.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(data.shape[:2] + (dcuts[0].shape[-1],))
    out_view = np.moveaxis(descriptors, d, 0)

    for (start, stop), dcut in zip(bounds, dcuts):
        out_view[start:stop] = np.moveaxis(dcut, d, 0)

    return descriptors
def _cut_bounds(size, n):
    """Return the ``(start, stop)`` pairs of `n` contiguous, disjoint cuts of ``range(size)``."""
    if n < 1 or n > size:
        raise ValueError('split must be in [1, {}], got {}'.format(size, n))

    step = size // n
    bounds = [(i * step, (i + 1) * step) for i in range(n)]
    # The last cut absorbs the remainder so that no row is left undescribed.
    bounds[-1] = (bounds[-1][0], size)
    return bounds


def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Self Dual Attribute Profiles

    Compute description vectors for the given parameters. The rasters can be
    split into `split` disjoint cuts along `split_dim` (0 or 1) before the
    description proceeds; each cut is then described independently and the
    results are merged back into a single array.

    Parameters
    ----------
    rasters
        Passed as is to ``ld2dap.LoadTIFF``.
    treshold
        Converted to float and passed to ``ld2dap.Treshold``.
    areas, sd, moi
        Attribute thresholds (or ``None``) forwarded to
        ``ld2dap.SelfDualAttributeProfiles``; `areas` is cast to int, `sd`
        and `moi` to float.
    split : int
        Number of cuts.
    split_dim : int
        Axis along which the rasters are cut.

    Returns
    -------
    numpy.ndarray
        Array with the first two dimensions of the loaded rasters and the
        descriptors as last dimension.

    Raises
    ------
    ValueError
        If `split` is smaller than 1 or larger than the raster size along
        `split_dim`.
    """
    # Parse attribute type. The builtin types replace the `np.float` /
    # `np.int` aliases, which were removed from NumPy.
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    d = int(split_dim)
    data = raw_out.data
    bounds = _cut_bounds(data.shape[d], int(split))
    view = np.moveaxis(data, d, 0)

    # Describe each cut on its own
    dcuts = []
    for start, stop in bounds:
        cut = np.moveaxis(view[start:stop], 0, d)

        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        vout = ld2dap.RawOutput()

        vout.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(data.shape[:2] + (dcuts[0].shape[-1],))
    out_view = np.moveaxis(descriptors, d, 0)

    for (start, stop), dcut in zip(bounds, dcuts):
        out_view[start:stop] = np.moveaxis(dcut, d, 0)

    return descriptors