Abort JurseSF, upgrade scripts and new cvgenerator
This commit is contained in:
parent
80bea9972a
commit
2d6c399acd
0
cvgenerators/__init__.py
Normal file
0
cvgenerators/__init__.py
Normal file
150
cvgenerators/jurse.py
Normal file
150
cvgenerators/jurse.py
Normal file
@ -0,0 +1,150 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# \file jurse.py
|
||||
# \brief Cross-validation generators splitting attributes and ground truth into train/test sets
|
||||
# \author Florent Guiotte <florent.guiotte@gmail.com>
|
||||
# \version 0.1
|
||||
# \date 28 Mar 2018
|
||||
#
|
||||
# TODO details
|
||||
|
||||
import numpy as np
|
||||
import ipdb
|
||||
|
||||
class Split:
    """Geographic split cross validation generator.

    Split `n_test` times along given dimension. One split is for test, the
    others are used in training.

    If used with a split first description, make sure you use compatible
    settings.

    Iterating yields `n_test` tuples
    ``(xtrain, xtest, ytrain, ytest, test_index)`` where `test_index` is a
    boolean mask with the shape of the ground truth. Pixels whose ground
    truth is 0 are treated as unclassified: they are never used for
    training, and are removed from the test set unless
    `remove_unclassified` is false.

    The fold width is ``size // n_test`` along `order_dim`; when the size is
    not a multiple of `n_test`, the trailing slices never belong to a test
    fold and always stay on the training side.

    The object is its own iterator and can only be consumed once.

    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0, remove_unclassified=True):
        self._att = attributes
        self._gt = ground_truth
        self._n = n_test
        self._d = order_dim
        self._s = 0  # index of the next fold to produce
        self._r = remove_unclassified

        self._size = ground_truth.shape[order_dim]
        self._step = int(ground_truth.shape[order_dim] / n_test)

    def __iter__(self):
        return self

    def __next__(self):
        if self._s == self._n:
            raise StopIteration

        # Positions along the split axis that belong to the current test fold.
        cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step

        # Builtin `bool` replaces the `np.bool` alias removed from NumPy.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # `view` shares memory with `test_index`: writing through it marks
        # whole slices along the split axis.
        view = np.moveaxis(test_index, self._d, 0)
        view[cfilter] = True

        unclassified = self._gt == 0
        train_index = ~test_index & ~unclassified

        if self._r:
            test_index &= ~unclassified

        xtrain = self._att[train_index]
        xtest = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest = self._gt[test_index]

        self._s += 1

        return xtrain, xtest, ytrain, ytest, test_index
|
||||
|
||||
|
||||
class CVG_legacy:
    """Legacy cross validation generator cutting the data in bands.

    The axis `order_dim` of `attributes` is divided in `n_test` contiguous
    bands. At each iteration one band is returned as the *train* set and the
    remaining bands as the *test* set, flattened to ``(pixels, features)``
    for the attributes and ``(pixels,)`` for the ground truth.

    Each iteration returns ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)``;
    the last item is the 1-D boolean filter selecting the train band along
    the split axis. Any `order_dim` other than 0 indexes the second axis.

    Raises ValueError when the two first dimensions of `attributes` and
    `ground_truth` differ.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0  # number of folds already produced
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

        if any(attributes.shape[axis] != ground_truth.shape[axis] for axis in (0, 1)):
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        return self

    def __next__(self):
        if self._actual_ntest == self._ntests:
            raise StopIteration

        # Band width is kept as a float, the comparison below handles it.
        width = self._size / self._ntests
        offsets = np.arange(self._size) - width * self._actual_ntest
        train_filter = offsets % self._size < width
        held_out = ~train_filter

        # Index the first axis directly, or skip it to reach the second one.
        prefix = () if self._order == 0 else (slice(None),)
        train_key = prefix + (train_filter,)
        test_key = prefix + (held_out,)

        n_features = self._att.shape[2]
        Xtrain = self._att[train_key].reshape(-1, n_features)
        Xtest = self._att[test_key].reshape(-1, n_features)
        Ytrain = self._gt[train_key].reshape(-1)
        Ytest = self._gt[test_key].reshape(-1)

        self._actual_ntest += 1

        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
|
||||
|
||||
class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors

    Produces `n_test` folds. For each fold, `semantic_cvg` labels every
    pixel of the ground truth as train (1) or test (2); the attributes and
    ground truth are then gathered accordingly and returned as
    ``(xtrain, xtest, ytrain, ytest, test_index)`` where `test_index` is the
    boolean mask of the test pixels.

    `label_ignore` is accepted but not used by this class.

    Raises ValueError when the two first dimensions of `attributes` and
    `ground_truth` differ.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        self._gt = ground_truth
        self._att = attributes
        self._cv_count = n_test
        self._actual_count = 0  # number of folds already produced

        if any(attributes.shape[axis] != ground_truth.shape[axis] for axis in (0, 1)):
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        return self

    def __next__(self):
        if self._cv_count == self._actual_count:
            raise StopIteration

        split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count)
        train_mask = split_map == 1
        test_index = split_map == 2

        n_features = self._att.shape[2]
        xtrain = self._att[train_mask].reshape(-1, n_features)
        xtest = self._att[test_index].reshape(-1, n_features)
        ytrain = self._gt[train_mask].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)

        self._actual_count += 1

        return xtrain, xtest, ytrain, ytest, test_index
|
||||
|
||||
def semantic_cvg(gt, nb_split, step=0):
    """Build a per-class train/test map for one cross validation fold.

    For every label of `gt` except 0, the pixels carrying that label are
    taken in the order returned by `np.nonzero` and cut in folds of
    ``count // nb_split`` pixels. The fold number `step` is marked as test,
    every other pixel of the class as train. Pixels left over by the
    integer division are never marked as test.

    Parameters
    ----------
    gt : ndarray
        Ground truth labels; 0 is skipped and left at 0 in the output.
    nb_split : int
        Number of folds.
    step : int
        Index of the fold to mark as test.

    Returns
    -------
    ndarray
        Array like `gt` holding 0 (ignored), 1 (train) or 2 (test).
    """
    labels, counts = np.unique(gt, return_counts=True)

    split = np.zeros_like(gt)

    for label, label_count in zip(labels, counts):
        # Skip the 0 label explicitly rather than by position, so the first
        # real class is not lost when `gt` contains no 0 at all.
        if label == 0:
            continue

        # Floor division avoids the float rounding of `count * (1 / nb_split)`
        # (49 * (1 / 49) is below 1.0 and used to truncate to 0).
        fold_size = int(label_count // nb_split)
        begin, end = fold_size * step, fold_size * (step + 1)

        coords = np.nonzero(gt == label)
        split[coords] = 1
        split[tuple(axis[begin:end] for axis in coords)] = 2

    return split
|
||||
@ -5,26 +5,62 @@ import sys
|
||||
sys.path.append('..')
|
||||
import ld2dap
|
||||
|
||||
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Attribute Profiles

    Compute description vectors for parameters. Rasters can be splitted along
    `split_dim` before description proceeds.

    The rasters are loaded, thresholded and normalized to uint8, then cut in
    `split` contiguous, non-overlapping parts along `split_dim`. Attribute
    profiles are computed independently on each part and the results are
    merged back into a single array with the two first dimensions of the
    loaded data. The last part also takes the remainder when the size is not
    a multiple of `split`.

    `treshold`, `areas`, `sd` and `moi` may be given as strings or numbers;
    they are converted to numeric values before use.

    """

    # Parse attribute type
    # Builtin float/int replace the np.float/np.int aliases removed from NumPy.
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    n = int(split)
    d = split_dim

    size = raw_out.data.shape[d]
    step = size // n
    # Half-open bounds: no slice is shared between two parts, so a part is
    # never described with data from its neighbour.
    bounds = [(i * step, (i + 1) * step) for i in range(n)]
    bounds[-1] = (bounds[-1][0], size)

    view = np.moveaxis(raw_out.data, d, 0)
    cuts = [np.moveaxis(view[start:stop], 0, d) for start, stop in bounds]

    # Describe
    dcuts = list()
    for cut in cuts:
        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        vout = ld2dap.RawOutput()

        vout.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
    view = np.moveaxis(descriptors, d, 0)

    for (start, stop), cut in zip(bounds, dcuts):
        # Bring the split axis of the described part first to match `view`.
        view[start:stop] = np.moveaxis(cut, d, 0)

    return descriptors
|
||||
|
||||
def version():
    """Return the version string of this script."""
    return 'v0.0'
|
||||
|
||||
@ -18,94 +18,6 @@ import triskele
|
||||
from .protocol import Protocol, TestError
|
||||
|
||||
|
||||
class JurseSF(Jurse):
    """Second JURSE "split first" protocol for LiDAR classification with 2D maps.

    This second protocol split the data set before computing the attribute
    profiles to assure the classification is unbiased.

    The experiment dictionary is read for the keys `descriptors_script`,
    `cross_validation` and `classifier`; each names a module (and a class
    for the last two) imported dynamically, with its parameters.

    """
    # NOTE(review): the base class `Jurse` appears to be defined after this
    # class in the module; if so this definition fails at import time.
    # Confirm the definition order.

    def __init__(self, expe):
        super().__init__(expe, self.__class__.__name__)

    def _run(self):
        """Describe, classify, score, then save the classification map.

        Raises TestError when description or classification fails; the
        original exception is chained as its cause.
        """
        self._log.info('Compute descriptors')
        try:
            descriptors = self._compute_descriptors()
        except Exception as err:
            raise TestError('Error occured during description') from err
        self._time('description')

        self._log.info('Classify data')
        try:
            classification = self._compute_classification(descriptors)
        except Exception as err:
            raise TestError('Error occured during classification') from err
        self._time('classification')

        self._log.info('Run metrics')
        metrics = self._run_metrics(classification, descriptors)
        self._time('metrics')

        cmap = str(self._results_base_name) + '.tif'
        self._log.info('Saving classification map {}'.format(cmap))
        triskele.write(cmap, classification)

        results = OrderedDict()
        results['classification'] = cmap
        results['metrics'] = metrics
        self._results = results

    def _compute_descriptors(self):
        """Import the descriptors script and return the output of its `run`."""
        script = self._expe['descriptors_script']

        desc = importlib.import_module(script['name'])
        att = desc.run(**script['parameters'])

        return att

    def _compute_classification(self, descriptors):
        """Return the prediction map built over all cross validation folds.

        Each fold trains a fresh classifier and writes its predictions at
        the test positions of the fold. Positions never tested stay at 0.
        """
        # Ground truth
        gt = self._get_ground_truth()

        # CrossVal and ML
        cv = self._expe['cross_validation']
        cl = self._expe['classifier']

        cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
        classifier = getattr(importlib.import_module(cl['package']), cl['name'])

        prediction = np.zeros_like(gt, dtype=np.uint8)

        for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
            rfc = classifier(**cl['parameters'])
            rfc.fit(xt, yt)

            ypred = rfc.predict(xv)

            prediction[ti] = ypred

        return prediction

    def _get_results(self):
        return self._results

    def _run_metrics(self, classification, descriptors):
        """Score the classification against the ground truth.

        Only the positions where `classification` is non-zero are compared.
        Returns an ordered mapping with the descriptor dimension count, the
        overall accuracy and the Cohen kappa score.
        """
        gt = self._get_ground_truth()

        f = np.nonzero(classification)
        pred = classification[f].ravel()
        gt = gt[f].ravel()

        results = OrderedDict()
        results['dimensions'] = descriptors.shape[-1]
        results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
        results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

        return results
|
||||
|
||||
|
||||
class Jurse(Protocol):
|
||||
"""First JURSE test protocol for LiDAR classification with 2D maps.
|
||||
|
||||
|
||||
@ -17,6 +17,7 @@ expe:
|
||||
descriptors_script:
|
||||
name: descriptors.dfc_aps
|
||||
parameters:
|
||||
split: 5
|
||||
areas:
|
||||
- 100
|
||||
- 1000
|
||||
@ -28,10 +29,10 @@ expe:
|
||||
- ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif
|
||||
treshold: 1e4
|
||||
cross_validation:
|
||||
name: APsCVG
|
||||
package: CVGenerators
|
||||
name: Split
|
||||
package: cvgenerators.jurse
|
||||
parameters:
|
||||
n_test: 2
|
||||
n_test: 5
|
||||
classifier:
|
||||
name: RandomForestClassifier
|
||||
package: sklearn.ensemble
|
||||
|
||||
Loading…
Reference in New Issue
Block a user