Abort JurseSF, upgrade scripts and new cvgenerator

This commit is contained in:
Florent Guiotte 2018-09-13 12:17:20 +02:00
parent 80bea9972a
commit 2d6c399acd
5 changed files with 202 additions and 103 deletions

0
cvgenerators/__init__.py Normal file
View File

150
cvgenerators/jurse.py Normal file
View File

@ -0,0 +1,150 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file jurse.py
# \brief Cross validation generators (train/test splits over a ground truth map)
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 Mar 2018
#
# TODO details
import numpy as np
import ipdb
class Split:
    """Geographic split cross validation generator.

    Cut the ground truth into `n_test` contiguous bands of equal width along
    `order_dim`. Each iteration uses one band for testing and everything
    outside that band for training.

    Label 0 in `ground_truth` is treated as unclassified: such samples are
    never part of the training set, and are removed from the test set too
    when `remove_unclassified` is true.

    When the size along `order_dim` is not a multiple of `n_test`, the
    trailing remainder is never selected for testing (it is only ever used
    for training).

    If used with a split first description, make sure you use compatible
    settings.

    Parameters
    ----------
    ground_truth : ndarray
        Label map; 0 means unclassified.
    attributes : ndarray
        Descriptors, indexed with boolean masks shaped like `ground_truth`
        (so its leading dimensions must match `ground_truth.shape`).
    n_test : int
        Number of bands, i.e. number of train/test folds produced.
    order_dim : int
        Axis of `ground_truth` along which the bands are cut.
    remove_unclassified : bool
        Exclude unclassified samples from the test set as well.

    Yields
    ------
    tuple
        `(xtrain, xtest, ytrain, ytest, test_index)` where `test_index` is
        the boolean mask (shaped like `ground_truth`) of the test samples.
    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0,
                 remove_unclassified=True):
        self._att = attributes
        self._gt = ground_truth
        self._n = n_test
        self._d = order_dim
        self._s = 0  # index of the next fold to produce
        self._r = remove_unclassified
        self._size = ground_truth.shape[order_dim]
        self._step = int(self._size / n_test)  # band width, remainder dropped

    def __iter__(self):
        return self

    def __next__(self):
        if self._s == self._n:
            raise StopIteration

        # Select the positions of the current band along the split axis.
        cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step

        # Builtin `bool` is used because the `np.bool` alias no longer
        # exists in recent NumPy releases.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # `view` shares memory with `test_index`: writing through it marks
        # the whole band, whatever axis the split is made along.
        view = np.moveaxis(test_index, self._d, 0)
        view[cfilter] = True

        unclassified = self._gt == 0
        train_index = ~test_index & ~unclassified
        if self._r:
            test_index &= ~unclassified

        xtrain = self._att[train_index]
        xtest = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest = self._gt[test_index]

        self._s += 1
        return xtrain, xtest, ytrain, ytest, test_index
class CVG_legacy:
    """Legacy band-based cross validation generator.

    Cuts `attributes` and `ground_truth` into `n_test` bands along
    `order_dim` (0 or any other value meaning axis 1). On each iteration the
    current band is returned as the *training* part and the remaining
    positions as the *test* part, both flattened to one sample per row.

    Raises ValueError at construction when the first two dimensions of
    `attributes` and `ground_truth` differ.

    Each iteration returns `(Xtrain, Xtest, Ytrain, Ytest, train_filter)`,
    where `train_filter` is the 1-D boolean mask of the training band along
    the split axis.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

        same_2d_shape = (attributes.shape[0] == ground_truth.shape[0]
                         and attributes.shape[1] == ground_truth.shape[1])
        if not same_2d_shape:
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        return self

    def __next__(self):
        fold = self._actual_ntest
        if fold == self._ntests:
            raise StopIteration

        # Band width is kept as a float, as the comparison below only needs
        # an ordering.
        band = self._size / self._ntests
        shifted = np.arange(self._size) - band * fold
        train_filter = shifted % self._size < band
        held_out = ~train_filter

        # Build index expressions selecting along axis 0 or axis 1.
        if self._order == 0:
            train_sel, test_sel = train_filter, held_out
        else:
            train_sel = (slice(None), train_filter)
            test_sel = (slice(None), held_out)

        depth = self._att.shape[2]
        Xtrain = self._att[train_sel].reshape(-1, depth)
        Xtest = self._att[test_sel].reshape(-1, depth)
        Ytrain = self._gt[train_sel].reshape(-1)
        Ytest = self._gt[test_sel].reshape(-1)

        self._actual_ntest = fold + 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors.

    Produces `n_test` folds. For each fold, `semantic_cvg` builds a map
    marking every sample as training (1) or test (2); the descriptors and
    labels are then gathered accordingly, one sample per row.

    Raises ValueError at construction when the first two dimensions of
    `attributes` and `ground_truth` differ.

    Each iteration returns `(xtrain, xtest, ytrain, ytest, test_index)`,
    `test_index` being the boolean mask of the test samples.

    NOTE(review): `label_ignore` is accepted but not used by this class.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        self._gt = ground_truth
        self._att = attributes
        self._cv_count = n_test
        self._actual_count = 0

        same_2d_shape = (attributes.shape[0] == ground_truth.shape[0]
                         and attributes.shape[1] == ground_truth.shape[1])
        if not same_2d_shape:
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        return self

    def __next__(self):
        fold = self._actual_count
        if self._cv_count == fold:
            raise StopIteration

        split_map = semantic_cvg(self._gt, self._cv_count, fold)
        train_mask = split_map == 1
        test_index = split_map == 2
        depth = self._att.shape[2]

        xtrain = self._att[train_mask].reshape(-1, depth)
        xtest = self._att[test_index].reshape(-1, depth)
        ytrain = self._gt[train_mask].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)

        self._actual_count = fold + 1
        return xtrain, xtest, ytrain, ytest, test_index
def semantic_cvg(gt, nb_split, step=0):
    """Build a per-class train/test map for one cross validation fold.

    For every label of `gt` except 0 (unclassified), the samples of that
    label are taken in `np.nonzero` order and cut into `nb_split` chunks of
    `count // nb_split` samples. Chunk number `step` is marked as test, all
    the other samples of the label as training. Samples left over by the
    integer division are therefore always training samples.

    Parameters
    ----------
    gt : ndarray
        Label map; 0 means unclassified and is left untouched.
    nb_split : int
        Number of folds.
    step : int
        Index of the fold to build, in `[0, nb_split)`.

    Returns
    -------
    ndarray
        Array shaped (and typed) like `gt` holding 0 for unclassified,
        1 for training and 2 for test samples.
    """
    labels, counts = np.unique(gt, return_counts=True)
    split = np.zeros_like(gt)

    for label, count in zip(labels, counts):
        # Skip the unclassified label explicitly: dropping "the first
        # sorted label" would discard a real class when `gt` has no 0.
        if label == 0:
            continue

        # Integer division avoids float truncation errors such as
        # int(49 * (1 / 49)) == 0.
        fold_size = int(count // nb_split)
        start, stop = fold_size * step, fold_size * (step + 1)

        where = np.nonzero(gt == label)
        split[where] = 1
        split[tuple(axis[start:stop] for axis in where)] = 2

    return split

View File

@ -5,26 +5,62 @@ import sys
sys.path.append('..')
import ld2dap
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
    """DFC Attribute Profiles

    Compute description vectors for parameters. Rasters can be split along
    `split_dim` before description proceeds.

    The rasters are loaded, thresholded and normalized once; the resulting
    stack is cut into `split` pieces along `split_dim`, each piece is
    described independently with attribute profiles, and the per-piece
    descriptors are written back into a single array.

    Parameters
    ----------
    rasters
        Passed as is to `ld2dap.LoadTIFF`.
    treshold
        Threshold value, converted to float and given to `ld2dap.Treshold`.
    areas, sd, moi
        Attribute profile thresholds (or None); converted to numeric arrays
        before being given to `ld2dap.AttributeProfiles`.
    split : int
        Number of pieces to cut the rasters into before description.
    split_dim : int
        Axis along which the rasters are cut.

    Returns
    -------
    ndarray
        Descriptors; the first two dimensions are those of the loaded data,
        the last one is the descriptor length of the first piece.

    Raises
    ------
    ValueError
        If `split` is lower than 1.
    """
    # Parse attribute type (builtin types are used because the `np.float`
    # and `np.int` aliases no longer exist in recent NumPy releases)
    treshold = float(treshold)
    areas = None if areas is None else np.array(areas).astype(float).astype(int)
    sd = None if sd is None else np.array(sd).astype(float)
    moi = None if moi is None else np.array(moi).astype(float)
    n, d = int(split), int(split_dim)
    if n < 1:
        raise ValueError('split must be a positive integer')

    # Load and filter
    loader = ld2dap.LoadTIFF(rasters)
    dfc_filter = ld2dap.Treshold(treshold)
    normalize = ld2dap.Normalize(dtype=np.uint8)
    raw_out = ld2dap.RawOutput()

    raw_out.input = normalize
    normalize.input = dfc_filter
    dfc_filter.input = loader
    raw_out.run()

    # Split
    step = int(raw_out.data.shape[d] / n)
    view = np.moveaxis(raw_out.data, d, 0)
    # NOTE(review): every piece takes one extra slice (`+ 1`) that overlaps
    # the start of the next piece; the overlap is overwritten by the next
    # piece at merge time. Confirm this is intended for a "split first"
    # description, and that a remainder larger than one slice is acceptable
    # (those trailing slices stay at zero in the output).
    cuts = [np.moveaxis(view[i * step:(i + 1) * step + 1], 0, d)
            for i in range(n)]

    # Describe
    dcuts = []
    for cut in cuts:
        rinp = ld2dap.RawInput(cut, raw_out.metadata)
        aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
        vout = ld2dap.RawOutput()

        vout.input = aps
        aps.input = rinp
        vout.run()

        dcuts.append(vout.data)

    # Merge
    descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
    view = np.moveaxis(descriptors, d, 0)
    for i, cut in enumerate(dcuts):
        view[i * step:(i + 1) * step + 1] = np.moveaxis(cut, 0, d)

    return descriptors
def version():
    """Return the version tag of this descriptor script."""
    return 'v0.0'

View File

@ -18,94 +18,6 @@ import triskele
from .protocol import Protocol, TestError
class JurseSF(Jurse):
    """Second JURSE "split first" protocol for LiDAR classification with 2D maps.

    This second protocol split the data set before computing the attribute
    profiles to assure the classification is unbiased.

    NOTE(review): `_log`, `_time`, `_expe`, `_results_base_name` and
    `_get_ground_truth` are not defined in this class; presumably they are
    provided by the `Jurse` / `Protocol` base classes -- confirm.
    """

    def __init__(self, expe):
        super().__init__(expe, self.__class__.__name__)

    def _run(self):
        """Run description, classification and metrics, then store results.

        Raises
        ------
        TestError
            If description or classification fails; the original exception
            is attached as the cause.
        """
        self._log.info('Compute descriptors')
        try:
            descriptors = self._compute_descriptors()
        except Exception as err:
            # Chain explicitly so the root failure is reported as the cause.
            raise TestError('Error occured during description') from err
        self._time('description')

        self._log.info('Classify data')
        try:
            classification = self._compute_classification(descriptors)
        except Exception as err:
            raise TestError('Error occured during classification') from err
        self._time('classification')

        self._log.info('Run metrics')
        # Named `scores` to avoid shadowing the module-level `metrics`.
        scores = self._run_metrics(classification, descriptors)
        self._time('metrics')

        cmap = str(self._results_base_name) + '.tif'
        self._log.info('Saving classification map {}'.format(cmap))
        triskele.write(cmap, classification)

        results = OrderedDict()
        results['classification'] = cmap
        results['metrics'] = scores
        self._results = results

    def _compute_descriptors(self):
        """Import the configured descriptor script and return its output."""
        script = self._expe['descriptors_script']
        desc = importlib.import_module(script['name'])
        return desc.run(**script['parameters'])

    def _compute_classification(self, descriptors):
        """Cross validate the configured classifier and return a label map.

        The cross validation generator and the classifier are both looked
        up by package and name from the experiment description. For each
        fold a new classifier is fitted on the training part and its
        predictions are written at the test positions of the output map.
        """
        # Ground truth
        gt = self._get_ground_truth()

        # CrossVal and ML
        cv = self._expe['cross_validation']
        cl = self._expe['classifier']

        cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
        classifier = getattr(importlib.import_module(cl['package']), cl['name'])

        prediction = np.zeros_like(gt, dtype=np.uint8)

        for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
            rfc = classifier(**cl['parameters'])
            rfc.fit(xt, yt)

            ypred = rfc.predict(xv)

            prediction[ti] = ypred

        return prediction

    def _get_results(self):
        """Return the results stored by `_run`."""
        return self._results

    def _run_metrics(self, classification, descriptors):
        """Score the classification against the ground truth.

        Only the positions where `classification` is non-zero are scored.
        Returns an ordered mapping with the descriptor length, the overall
        accuracy and Cohen's kappa.
        """
        gt = self._get_ground_truth()

        f = np.nonzero(classification)
        pred = classification[f].ravel()
        gt = gt[f].ravel()

        results = OrderedDict()
        results['dimensions'] = descriptors.shape[-1]
        results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
        results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

        return results
class Jurse(Protocol):
"""First JURSE test protocol for LiDAR classification with 2D maps.

View File

@ -17,6 +17,7 @@ expe:
descriptors_script:
name: descriptors.dfc_aps
parameters:
split: 5
areas:
- 100
- 1000
@ -28,10 +29,10 @@ expe:
- ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif
treshold: 1e4
cross_validation:
name: APsCVG
package: CVGenerators
name: Split
package: cvgenerators.jurse
parameters:
n_test: 2
n_test: 5
classifier:
name: RandomForestClassifier
package: sklearn.ensemble