From 2d6c399acd3ea4f4ccce09f7dc68a5c4b9cf3c77 Mon Sep 17 00:00:00 2001
From: Karamaz0V1 <florent.guiotte@gmail.com>
Date: Thu, 13 Sep 2018 12:17:20 +0200
Subject: [PATCH] Abort JurseSF, upgrade scripts and new cvgenerator

---
 cvgenerators/__init__.py |   0
 cvgenerators/jurse.py    | 150 +++++++++++++++++++++++++++++++++++++++
 descriptors/dfc_aps.py   |  60 ++++++++++++----
 protocols/jurse.py       |  88 -----------------------
 test_mockup.yml          |   7 +-
 5 files changed, 202 insertions(+), 103 deletions(-)
 create mode 100644 cvgenerators/__init__.py
 create mode 100644 cvgenerators/jurse.py

diff --git a/cvgenerators/__init__.py b/cvgenerators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cvgenerators/jurse.py b/cvgenerators/jurse.py
new file mode 100644
index 0000000..1f6948e
--- /dev/null
+++ b/cvgenerators/jurse.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# \file CrossValidationGenerator.py
+# \brief TODO
+# \author Florent Guiotte <florent.guiotte@gmail.com>
+# \version 0.1
+# \date 28 Mar 2018
+#
+# TODO details
+
+import numpy as np
+import ipdb
+
+class Split:
+    """Geographic split cross validation generator.
+
+    Split `n_test` times along given dimension. One split is for test, the
+    others are used in training. 
+
+    If used with a split first description, make sure you use compatible
+    settings.
+
+    """
+
+    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0, remove_unclassified=True):
+        self._att = attributes
+        self._gt  = ground_truth
+        self._n   = n_test
+        self._d   = order_dim
+        self._s   = 0
+        self._r   = remove_unclassified
+
+        self._size = ground_truth.shape[order_dim]
+        self._step = int(ground_truth.shape[order_dim] / n_test)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self._s == self._n:
+            raise StopIteration
+        
+        cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step
+
+        test_index = np.zeros_like(self._gt, dtype=np.bool)
+        view = np.moveaxis(test_index, self._d, 0)
+        view[cfilter] = True 
+
+        unclassified = self._gt == 0
+        train_index = ~test_index & ~unclassified
+
+        if self._r:
+            test_index &= ~unclassified
+
+        #ipdb.set_trace()
+        xtrain = self._att[train_index]
+        xtest  = self._att[test_index]
+        ytrain = self._gt[train_index]
+        ytest  = self._gt[test_index]
+
+        self._s += 1
+
+        return xtrain, xtest, ytrain, ytest, test_index
+
+
+class CVG_legacy:
+    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
+        self._order        = order_dim
+        self._ntests       = n_test
+        self._actual_ntest = 0
+        self._size         = attributes.shape[order_dim]
+        self._att          = attributes
+        self._gt           = ground_truth
+        
+        if attributes.shape[0] != ground_truth.shape[0] or \
+           attributes.shape[1] != ground_truth.shape[1] :
+                raise ValueError('attributes and ground_truth must have the same 2D shape')
+        
+    def __iter__(self):
+        return self
+    
+    def __next__(self):
+        if self._actual_ntest == self._ntests:
+            raise StopIteration
+        
+        step = self._size / self._ntests
+        train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step
+        
+        if self._order == 0:
+            Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])
+            Xtest  = self._att[train_filter == False].reshape(-1, self._att.shape[2])
+            Ytrain = self._gt[train_filter].reshape(-1)
+            Ytest  = self._gt[train_filter == False].reshape(-1)
+        else:
+            Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])
+            Xtest  = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])
+            Ytrain = self._gt[:,train_filter].reshape(-1)
+            Ytest  = self._gt[:,train_filter == False].reshape(-1)
+
+        
+        self._actual_ntest += 1
+        
+        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
+
+class APsCVG:
+    """Cross Validation Generator for Attribute Profiles Descriptors"""
+    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
+        self._gt = ground_truth
+        self._att = attributes
+        self._cv_count = n_test
+        self._actual_count = 0
+        
+        if attributes.shape[0] != ground_truth.shape[0] or \
+           attributes.shape[1] != ground_truth.shape[1] :
+                raise ValueError('attributes and ground_truth must have the same 2D shape')
+        
+    def __iter__(self):
+        return self
+    
+    def __next__(self):
+        if self._cv_count == self._actual_count:
+            raise StopIteration
+        
+        split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count)
+        xtrain = self._att[split_map == 1].reshape(-1, self._att.shape[2])
+        xtest  = self._att[split_map == 2].reshape(-1, self._att.shape[2])
+        ytrain = self._gt[split_map == 1].reshape(-1)
+        ytest  = self._gt[split_map == 2].reshape(-1)
+        test_index = split_map == 2
+        
+        self._actual_count += 1
+       
+        return xtrain, xtest, ytrain, ytest, test_index
+    
+def semantic_cvg(gt, nb_split, step=0):
+    count = np.unique(gt, return_counts=True)
+
+    test_part = 1 / nb_split
+
+    split = np.zeros_like(gt)
+
+    for lbli, lblc in zip(count[0][1:], count[1][1:]):
+        treshold = int(lblc * test_part)
+        #print('lbli:{}, count:{}, train:{}'.format(lbli, lblc, treshold))
+        f = np.nonzero(gt == lbli)
+        t_int, t_ext = treshold * step, treshold * (step + 1)
+        split[f[0], f[1]] = 1
+        split[f[0][t_int:t_ext], f[1][t_int:t_ext]] = 2
+
+    return split
diff --git a/descriptors/dfc_aps.py b/descriptors/dfc_aps.py
index c632f9a..860c81c 100644
--- a/descriptors/dfc_aps.py
+++ b/descriptors/dfc_aps.py
@@ -5,26 +5,62 @@ import sys
 sys.path.append('..')
 import ld2dap
 
-def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
+def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
+    """DFC Attribute Profiles
+
+    Compute description vectors for parameters. Rasters can be splitted along
+    `split_dim` before description proceeds.
+
+    """
+
     # Parse attribute type
     treshold = float(treshold)
     areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
     sd = None if sd is None else np.array(sd).astype(np.float)
     moi = None if moi is None else np.array(moi).astype(np.float)
 
-    # APs Pipelines
+    # Load and filter 
     loader = ld2dap.LoadTIFF(rasters)
     dfc_filter = ld2dap.Treshold(treshold)
-    dfc_filter.input = loader
-    aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi)
-    aps.input = dfc_filter
-    out_vectors = ld2dap.RawOutput()
-    out_vectors.input = aps
+    normalize = ld2dap.Normalize(dtype=np.uint8)
+    raw_out = ld2dap.RawOutput()
 
-    # Compute vectors
-    out_vectors.run()
-    
-    return out_vectors.data
+    raw_out.input = normalize
+    normalize.input = dfc_filter
+    dfc_filter.input = loader
+    raw_out.run()
+
+    # Split
+    n = split; d = split_dim
+
+    step = int(raw_out.data.shape[d] / n)
+    view = np.moveaxis(raw_out.data, d, 0)
+    cuts = list()
+    for i in range(n):
+        cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d)
+        cuts.append(cut)
+
+    # Describe
+    dcuts = list()
+    for cut in cuts:
+        rinp = ld2dap.RawInput(cut, raw_out.metadata)
+        aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
+        vout = ld2dap.RawOutput()
+        
+        vout.input = aps
+        aps.input = rinp
+        vout.run()
+        
+        dcuts.append(vout.data)
+
+    # Merge
+    descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
+    view = np.moveaxis(descriptors, d, 0)
+
+    for i, cut in enumerate(dcuts):
+        view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d)
+
+    return descriptors
 
 def version():
-    return 'v0.0'
\ No newline at end of file
+    return 'v0.0'
diff --git a/protocols/jurse.py b/protocols/jurse.py
index 2c6dfbc..1f41485 100644
--- a/protocols/jurse.py
+++ b/protocols/jurse.py
@@ -18,94 +18,6 @@ import triskele
 from .protocol import Protocol, TestError
 
 
-class JurseSF(Jurse):
-    """Second JURSE "split first" protocol for LiDAR classification with 2D maps.
-
-    This second protocol split the data set before computing the attribute
-    profiles to assure the classification is unbiased.
-
-    """
-
-    def __init__(self, expe):
-        super().__init__(expe, self.__class__.__name__)
-
-    def _run(self):
-        self._log.info('Compute descriptors')
-        try:
-            descriptors = self._compute_descriptors()
-        except Exception:
-            raise TestError('Error occured during description')
-        self._time('description')
-
-        self._log.info('Classify data')
-        try:
-            classification = self._compute_classification(descriptors)
-        except Exception:
-            raise TestError('Error occured during classification')
-        self._time('classification')
-
-        self._log.info('Run metrics')
-        metrics = self._run_metrics(classification, descriptors)
-        self._time('metrics')
-
-        cmap = str(self._results_base_name) + '.tif'
-        self._log.info('Saving classification map {}'.format(cmap))
-        triskele.write(cmap, classification)
-
-        results = OrderedDict()
-        results['classification'] = cmap
-        results['metrics'] = metrics
-        self._results = results
-
-    def _compute_descriptors(self):
-        script = self._expe['descriptors_script']
-
-        desc = importlib.import_module(script['name'])
-        att = desc.run(**script['parameters'])
-
-        return att
-
-    def _compute_classification(self, descriptors):
-        # Ground truth
-        gt = self._get_ground_truth()
-
-        # CrossVal and ML
-        cv = self._expe['cross_validation']
-        cl = self._expe['classifier']
-
-        cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
-        classifier = getattr(importlib.import_module(cl['package']), cl['name'])
-
-        prediction = np.zeros_like(gt, dtype=np.uint8)
-
-        for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
-            rfc = classifier(**cl['parameters'])
-            rfc.fit(xt, yt)
-
-            ypred = rfc.predict(xv)
-
-            prediction[ti] = ypred
-
-        return prediction
-
-    def _get_results(self):
-        return self._results
-
-    def _run_metrics(self, classification, descriptors):
-        gt = self._get_ground_truth()
-
-        f = np.nonzero(classification)
-        pred = classification[f].ravel()
-        gt = gt[f].ravel()
-
-        results = OrderedDict()
-        results['dimensions'] = descriptors.shape[-1]
-        results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
-        results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))
-
-        return results
-
-
 class Jurse(Protocol):
     """First JURSE test protocol for LiDAR classification with 2D maps.
 
diff --git a/test_mockup.yml b/test_mockup.yml
index 503caee..4f9a518 100644
--- a/test_mockup.yml
+++ b/test_mockup.yml
@@ -17,6 +17,7 @@ expe:
   descriptors_script:
     name: descriptors.dfc_aps
     parameters:
+      split: 5
       areas:
       - 100
       - 1000
@@ -28,10 +29,10 @@ expe:
       - ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif
       treshold: 1e4
   cross_validation:
-    name: APsCVG
-    package: CVGenerators
+    name: Split
+    package: cvgenerators.jurse
     parameters:
-      n_test: 2
+      n_test: 5
   classifier:
     name: RandomForestClassifier
     package: sklearn.ensemble