Merge branch 'develop'

This commit is contained in:
Florent Guiotte 2018-09-14 16:46:28 +02:00
commit 347dec5e51
7 changed files with 350 additions and 67 deletions

0
cvgenerators/__init__.py Normal file
View File

150
cvgenerators/jurse.py Normal file
View File

@ -0,0 +1,150 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 Mar 2018
#
# TODO details
import numpy as np
import ipdb
class Split:
    """Geographic split cross validation generator.

    Splits the ground truth `n_test` times along dimension `order_dim`.
    At each iteration one split is held out for test and the remaining
    splits are used for training.

    If used with a split first description, make sure you use compatible
    settings.

    Parameters
    ----------
    ground_truth : ndarray
        Label map; value 0 denotes unclassified samples.
    attributes : ndarray
        Descriptor array, indexable by a boolean mask of
        `ground_truth`'s shape.
    n_test : int
        Number of splits (and of iterations).
    order_dim : int
        Dimension of `ground_truth` along which the splits are cut.
    remove_unclassified : bool
        If True, unclassified samples are also removed from the
        returned test set.

    Yields
    ------
    (xtrain, xtest, ytrain, ytest, test_index) per iteration, where
    `test_index` is the boolean test mask over `ground_truth`.
    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0,
                 remove_unclassified=True):
        self._att = attributes
        self._gt = ground_truth
        self._n = n_test
        self._d = order_dim
        self._s = 0  # current split index
        self._r = remove_unclassified
        self._size = ground_truth.shape[order_dim]
        self._step = int(ground_truth.shape[order_dim] / n_test)

    def __iter__(self):
        return self

    def __next__(self):
        if self._s == self._n:
            raise StopIteration
        # Band of width `step`, cyclically shifted by `step` each iteration.
        cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step
        # `np.bool` alias was removed from NumPy; use the builtin `bool`.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # Write through a view so the band applies along dimension `d`.
        view = np.moveaxis(test_index, self._d, 0)
        view[cfilter] = True
        unclassified = self._gt == 0
        # Training: everything outside the test band, minus unclassified.
        train_index = ~test_index & ~unclassified
        if self._r:
            test_index &= ~unclassified
        xtrain = self._att[train_index]
        xtest = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest = self._gt[test_index]
        self._s += 1
        return xtrain, xtest, ytrain, ytest, test_index
class CVG_legacy:
    """Legacy geographic cross validation generator.

    Splits `attributes` (H x W x D) and `ground_truth` (H x W) into
    `n_test` cyclic bands along `order_dim`; at each iteration one band
    is used for training and the rest for testing. Yields flattened
    `(Xtrain, Xtest, Ytrain, Ytest, train_filter)` tuples.

    Raises
    ------
    ValueError
        If `attributes` and `ground_truth` differ on the first two
        dimensions.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth
        if attributes.shape[0] != ground_truth.shape[0] or \
           attributes.shape[1] != ground_truth.shape[1]:
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        return self

    def __next__(self):
        if self._actual_ntest == self._ntests:
            raise StopIteration
        # NOTE(review): `step` stays a float when `_size` is not a
        # multiple of `_ntests`, unlike Split which truncates to int.
        step = self._size / self._ntests
        # Band of width `step`, cyclically shifted each iteration.
        train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step
        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])
            Xtest = self._att[~train_filter].reshape(-1, self._att.shape[2])
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest = self._gt[~train_filter].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, self._att.shape[2])
            Xtest = self._att[:, ~train_filter].reshape(-1, self._att.shape[2])
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest = self._gt[:, ~train_filter].reshape(-1)
        self._actual_ntest += 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors.

    Iterates `n_test` times; at each step `semantic_cvg` builds a
    per-class split map of the ground truth (1 = train, 2 = test) and
    the corresponding flattened descriptor/label sets are returned.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        same_2d = (attributes.shape[0] == ground_truth.shape[0]
                   and attributes.shape[1] == ground_truth.shape[1])
        if not same_2d:
            raise ValueError('attributes and ground_truth must have the same 2D shape')
        self._gt = ground_truth
        self._att = attributes
        self._cv_count = n_test
        self._actual_count = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self._actual_count == self._cv_count:
            raise StopIteration
        split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count)
        self._actual_count += 1
        depth = self._att.shape[2]
        is_train = split_map == 1
        test_index = split_map == 2
        xtrain = self._att[is_train].reshape(-1, depth)
        xtest = self._att[test_index].reshape(-1, depth)
        ytrain = self._gt[is_train].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)
        return xtrain, xtest, ytrain, ytest, test_index
def semantic_cvg(gt, nb_split, step=0):
    """Build a per-class train/test split map of a labelled ground truth.

    For each label, roughly `1 / nb_split` of its samples (the `step`-th
    contiguous run in `np.nonzero` order) are marked for test and the
    rest for training. The first (smallest) label is skipped; in this
    codebase label 0 denotes unclassified samples.

    Parameters
    ----------
    gt : ndarray
        Label map (any dimensionality).
    nb_split : int
        Number of splits; each test part holds ~1/nb_split of a class.
    step : int
        Index of the split to mark as test.

    Returns
    -------
    ndarray
        Array shaped like `gt`: 0 = ignored, 1 = train, 2 = test.
    """
    labels, counts = np.unique(gt, return_counts=True)
    test_part = 1 / nb_split
    split = np.zeros_like(gt)
    # Skip the first label (unclassified background).
    for label, label_count in zip(labels[1:], counts[1:]):
        n_test = int(label_count * test_part)
        idx = np.nonzero(gt == label)
        t_int, t_ext = n_test * step, n_test * (step + 1)
        # Mark the whole class as train, then overwrite the step-th
        # slice as test. Tuple indexing keeps this dimension-agnostic
        # (the original hard-coded 2-D indexing `split[f[0], f[1]]`).
        split[idx] = 1
        split[tuple(ax[t_int:t_ext] for ax in idx)] = 2
    return split

View File

@ -1,30 +1,62 @@
import numpy as np
import yaml
import sys
sys.path.append('..')
import ld2dap
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
"""DFC Attribute Profiles
Compute description vectors for parameters. Rasters can be splitted along
`split_dim` before description proceeds.
"""
# Parse attribute type
treshold = float(treshold)
areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
sd = None if sd is None else np.array(sd).astype(np.float)
moi = None if moi is None else np.array(moi).astype(np.float)
# APs Pipelines
# Load and filter
loader = ld2dap.LoadTIFF(rasters)
dfc_filter = ld2dap.Treshold(treshold)
dfc_filter.input = loader
aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi)
aps.input = dfc_filter
out_vectors = ld2dap.RawOutput()
out_vectors.input = aps
normalize = ld2dap.Normalize(dtype=np.uint8)
raw_out = ld2dap.RawOutput()
# Compute vectors
out_vectors.run()
return out_vectors.data
raw_out.input = normalize
normalize.input = dfc_filter
dfc_filter.input = loader
raw_out.run()
# Split
n = split; d = split_dim
step = int(raw_out.data.shape[d] / n)
view = np.moveaxis(raw_out.data, d, 0)
cuts = list()
for i in range(n):
cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d)
cuts.append(cut)
# Describe
dcuts = list()
for cut in cuts:
rinp = ld2dap.RawInput(cut, raw_out.metadata)
aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
vout = ld2dap.RawOutput()
vout.input = aps
aps.input = rinp
vout.run()
dcuts.append(vout.data)
# Merge
descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
view = np.moveaxis(descriptors, d, 0)
for i, cut in enumerate(dcuts):
view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d)
return descriptors
def version():
return 'v0.0'
return 'v0.0'

View File

@ -9,33 +9,66 @@
# TODO details
import numpy as np
import sys
sys.path.append('..')
import ld2dap
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
# Parse parameters type
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
"""DFC Differential Attribute Profiles
Compute description vectors for parameters. Rasters can be splitted along
`split_dim` before description proceeds.
"""
# Parse attribute type
treshold = float(treshold)
areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
sd = None if sd is None else np.array(sd).astype(np.float)
moi = None if moi is None else np.array(moi).astype(np.float)
# Pipelines
# Load and filter
loader = ld2dap.LoadTIFF(rasters)
dfc_filter = ld2dap.Treshold(treshold)
normalize = ld2dap.Normalize(dtype=np.uint8)
raw_out = ld2dap.RawOutput()
raw_out.input = normalize
normalize.input = dfc_filter
dfc_filter.input = loader
aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi)
aps.input = dfc_filter
differential = ld2dap.Differential()
differential.input = aps
out_vectors = ld2dap.RawOutput()
out_vectors.input = differential
raw_out.run()
# Compute vectors
out_vectors.run()
# Split
n = split; d = split_dim
return out_vectors.data
step = int(raw_out.data.shape[d] / n)
view = np.moveaxis(raw_out.data, d, 0)
cuts = list()
for i in range(n):
cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d)
cuts.append(cut)
# Describe
dcuts = list()
for cut in cuts:
rinp = ld2dap.RawInput(cut, raw_out.metadata)
aps = ld2dap.AttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
diff = ld2dap.Differential()
vout = ld2dap.RawOutput()
vout.input = diff
diff.input = aps
aps.input = rinp
vout.run()
dcuts.append(vout.data)
# Merge
descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
view = np.moveaxis(descriptors, d, 0)
for i, cut in enumerate(dcuts):
view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d)
return descriptors
def version():
return 'v0.0'

View File

@ -1,41 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file dfc_dsdaps.py
# \file dfc_daps.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 août 2018
# \date 27 août 2018
#
# TODO details
import numpy as np
import sys
sys.path.append('..')
import ld2dap
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
# Parse parameters type
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
"""DFC Differential Self Dual Attribute Profiles
Compute description vectors for parameters. Rasters can be splitted along
`split_dim` before description proceeds.
"""
# Parse attribute type
treshold = float(treshold)
areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
sd = None if sd is None else np.array(sd).astype(np.float)
moi = None if moi is None else np.array(moi).astype(np.float)
# Pipelines
# Load and filter
loader = ld2dap.LoadTIFF(rasters)
dfc_filter = ld2dap.Treshold(treshold)
normalize = ld2dap.Normalize(dtype=np.uint8)
raw_out = ld2dap.RawOutput()
raw_out.input = normalize
normalize.input = dfc_filter
dfc_filter.input = loader
sdaps = ld2dap.SelfDualAttributeProfiles(area=areas, sd=sd, moi=moi)
sdaps.input = dfc_filter
differential = ld2dap.Differential()
differential.input = sdaps
out_vectors = ld2dap.RawOutput()
out_vectors.input = differential
raw_out.run()
# Compute vectors
out_vectors.run()
# Split
n = split; d = split_dim
return out_vectors.data
step = int(raw_out.data.shape[d] / n)
view = np.moveaxis(raw_out.data, d, 0)
cuts = list()
for i in range(n):
cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d)
cuts.append(cut)
# Describe
dcuts = list()
for cut in cuts:
rinp = ld2dap.RawInput(cut, raw_out.metadata)
aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
diff = ld2dap.Differential()
vout = ld2dap.RawOutput()
vout.input = diff
diff.input = aps
aps.input = rinp
vout.run()
dcuts.append(vout.data)
# Merge
descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
view = np.moveaxis(descriptors, d, 0)
for i, cut in enumerate(dcuts):
view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d)
return descriptors
def version():
return 'v0.0'

View File

@ -9,32 +9,64 @@
# TODO details
import numpy as np
import sys
sys.path.append('..')
import ld2dap
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
# Parse parameters type
def run(rasters, treshold=1e4, areas=None, sd=None, moi=None, split=1, split_dim=0):
"""DFC Self Dual Attribute Profiles
Compute description vectors for parameters. Rasters can be splitted along
`split_dim` before description proceeds.
"""
# Parse attribute type
treshold = float(treshold)
areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
sd = None if sd is None else np.array(sd).astype(np.float)
moi = None if moi is None else np.array(moi).astype(np.float)
# Pipelines
# Load and filter
loader = ld2dap.LoadTIFF(rasters)
dfc_filter = ld2dap.Treshold(treshold)
normalize = ld2dap.Normalize(dtype=np.uint8)
raw_out = ld2dap.RawOutput()
raw_out.input = normalize
normalize.input = dfc_filter
dfc_filter.input = loader
sdaps = ld2dap.SelfDualAttributeProfiles(area=areas, sd=sd, moi=moi)
sdaps.input = dfc_filter
out_vectors = ld2dap.RawOutput()
out_vectors.input = sdaps
raw_out.run()
# Compute vectors
out_vectors.run()
# Split
n = split; d = split_dim
return out_vectors.data
step = int(raw_out.data.shape[d] / n)
view = np.moveaxis(raw_out.data, d, 0)
cuts = list()
for i in range(n):
cut = np.moveaxis(view[i*step:(i+1)*step+1], 0, d)
cuts.append(cut)
# Describe
dcuts = list()
for cut in cuts:
rinp = ld2dap.RawInput(cut, raw_out.metadata)
aps = ld2dap.SelfDualAttributeProfiles(areas, sd, moi, normalize_to_dtype=False)
vout = ld2dap.RawOutput()
vout.input = aps
aps.input = rinp
vout.run()
dcuts.append(vout.data)
# Merge
descriptors = np.zeros(raw_out.data.shape[:2] + (dcuts[0].shape[-1],))
view = np.moveaxis(descriptors, d, 0)
for i, cut in enumerate(dcuts):
view[i*step:(i+1)*step+1] = np.moveaxis(cut, 0, d)
return descriptors
def version():
return 'v0.0'

View File

@ -15,28 +15,31 @@ expe:
raster: ./Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif
meta_labels: ./Data/ground_truth/jurse_meta_idx.csv
descriptors_script:
name: descriptors.dfc_aps
name: descriptors.dfc_sdaps
parameters:
split: 4
areas:
- 100
- 1000
- 1e4
moi:
- 0.5
- 0.7
- 0.9
rasters:
- ./Data/dfc_rasters/DEM+B_C123/UH17_GEM051_TR.tif
- ./Data/dfc_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif
treshold: 1e4
cross_validation:
name: APsCVG
package: CVGenerators
name: Split
package: cvgenerators.jurse
parameters:
n_test: 2
n_test: 4
classifier:
name: RandomForestClassifier
package: sklearn.ensemble
parameters:
min_samples_leaf: 10
n_estimators: 10
n_estimators: 100
n_jobs: -1
random_state: 0