#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief TODO
# \author Florent Guiotte
# \version 0.1
# \date 28 Mar 2018
#
# TODO details

import numpy as np


def _check_same_2d_shape(attributes, ground_truth):
    """Raise ValueError unless both arrays agree on their first two axes."""
    if attributes.shape[0] != ground_truth.shape[0] or \
            attributes.shape[1] != ground_truth.shape[1]:
        raise ValueError('attributes and ground_truth must have the same 2D shape')


class Split:
    """Geographic split cross validation generator.

    Split `n_test` times along given dimension. One split is for test, the
    others are used in training.

    If used with a split first description, make sure you use compatible
    settings.

    The object is its own iterator and is single-pass: once the `n_test`
    folds have been produced it stays exhausted.

    Parameters
    ----------
    ground_truth : ndarray
        Label map. Pixels whose label is 0 are treated as unclassified and
        are never part of the training set.
    attributes : ndarray
        Attributes, indexed with boolean masks shaped like `ground_truth`.
    n_test : int
        Number of folds. The fold width is ``size // n_test``, so when the
        size along `order_dim` is not a multiple of `n_test` the trailing
        ``size % n_test`` slices are never part of a test fold.
    order_dim : int
        Axis of `ground_truth` along which the folds are cut.
    remove_unclassified : bool
        When true, unclassified pixels are removed from the test mask too.
    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0,
                 remove_unclassified=True):
        self._att = attributes
        self._gt = ground_truth
        self._n = n_test
        self._d = order_dim
        self._s = 0  # index of the next fold to produce
        self._r = remove_unclassified
        self._size = ground_truth.shape[order_dim]
        self._step = int(self._size // n_test)

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(xtrain, xtest, ytrain, ytest, test_index)`` for the next fold.

        `test_index` is a boolean mask shaped like the ground truth; the
        four other values are the attributes / labels selected by the
        train and test masks.
        """
        if self._s == self._n:
            raise StopIteration

        # Window of `_step` consecutive positions along the split axis,
        # starting at `_step * _s`.
        offsets = (np.arange(self._size) - self._step * self._s) % self._size
        cfilter = offsets < self._step

        # Builtin `bool` instead of the `np.bool` alias, which was removed
        # from NumPy in 1.24.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # moveaxis returns a view, so writing through it fills `test_index`.
        np.moveaxis(test_index, self._d, 0)[cfilter] = True

        unclassified = self._gt == 0
        train_index = ~test_index & ~unclassified
        if self._r:
            test_index &= ~unclassified

        xtrain = self._att[train_index]
        xtest = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest = self._gt[test_index]

        self._s += 1
        return xtrain, xtest, ytrain, ytest, test_index


class CVG_legacy:
    """Legacy split generator cutting the data in `n_test` bands.

    Note that, for each fold, the selected band (``1 / n_test`` of the
    axis) is returned as the *training* set and the remainder as the test
    set; the mask returned last is the one selecting the training band.

    Parameters
    ----------
    attributes : ndarray
        Attributes; the first two axes must match `ground_truth`, the
        third axis is kept as the feature axis.
    ground_truth : ndarray
        Label map.
    n_test : int
        Number of folds.
    order_dim : int
        0 to cut along the first axis, any other value to cut along the
        second axis.

    Raises
    ------
    ValueError
        If `attributes` and `ground_truth` differ on their first two axes.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

        _check_same_2d_shape(attributes, ground_truth)

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)`` for the next fold."""
        if self._actual_ntest == self._ntests:
            raise StopIteration

        # True division on purpose: a fractional step lets the bands cover
        # the whole axis even when its size is not a multiple of n_test.
        step = self._size / self._ntests
        offsets = (np.arange(self._size) - step * self._actual_ntest) % self._size
        train_filter = offsets < step
        test_filter = ~train_filter

        n_features = self._att.shape[2]
        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, n_features)
            Xtest = self._att[test_filter].reshape(-1, n_features)
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest = self._gt[test_filter].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, n_features)
            Xtest = self._att[:, test_filter].reshape(-1, n_features)
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest = self._gt[:, test_filter].reshape(-1)

        self._actual_ntest += 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)


class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors

    Each fold is built by `semantic_cvg`, which splits every label
    separately.

    Parameters
    ----------
    ground_truth : ndarray
        2D label map.
    attributes : ndarray
        Attributes; the first two axes must match `ground_truth`, the
        third axis is kept as the feature axis.
    n_test : int
        Number of folds.
    label_ignore : scalar or array_like, optional
        Label(s) left out of both the train and the test sets. With the
        default ``None`` the smallest label of `ground_truth` is left out.

    Raises
    ------
    ValueError
        If `attributes` and `ground_truth` differ on their first two axes.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        self._gt = ground_truth
        self._att = attributes
        self._cv_count = n_test
        self._actual_count = 0
        self._label_ignore = label_ignore

        _check_same_2d_shape(attributes, ground_truth)

    def __iter__(self):
        return self

    def __next__(self):
        """Return ``(xtrain, xtest, ytrain, ytest, test_index)`` for the next fold."""
        if self._cv_count == self._actual_count:
            raise StopIteration

        split_map = semantic_cvg(self._gt, self._cv_count, self._actual_count,
                                 self._label_ignore)
        train_index = split_map == 1
        test_index = split_map == 2

        n_features = self._att.shape[2]
        xtrain = self._att[train_index].reshape(-1, n_features)
        xtest = self._att[test_index].reshape(-1, n_features)
        ytrain = self._gt[train_index].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)

        self._actual_count += 1
        return xtrain, xtest, ytrain, ytest, test_index


def semantic_cvg(gt, nb_split, step=0, label_ignore=None):
    """Build a per-label train/test map for one cross validation fold.

    Parameters
    ----------
    gt : ndarray
        2D label map.
    nb_split : int
        Number of folds; each label contributes ``count // nb_split`` of
        its pixels to the test set of a fold.
    step : int
        Index of the fold to build.
    label_ignore : scalar or array_like, optional
        Label(s) to leave out of the split. With the default ``None`` the
        smallest label found in `gt` is left out.

    Returns
    -------
    ndarray
        Array shaped like `gt` (and of the same dtype) holding 0 for
        pixels left out, 1 for training pixels and 2 for test pixels.
    """
    labels, counts = np.unique(gt, return_counts=True)
    if label_ignore is None:
        # np.unique sorts the labels: drop the smallest one.
        labels, counts = labels[1:], counts[1:]
    else:
        keep = ~np.isin(labels, label_ignore)
        labels, counts = labels[keep], counts[keep]

    split = np.zeros_like(gt)

    for label, count in zip(labels, counts):
        # Integer floor division: `int(count * (1 / nb_split))` can lose a
        # pixel to float rounding (e.g. 49 * (1 / 49) truncates to 0).
        fold_size = int(int(count) // nb_split)
        start, stop = fold_size * step, fold_size * (step + 1)

        idx = np.nonzero(gt == label)
        split[idx[0], idx[1]] = 1
        split[idx[0][start:stop], idx[1][start:stop]] = 2

    return split