minigrida/cvgenerators/jurse.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 Mar 2018
#
# TODO details

import numpy as np

class Split:
    """Geographic split cross validation generator.

    The ground truth is cut into bands of equal width along `order_dim`.
    Each of the `n_test` iterations uses one band for testing and everything
    outside that band for training.

    If used with a split first description, make sure you use compatible
    settings.

    Use the `sub_sample` (and `random_state`) parameters to sub sample the
    training set.

    Each iteration returns xtrain, xtest, ytrain, ytest, test_index.

    """

    def __init__(self, ground_truth, attributes, n_test=2, order_dim=0, sub_sample=1.0, random_state=0, remove_unclassified=True):
        """Store the data and the split settings.

        ground_truth        -- label array; the value 0 marks unclassified
                               samples.
        attributes          -- array indexed with boolean masks that have the
                               shape of `ground_truth`.
        n_test              -- number of bands, i.e. number of iterations.
        order_dim           -- axis of `ground_truth` along which the bands
                               are cut.
        sub_sample          -- a training sample is kept when a uniform draw
                               in [0, 1) is below this value.
        random_state        -- seed used for the sub sampling draw.
        remove_unclassified -- when true, unclassified samples are removed
                               from the test set as well (they are always
                               removed from the training set).
        """
        self._att = attributes
        self._gt  = ground_truth
        self._n   = n_test
        self._d   = order_dim
        self._s   = 0  # index of the next fold to produce
        self._r   = remove_unclassified
        self._ssp = sub_sample
        self._rs  = random_state

        self._size = ground_truth.shape[order_dim]
        # Band width is truncated: when the size is not a multiple of n_test
        # the trailing remainder is never part of a test band.
        self._step = int(ground_truth.shape[order_dim] / n_test)

    def __iter__(self):
        """Return the generator itself (it is its own iterator)."""
        return self

    def __next__(self):
        """Return (xtrain, xtest, ytrain, ytest, test_index) for the next fold.

        `test_index` is a boolean mask with the shape of the ground truth.
        Raises StopIteration once `n_test` folds have been produced.
        """
        if self._s == self._n:
            raise StopIteration

        # 1-D mask over `order_dim`, true on the band of the current fold.
        cfilter = (np.arange(self._size) - self._step * self._s) % self._size < self._step

        # The builtin `bool` is used as dtype: the `np.bool` alias no longer
        # exists in recent NumPy releases.
        test_index = np.zeros_like(self._gt, dtype=bool)
        # `view` shares memory with `test_index`, so writing through it with
        # `order_dim` moved to the front fills the band in `test_index`.
        view = np.moveaxis(test_index, self._d, 0)
        view[cfilter] = True

        unclassified = self._gt == 0
        train_index = ~test_index & ~unclassified

        # Sub sample training. Seeding with the same value on every fold
        # makes each fold draw the same random mask (this resets NumPy's
        # global random state).
        np.random.seed(self._rs)
        train_index &= np.random.random(train_index.shape) < self._ssp

        # Remove unclassified
        if self._r:
            test_index &= ~unclassified

        xtrain = self._att[train_index]
        xtest  = self._att[test_index]
        ytrain = self._gt[train_index]
        ytest  = self._gt[test_index]

        self._s += 1

        return xtrain, xtest, ytrain, ytest, test_index


class CVG_legacy:
    """Legacy band-wise cross validation generator.

    The axis `order_dim` is divided into `n_test` bands. On each iteration
    the samples inside the current band are returned as the training set and
    all the other samples as the test set.

    Each iteration returns (Xtrain, Xtest, Ytrain, Ytest, train_filter).
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        """Store the data and check that both arrays share their 2D shape.

        attributes   -- array whose third axis holds the features.
        ground_truth -- label array with the same first two dimensions.
        n_test       -- number of bands, i.e. number of iterations.
        order_dim    -- 0 to cut along the first axis; any other value cuts
                        along the second axis.

        Raises ValueError when the first two dimensions differ.
        """
        self._att          = attributes
        self._gt           = ground_truth
        self._ntests       = n_test
        self._order        = order_dim
        self._actual_ntest = 0
        self._size         = attributes.shape[order_dim]

        if not (attributes.shape[0] == ground_truth.shape[0] and
                attributes.shape[1] == ground_truth.shape[1]):
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        """Return the generator itself (it is its own iterator)."""
        return self

    def __next__(self):
        """Return the data of the next fold.

        `train_filter` is the 1-D boolean mask over the split axis that
        selects the training band. Raises StopIteration once `n_test` folds
        have been produced.
        """
        fold = self._actual_ntest
        if fold == self._ntests:
            raise StopIteration

        # Band width is a float, bands need not have an integer width.
        width = self._size / self._ntests
        shifted = np.arange(self._size) - width * fold
        train_filter = shifted % self._size < width
        test_filter = ~train_filter

        # Build the indexers once: the mask applies either to the first or
        # to the second axis.
        if self._order == 0:
            train_sel, test_sel = (train_filter,), (test_filter,)
        else:
            train_sel = (slice(None), train_filter)
            test_sel = (slice(None), test_filter)

        # Flatten the spatial axes: one row per sample for the attributes,
        # one value per sample for the labels.
        Xtrain = self._att[train_sel].reshape(-1, self._att.shape[2])
        Xtest = self._att[test_sel].reshape(-1, self._att.shape[2])
        Ytrain = self._gt[train_sel].reshape(-1)
        Ytest = self._gt[test_sel].reshape(-1)

        self._actual_ntest = fold + 1

        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)

class APsCVG:
    """Cross Validation Generator for Attribute Profiles Descriptors

    Every iteration asks `semantic_cvg` for a train/test map of the ground
    truth and returns xtrain, xtest, ytrain, ytest, test_index.
    """

    def __init__(self, ground_truth, attributes, n_test=5, label_ignore=None):
        """Store the data and check that both arrays share their 2D shape.

        ground_truth -- 2D label array.
        attributes   -- array whose third axis holds the features.
        n_test       -- number of folds, i.e. number of iterations.
        label_ignore -- accepted but not used by this class.

        Raises ValueError when the first two dimensions differ.
        """
        self._actual_count = 0
        self._cv_count = n_test
        self._att = attributes
        self._gt = ground_truth

        if not (attributes.shape[0] == ground_truth.shape[0] and
                attributes.shape[1] == ground_truth.shape[1]):
            raise ValueError('attributes and ground_truth must have the same 2D shape')

    def __iter__(self):
        """Return the generator itself (it is its own iterator)."""
        return self

    def __next__(self):
        """Return (xtrain, xtest, ytrain, ytest, test_index) for the next fold.

        `test_index` is the boolean mask of the samples used for testing.
        Raises StopIteration once `n_test` folds have been produced.
        """
        fold = self._actual_count
        if fold == self._cv_count:
            raise StopIteration

        # In the map, 1 marks training samples and 2 marks test samples.
        split_map = semantic_cvg(self._gt, self._cv_count, fold)
        train_index = split_map == 1
        test_index = split_map == 2

        # One row per sample for the attributes, one value for the labels.
        xtrain = self._att[train_index].reshape(-1, self._att.shape[2])
        xtest = self._att[test_index].reshape(-1, self._att.shape[2])
        ytrain = self._gt[train_index].reshape(-1)
        ytest = self._gt[test_index].reshape(-1)

        self._actual_count = fold + 1

        return xtrain, xtest, ytrain, ytest, test_index

def semantic_cvg(gt, nb_split, step=0):
    """Build the per-class train/test map of one cross validation fold.

    For every label but the smallest one, the pixels carrying that label are
    listed in the order given by `np.nonzero`; a window of
    `count // nb_split` consecutive pixels, selected by `step`, is marked as
    test and the remaining pixels of the class are marked as train.

    gt       -- 2D label array.
    nb_split -- number of folds; each class is cut in that many windows.
    step     -- index of the fold (0 <= step < nb_split) whose window is
                marked as test.

    Returns an array shaped like `gt` holding 1 for train pixels, 2 for test
    pixels and 0 everywhere else. When a class count is not a multiple of
    `nb_split`, its trailing pixels are never marked as test.
    """
    labels, counts = np.unique(gt, return_counts=True)

    split = np.zeros_like(gt)

    # The smallest label is skipped -- presumably the unclassified label 0;
    # TODO confirm for ground truths without unclassified pixels.
    for label, count in zip(labels[1:], counts[1:]):
        # Integer floor division: a float product such as 49 * (1 / 49)
        # evaluates to 0.9999999999999999 and would truncate to 0.
        threshold = int(int(count) // nb_split)
        pixels = np.nonzero(gt == label)
        rows, cols = pixels[0], pixels[1]
        t_int, t_ext = threshold * step, threshold * (step + 1)
        # Mark the whole class as train, then overwrite the test window.
        split[rows, cols] = 1
        split[rows[t_int:t_ext], cols[t_int:t_ext]] = 2

    return split