diff --git a/Cross Validation Generator.ipynb b/Cross Validation Generator.ipynb new file mode 100644 index 0000000..6287810 --- /dev/null +++ b/Cross Validation Generator.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class cvg:\n", + " def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):\n", + " self._order = order_dim\n", + " self._ntests = n_test\n", + " self._actual_ntest = 0\n", + " self._size = attributes.shape[order_dim]\n", + " self._att = attributes\n", + " self._gt = ground_truth\n", + " \n", + " if attributes.shape[0] != ground_truth.shape[0] or \\\n", + " attributes.shape[1] != ground_truth.shape[1] :\n", + " raise ValueError('attributes and ground_truth must have the same 2D shape')\n", + " \n", + " def __iter__(self):\n", + " return self\n", + " \n", + " def __next__(self):\n", + " if self._actual_ntest == self._ntests:\n", + " raise StopIteration\n", + " \n", + " step = self._size / self._ntests\n", + " train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step\n", + " \n", + " if self._order == 0:\n", + " Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])\n", + " Xtest = self._att[train_filter == False].reshape(-1, self._att.shape[2])\n", + " Ytrain = self._gt[train_filter].reshape(-1, 1)\n", + " Ytest = self._gt[train_filter == False].reshape(-1, 1)\n", + " else:\n", + " Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])\n", + " Xtest = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])\n", + " Ytrain = self._gt[:,train_filter].reshape(-1, 1)\n", + " Ytest = self._gt[:,train_filter == False].reshape(-1, 1)\n", + "\n", + " \n", + " self._actual_ntest += 1\n", + " \n", + " return (Xtrain, 
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief Fold iterator over an attribute cube and its ground-truth raster.
# \author Florent Guiotte
# \version 0.1
# \date 28 Mar 2018

import numpy as np


class CVG:
    """Iterate over contiguous folds of an attribute cube and its labels.

    The data is cut into ``n_test`` contiguous bands along axis
    ``order_dim`` (0 or 1).  Each iteration yields
    ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)`` where:

    * ``train_filter`` is a 1-D boolean mask over axis ``order_dim`` that is
      ``True`` on the current band only;
    * ``Xtrain`` / ``Ytrain`` hold the samples *inside* the current band;
    * ``Xtest`` / ``Ytest`` hold every sample *outside* it.

    ``X*`` arrays are flattened to ``(n_samples, attributes.shape[2])`` and
    ``Y*`` arrays to ``(n_samples,)``.

    NOTE: the "train" part is the single band (about ``1 / n_test`` of the
    data) and the "test" part is the remainder.  Callers index results with
    ``train_filter``, so this orientation is kept as is.

    Parameters
    ----------
    attributes : array with at least 3 axes
        Attribute cube; the first two axes must match ``ground_truth``.
    ground_truth : array with at least 2 axes
        Label raster sharing the first two axes of ``attributes``.
    n_test : int, default 2
        Number of folds to generate; must be a positive integer.
    order_dim : {0, 1}, default 0
        Axis along which the bands are cut.

    Raises
    ------
    ValueError
        If ``order_dim`` is not 0 or 1, ``n_test`` is not a positive
        integer, ``attributes`` has fewer than 3 axes, or the first two
        axes of both arrays differ.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        # Validate before touching shapes so bad arguments fail with a clear
        # message instead of a late IndexError / ZeroDivisionError.
        if order_dim not in (0, 1):
            raise ValueError('order_dim must be 0 or 1')
        if n_test != int(n_test) or n_test < 1:
            raise ValueError('n_test must be a positive integer')
        if len(attributes.shape) < 3:
            raise ValueError('attributes must have at least 3 dimensions')
        if tuple(attributes.shape[:2]) != tuple(ground_truth.shape[:2]):
            raise ValueError('attributes and ground_truth must have the same 2D shape')

        self._order = order_dim
        self._ntests = int(n_test)
        self._actual_ntest = 0  # index of the next fold to produce
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

    def __iter__(self):
        """Return the iterator itself (single pass, not restartable)."""
        return self

    def __next__(self):
        """Produce the next fold or raise ``StopIteration`` when exhausted."""
        if self._actual_ntest >= self._ntests:
            raise StopIteration

        # Index i belongs to fold (i * n) // size.  This is the exact-integer
        # form of "step * k <= i < step * (k + 1)" with step = size / n, so
        # fold edges never depend on floating-point rounding.
        fold_of_index = (np.arange(self._size) * self._ntests) // self._size
        train_filter = fold_of_index == self._actual_ntest
        outside = ~train_filter
        depth = self._att.shape[2]

        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, depth)
            Xtest = self._att[outside].reshape(-1, depth)
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest = self._gt[outside].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, depth)
            Xtest = self._att[:, outside].reshape(-1, depth)
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest = self._gt[:, outside].reshape(-1)

        self._actual_ntest += 1

        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
"metadata": {}, - "outputs": [], - "source": [ + "area = np.array([10, 100, 1e3, 1e4, 1e5])\n", + "sd = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n", + "moi = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n", + "\n", + "t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n", + "attributes = t.filter(tree='tos-tree',\n", + " area=area,\n", + " standard_deviation=sd,\n", + " moment_of_inertia=moi\n", + " )\n", "attributes.shape" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "layers_stack.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n", - "test.write_image(np.rollaxis(layers_stack, 2).astype(np.float32))\n", - "test = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "layers_stack.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n", - "test.\n", - "#test = None\n", - "#test.write_tiles(layers_stack[:,:,0].astype(np.uint8), tile_width=layers_stack.shape[1], tile_height=layers_stack.shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.rollaxis(layers_stack, 2).shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tiff = libtiff.TIFFimage(np.rollaxis(layers_stack, 1), description='BDQ')\n", - "tiff.write_file('../Res/test.tif')\n", - "del tiff" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test = libtiff.TIFF.open('../Res/test.tif')\n", - "for image in test.iter_images():\n", - " 
print(image.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test = libtiff.TIFFimage(layers_stack)\n", - "test.write_file('../Res/test.tif')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(attributes.shape[2]):\n", + "for i in range(attributes.shape[2]-1):\n", " plt.figure(figsize=(16*2,3*2))\n", " plt.imshow(attributes[:,:,i])\n", " plt.colorbar()\n", - " plt.title(layers_files[i])\n", + " plt.show()\n", + " plt.figure(figsize=(16*2,3*2))\n", + " plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n", + " plt.colorbar()\n", + " #plt.title(layers_files[i])\n", + "plt.show()\n", + "plt.figure(figsize=(16*2,3*2))\n", + "plt.imshow(attributes[:,:,-1])\n", + "plt.colorbar()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classification vectors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = attributes.reshape(-1, attributes.shape[2])\n", + "\n", + "(attributes[0,0] == X[0]).all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n", + "labels = triskele.read(labels_file)\n", + "display(labels.shape)\n", + "\n", + "plt.figure(figsize=(16*2,3*2))\n", + "plt.imshow(labels)\n", + "plt.colorbar()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Y = labels.reshape(-1)\n", + "\n", + "X.shape, Y.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest Classifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib\n", + "from sklearn import 
metrics\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "import pickle\n", + "sys.path.insert(0, '..')\n", + "import CrossValidationGenerator as cvg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "importlib.reload(cvg)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import metrics\n", + "import pandas as pd\n", + "\n", + "\n", + "def scores(actual, prediction):\n", + " ct = pd.crosstab(prediction, actual,\n", + " rownames=['Prediction'], colnames=['Reference'],\n", + " margins=True, margins_name='Total',\n", + " normalize=False # all, index, columns\n", + " )\n", + " display(ct)\n", + " \n", + " scores = metrics.precision_recall_fscore_support(actual, prediction)\n", + " print(metrics.classification_report(actual, prediction)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cv_labels = np.zeros(labels[:].shape)\n", + "\n", + "for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n", + " rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n", + " rfc.fit(xtrain, ytrain)\n", + " \n", + " ypred = rfc.predict(xtest)\n", + " \n", + " display(ytest.shape, ypred.shape)\n", + " \n", + " scores(ytest, ypred)\n", + " \n", + " cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def show(im):\n", + " plt.figure(figsize=(16*2,3*2))\n", + " plt.imshow(im)\n", + " plt.colorbar()\n", " plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "show(labels)\n", + "show(cv_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "labels.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.arange(238400).reshape(-1, 4768)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../Res/classifier_0.pkl', 'wb') as f:\n", + " pickle.dump(rfc, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Yp = Y.copy()\n", + "\n", + "Yp[training == False] = rfc.predict(X[training == False])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(16*2,3*2))\n", + "plt.imshow(Y.reshape(labels.shape))\n", + "plt.colorbar()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(16*2,3*2))\n", + "plt.imshow(Yp.reshape(labels.shape))\n", + "plt.colorbar()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(16*2,3*2))\n", + "plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n", + "plt.colorbar()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class cvg:\n", + " def __init__(self, attributes, ground_truth, order_dim=0, n_test=2): \n", + " self._tests_left = n_test\n", + " \n", + " if attributes.shape[0] != ground_truth.shape[0] or \\\n", + " attributes.shape[1] != ground_truth.shape[1] :\n", + " raise ValueError('attributes and ground_truth must have the same 2D shape')\n", + " \n", + " def __iter__(self):\n", + " return self\n", + " \n", + " def __next__(self):\n", + " if self._tests_left == 0:\n", + " raise StopIteration\n", + " \n", + " train_filter = np.arange(attributes.shape) < (Y.size * .50)\n", + "\n", + " Xtrain = 42\n", + " Xtest = 432\n", + " Ytrain = 12\n", + " Ytest = 123\n", + " \n", + " return (Xtrain, Xtest, Ytrain, Ytest)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "cvg(attributes, labels[:,:-1])" + ] } ], "metadata": {