CVGenerator & Notebooks

2018-03-29 11:58:09 +02:00 · 2018-03-29 11:58:09 +02:00 · 2ea561b4ba
commit 2ea561b4ba
parent c110ea1d27
3 changed files with 423 additions and 100 deletions
--- a/Generator.ipynb
+++ b/Generator.ipynb
@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class cvg:\n",
+    "    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):\n",
+    "        self._order        = order_dim\n",
+    "        self._ntests       = n_test\n",
+    "        self._actual_ntest = 0\n",
+    "        self._size         = attributes.shape[order_dim]\n",
+    "        self._att          = attributes\n",
+    "        self._gt           = ground_truth\n",
+    "        \n",
+    "        if attributes.shape[0] != ground_truth.shape[0] or \\\n",
+    "           attributes.shape[1] != ground_truth.shape[1] :\n",
+    "                raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
+    "        \n",
+    "    def __iter__(self):\n",
+    "        return self\n",
+    "    \n",
+    "    def __next__(self):\n",
+    "        if self._actual_ntest == self._ntests:\n",
+    "            raise StopIteration\n",
+    "        \n",
+    "        step = self._size / self._ntests\n",
+    "        train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step\n",
+    "        \n",
+    "        if self._order == 0:\n",
+    "            Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])\n",
+    "            Xtest  = self._att[train_filter == False].reshape(-1, self._att.shape[2])\n",
+    "            Ytrain = self._gt[train_filter].reshape(-1, 1)\n",
+    "            Ytest  = self._gt[train_filter == False].reshape(-1, 1)\n",
+    "        else:\n",
+    "            Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])\n",
+    "            Xtest  = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])\n",
+    "            Ytrain = self._gt[:,train_filter].reshape(-1, 1)\n",
+    "            Ytest  = self._gt[:,train_filter == False].reshape(-1, 1)\n",
+    "\n",
+    "        \n",
+    "        self._actual_ntest += 1\n",
+    "        \n",
+    "        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = np.arange(100*200*10).reshape(100, 200, 10)\n",
+    "Y = np.arange(100 * 200).reshape(100, 200)\n",
+    "\n",
+    "for xn, xt, yn, yt, t in cvg(X, Y, 10, 1):\n",
+    "        disp = np.zeros(Y.shape)\n",
+    "        disp[:,t] = 1.\n",
+    "        plt.imshow(disp)\n",
+    "        plt.show()\n",
+    "        \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from CrossValidationGenerator import CVG"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/CrossValidationGenerator.py
+++ b/CrossValidationGenerator.py
@ -0,0 +1,50 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# \file CrossValidationGenerator.py
+# \brief Cross-validation split generator for raster attributes and labels
+# \author Florent Guiotte <florent.guiotte@gmail.com>
+# \version 0.1
+# \date 28 Mar 2018
+#
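+# Cuts the raster into n_test contiguous bands along one of its first two
+# axes. Each iteration returns one band as the training set and the remaining
+# pixels as the test set, together with the boolean mask selecting the band.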
+
+import numpy as np
+
+class CVG:
+    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
+        self._order        = order_dim
+        self._ntests       = n_test
+        self._actual_ntest = 0
+        self._size         = attributes.shape[order_dim]
+        self._att          = attributes
+        self._gt           = ground_truth
+        
+        if attributes.shape[0] != ground_truth.shape[0] or \
+           attributes.shape[1] != ground_truth.shape[1] :
+                raise ValueError('attributes and ground_truth must have the same 2D shape')
+        
+    def __iter__(self):
+        return self
+    
+    def __next__(self):
+        if self._actual_ntest == self._ntests:
+            raise StopIteration
+        
+        step = self._size / self._ntests
+        train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step
+        
+        if self._order == 0:
+            Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])
+            Xtest  = self._att[train_filter == False].reshape(-1, self._att.shape[2])
+            Ytrain = self._gt[train_filter].reshape(-1)
+            Ytest  = self._gt[train_filter == False].reshape(-1)
+        else:
+            Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])
+            Xtest  = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])
+            Ytrain = self._gt[:,train_filter].reshape(-1)
+            Ytest  = self._gt[:,train_filter == False].reshape(-1)
+
+        
+        self._actual_ntest += 1
+        
+        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
--- a/Notebooks/Attribute
+++ b/Notebooks/Attribute
@ -9,7 +9,6 @@
    "import sys\n",
    "from pathlib import Path\n",
    "import numpy as np\n",
-    "import libtiff\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "triskele_path = Path('../triskele/python/')\n",
@ -37,7 +36,8 @@
    "    '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n",
    "    '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n",
    "    '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n",
-    "    '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif'\n",
+    "    '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif',\n",
+    "    #'../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'\n",
    "]"
   ]
  },
@ -78,7 +78,7 @@
    "\n",
    "for file in layers_files:\n",
    "    print('Loading {}'.format(file))\n",
-    "    layer  = libtiff.TIFF.open(file).read_image()\n",
+    "    layer = triskele.read(file)\n",
    "    DFC_filter(layer)\n",
    "    layers.append(layer)\n",
    "\n",
@ -119,114 +119,281 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "t = triskele.Triskele(layers_stack[0], verbose=False)\n",
-    "attributes = t.filter()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "area = np.array([10, 100, 1e3, 1e4, 1e5])\n",
+    "sd   = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n",
+    "moi  = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n",
+    "\n",
+    "t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n",
+    "attributes = t.filter(tree='tos-tree',\n",
+    "                      area=area,\n",
+    "                      standard_deviation=sd,\n",
+    "                      moment_of_inertia=moi\n",
+    "                     )\n",
    "attributes.shape"
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [],
   "source": [
-    "layers_stack.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
-    "test.write_image(np.rollaxis(layers_stack, 2).astype(np.float32))\n",
-    "test = None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "layers_stack.shape[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
-    "test.\n",
-    "#test = None\n",
-    "#test.write_tiles(layers_stack[:,:,0].astype(np.uint8), tile_width=layers_stack.shape[1], tile_height=layers_stack.shape[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.rollaxis(layers_stack, 2).shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tiff = libtiff.TIFFimage(np.rollaxis(layers_stack, 1), description='BDQ')\n",
-    "tiff.write_file('../Res/test.tif')\n",
-    "del tiff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = libtiff.TIFF.open('../Res/test.tif')\n",
-    "for image in test.iter_images():\n",
-    "    print(image.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = libtiff.TIFFimage(layers_stack)\n",
-    "test.write_file('../Res/test.tif')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i in range(attributes.shape[2]):\n",
+    "for i in range(attributes.shape[2]-1):\n",
    "    plt.figure(figsize=(16*2,3*2))\n",
    "    plt.imshow(attributes[:,:,i])\n",
    "    plt.colorbar()\n",
-    "    plt.title(layers_files[i])\n",
+    "    plt.show()\n",
+    "    plt.figure(figsize=(16*2,3*2))\n",
+    "    plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n",
+    "    plt.colorbar()\n",
+    "    #plt.title(layers_files[i])\n",
+    "plt.show()\n",
+    "plt.figure(figsize=(16*2,3*2))\n",
+    "plt.imshow(attributes[:,:,-1])\n",
+    "plt.colorbar()\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Classification vectors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = attributes.reshape(-1, attributes.shape[2])\n",
+    "\n",
+    "(attributes[0,0] == X[0]).all()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
+    "labels = triskele.read(labels_file)\n",
+    "display(labels.shape)\n",
+    "\n",
+    "plt.figure(figsize=(16*2,3*2))\n",
+    "plt.imshow(labels)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Y = labels.reshape(-1)\n",
+    "\n",
+    "X.shape, Y.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Random Forest Classifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib\n",
+    "from sklearn import metrics\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "import pickle\n",
+    "sys.path.insert(0, '..')\n",
+    "import CrossValidationGenerator as cvg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "importlib.reload(cvg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import metrics\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "def scores(actual, prediction):\n",
+    "    ct = pd.crosstab(prediction, actual,\n",
+    "            rownames=['Prediction'], colnames=['Reference'],\n",
+    "            margins=True, margins_name='Total',\n",
+    "            normalize=False # all, index, columns\n",
+    "            )\n",
+    "    display(ct)\n",
+    "    \n",
+    "    scores = metrics.precision_recall_fscore_support(actual, prediction)\n",
+    "    print(metrics.classification_report(actual, prediction))    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cv_labels = np.zeros(labels[:].shape)\n",
+    "\n",
+    "for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n",
+    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
+    "    rfc.fit(xtrain, ytrain)\n",
+    "    \n",
+    "    ypred = rfc.predict(xtest)\n",
+    "    \n",
+    "    display(ytest.shape, ypred.shape)\n",
+    "    \n",
+    "    scores(ytest, ypred)\n",
+    "    \n",
+    "    cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def show(im):\n",
+    "    plt.figure(figsize=(16*2,3*2))\n",
+    "    plt.imshow(im)\n",
+    "    plt.colorbar()\n",
    "    plt.show()"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show(labels)\n",
+    "show(cv_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labels.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.arange(238400).reshape(-1, 4768)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('../Res/classifier_0.pkl', 'wb') as f:\n",
+    "    pickle.dump(rfc, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Yp = Y.copy()\n",
+    "\n",
+    "Yp[training == False] = rfc.predict(X[training == False])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(16*2,3*2))\n",
+    "plt.imshow(Y.reshape(labels.shape))\n",
+    "plt.colorbar()\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=(16*2,3*2))\n",
+    "plt.imshow(Yp.reshape(labels.shape))\n",
+    "plt.colorbar()\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=(16*2,3*2))\n",
+    "plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class cvg:\n",
+    "    def __init__(self, attributes, ground_truth, order_dim=0, n_test=2):      \n",
+    "        self._tests_left = n_test\n",
+    "        \n",
+    "        if attributes.shape[0] != ground_truth.shape[0] or \\\n",
+    "           attributes.shape[1] != ground_truth.shape[1] :\n",
+    "                raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
+    "        \n",
+    "    def __iter__(self):\n",
+    "        return self\n",
+    "    \n",
+    "    def __next__(self):\n",
+    "        if self._tests_left == 0:\n",
+    "            raise StopIteration\n",
+    "        \n",
+    "        train_filter = np.arange(attributes.shape) < (Y.size * .50)\n",
+    "\n",
+    "        Xtrain = 42\n",
+    "        Xtest = 432\n",
+    "        Ytrain = 12\n",
+    "        Ytest = 123\n",
+    "        \n",
+    "        return (Xtrain, Xtest, Ytrain, Ytest)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cvg(attributes, labels[:,:-1])"
+   ]
  }
 ],
 "metadata": {