CVGenerator & Notebooks

2018-03-29 11:58:09 +02:00 · 2018-03-29 11:58:09 +02:00 · 2ea561b4ba
commit 2ea561b4ba
parent c110ea1d27
3 changed files with 423 additions and 100 deletions
--- a/Generator.ipynb
+++ b/Generator.ipynb
@ -0,0 +1,106 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class cvg:\n",
    "    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):\n",
    "        self._order        = order_dim\n",
    "        self._ntests       = n_test\n",
    "        self._actual_ntest = 0\n",
    "        self._size         = attributes.shape[order_dim]\n",
    "        self._att          = attributes\n",
    "        self._gt           = ground_truth\n",
    "        \n",
    "        if attributes.shape[0] != ground_truth.shape[0] or \\\n",
    "           attributes.shape[1] != ground_truth.shape[1] :\n",
    "                raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
    "        \n",
    "    def __iter__(self):\n",
    "        return self\n",
    "    \n",
    "    def __next__(self):\n",
    "        if self._actual_ntest == self._ntests:\n",
    "            raise StopIteration\n",
    "        \n",
    "        step = self._size / self._ntests\n",
    "        train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step\n",
    "        \n",
    "        if self._order == 0:\n",
    "            Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])\n",
    "            Xtest  = self._att[train_filter == False].reshape(-1, self._att.shape[2])\n",
    "            Ytrain = self._gt[train_filter].reshape(-1, 1)\n",
    "            Ytest  = self._gt[train_filter == False].reshape(-1, 1)\n",
    "        else:\n",
    "            Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])\n",
    "            Xtest  = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])\n",
    "            Ytrain = self._gt[:,train_filter].reshape(-1, 1)\n",
    "            Ytest  = self._gt[:,train_filter == False].reshape(-1, 1)\n",
    "\n",
    "        \n",
    "        self._actual_ntest += 1\n",
    "        \n",
    "        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = np.arange(100*200*10).reshape(100, 200, 10)\n",
    "Y = np.arange(100 * 200).reshape(100, 200)\n",
    "\n",
    "for xn, xt, yn, yt, t in cvg(X, Y, 10, 1):\n",
    "        disp = np.zeros(Y.shape)\n",
    "        disp[:,t] = 1.\n",
    "        plt.imshow(disp)\n",
    "        plt.show()\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from CrossValidationGenerator import CVG"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/CrossValidationGenerator.py
+++ b/CrossValidationGenerator.py
@ -0,0 +1,50 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # \file CrossValidationGenerator.py
 # \brief Cross-validation fold generator for attribute stacks and their ground truth
 # \author Florent Guiotte <florent.guiotte@gmail.com>
 # \version 0.1
 # \date 28 Mar 2018
 #
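 # Splits the attributes and the ground truth into contiguous folds along one
 # of their two shared leading axes and yields flattened train/test arrays
 # for each fold.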
 import numpy as np
 class CVG:
    """Iterator over contiguous cross-validation folds.

    ``attributes`` is indexed as ``[a, b, feature]`` and ``ground_truth`` as
    ``[a, b]``; both must agree on their two leading dimensions.  The axis
    selected by ``order_dim`` is cut into ``n_test`` contiguous folds and one
    fold is selected per iteration.

    Each iteration returns ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)``:

    * ``train_filter`` -- boolean mask over the split axis, True on the
      current fold.
    * ``Xtrain`` / ``Ytrain`` -- samples *inside* the current fold (roughly
      ``1 / n_test`` of the data).
    * ``Xtest`` / ``Ytest`` -- all remaining samples.
    * ``X*`` are reshaped to ``(-1, attributes.shape[2])`` and ``Y*`` are
      flattened to one dimension.

    The object is a one-shot iterator: once exhausted it stays exhausted.

    Parameters
    ----------
    attributes : array with at least 3 dimensions
    ground_truth : array with at least 2 dimensions
    n_test : number of folds (non-negative whole number, default 2)
    order_dim : 0 splits along the first axis; any other value splits along
        the second axis.  It should be 0 or 1, because the length of the
        split is read from ``attributes.shape[order_dim]``.

    Raises
    ------
    ValueError
        If the shapes are incompatible or ``n_test`` is negative or not a
        whole number.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        # Validate first so that malformed input fails here with a
        # ValueError instead of an IndexError later during iteration.
        att_shape = tuple(attributes.shape)
        gt_shape = tuple(ground_truth.shape)
        if len(att_shape) < 3:
            raise ValueError('attributes must have at least 3 dimensions')
        if len(gt_shape) < 2 or att_shape[:2] != gt_shape[:2]:
            raise ValueError('attributes and ground_truth must have the same 2D shape')
        # A negative or fractional fold count can never be reached by the
        # integer fold counter, which would make the iteration endless.
        if n_test < 0 or n_test % 1 != 0:
            raise ValueError('n_test must be a non-negative whole number')

        self._order        = order_dim
        self._ntests       = n_test
        self._actual_ntest = 0
        self._size         = attributes.shape[order_dim]
        self._att          = attributes
        self._gt           = ground_truth

    def __iter__(self):
        return self

    def __next__(self):
        if self._actual_ntest >= self._ntests:
            raise StopIteration

        # Index i belongs to fold floor(i * n / size).  This is the same
        # partition as comparing against a fractional step of size / n, but
        # it stays in integer arithmetic and is immune to rounding at the
        # fold boundaries.
        fold_of_index = np.arange(self._size) * self._ntests // self._size
        train_filter = fold_of_index == self._actual_ntest
        test_filter = ~train_filter

        n_features = self._att.shape[2]
        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, n_features)
            Xtest  = self._att[test_filter].reshape(-1, n_features)
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest  = self._gt[test_filter].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, n_features)
            Xtest  = self._att[:, test_filter].reshape(-1, n_features)
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest  = self._gt[:, test_filter].reshape(-1)

        self._actual_ntest += 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)
--- a/Notebooks/Attribute
+++ b/Notebooks/Attribute
@ -9,7 +9,6 @@
    "import sys\n",
    "from pathlib import Path\n",
    "import numpy as np\n",
    "import libtiff\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "triskele_path = Path('../triskele/python/')\n",
@ -37,7 +36,8 @@
    "    '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n",
    "    '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n",
    "    '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n",
-    "    '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif'\n",
+    "    '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif',\n",
    "    #'../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'\n",
    "]"
   ]
  },
@ -78,7 +78,7 @@
    "\n",
    "for file in layers_files:\n",
    "    print('Loading {}'.format(file))\n",
-    "    layer  = libtiff.TIFF.open(file).read_image()\n",
+    "    layer = triskele.read(file)\n",
    "    DFC_filter(layer)\n",
    "    layers.append(layer)\n",
    "\n",
@ -119,114 +119,281 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "t = triskele.Triskele(layers_stack[0], verbose=False)\n",
+    "area = np.array([10, 100, 1e3, 1e4, 1e5])\n",
-    "attributes = t.filter()"
+    "sd   = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n",
-   ]
+    "moi  = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n",
-  },
+    "\n",
-  {
+    "t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n",
-   "cell_type": "code",
+    "attributes = t.filter(tree='tos-tree',\n",
-   "execution_count": null,
+    "                      area=area,\n",
-   "metadata": {},
+    "                      standard_deviation=sd,\n",
-   "outputs": [],
+    "                      moment_of_inertia=moi\n",
-   "source": [
+    "                     )\n",
    "attributes.shape"
   ]
  },
  {
-   "cell_type": "code",
+   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "layers_stack.shape"
+    "for i in range(attributes.shape[2]-1):\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
    "test.write_image(np.rollaxis(layers_stack, 2).astype(np.float32))\n",
    "test = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "layers_stack.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
    "test.\n",
    "#test = None\n",
    "#test.write_tiles(layers_stack[:,:,0].astype(np.uint8), tile_width=layers_stack.shape[1], tile_height=layers_stack.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.rollaxis(layers_stack, 2).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tiff = libtiff.TIFFimage(np.rollaxis(layers_stack, 1), description='BDQ')\n",
    "tiff.write_file('../Res/test.tif')\n",
    "del tiff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = libtiff.TIFF.open('../Res/test.tif')\n",
    "for image in test.iter_images():\n",
    "    print(image.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = libtiff.TIFFimage(layers_stack)\n",
    "test.write_file('../Res/test.tif')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(attributes.shape[2]):\n",
    "    plt.figure(figsize=(16*2,3*2))\n",
    "    plt.imshow(attributes[:,:,i])\n",
    "    plt.colorbar()\n",
-    "    plt.title(layers_files[i])\n",
+    "    plt.show()\n",
    "    plt.figure(figsize=(16*2,3*2))\n",
    "    plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n",
    "    plt.colorbar()\n",
    "    #plt.title(layers_files[i])\n",
    "plt.show()\n",
    "plt.figure(figsize=(16*2,3*2))\n",
    "plt.imshow(attributes[:,:,-1])\n",
    "plt.colorbar()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classification vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = attributes.reshape(-1, attributes.shape[2])\n",
    "\n",
    "(attributes[0,0] == X[0]).all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
    "labels = triskele.read(labels_file)\n",
    "display(labels.shape)\n",
    "\n",
    "plt.figure(figsize=(16*2,3*2))\n",
    "plt.imshow(labels)\n",
    "plt.colorbar()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y = labels.reshape(-1)\n",
    "\n",
    "X.shape, Y.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Forest Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "from sklearn import metrics\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "import pickle\n",
    "sys.path.insert(0, '..')\n",
    "import CrossValidationGenerator as cvg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "importlib.reload(cvg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import metrics\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "def scores(actual, prediction):\n",
    "    ct = pd.crosstab(prediction, actual,\n",
    "            rownames=['Prediction'], colnames=['Reference'],\n",
    "            margins=True, margins_name='Total',\n",
    "            normalize=False # all, index, columns\n",
    "            )\n",
    "    display(ct)\n",
    "    \n",
    "    scores = metrics.precision_recall_fscore_support(actual, prediction)\n",
    "    print(metrics.classification_report(actual, prediction))    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cv_labels = np.zeros(labels[:].shape)\n",
    "\n",
    "for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n",
    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
    "    rfc.fit(xtrain, ytrain)\n",
    "    \n",
    "    ypred = rfc.predict(xtest)\n",
    "    \n",
    "    display(ytest.shape, ypred.shape)\n",
    "    \n",
    "    scores(ytest, ypred)\n",
    "    \n",
    "    cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show(im):\n",
    "    plt.figure(figsize=(16*2,3*2))\n",
    "    plt.imshow(im)\n",
    "    plt.colorbar()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "show(labels)\n",
    "show(cv_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.arange(238400).reshape(-1, 4768)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('../Res/classifier_0.pkl', 'wb') as f:\n",
    "    pickle.dump(rfc, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Yp = Y.copy()\n",
    "\n",
    "Yp[training == False] = rfc.predict(X[training == False])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(16*2,3*2))\n",
    "plt.imshow(Y.reshape(labels.shape))\n",
    "plt.colorbar()\n",
    "plt.show()\n",
    "\n",
    "plt.figure(figsize=(16*2,3*2))\n",
    "plt.imshow(Yp.reshape(labels.shape))\n",
    "plt.colorbar()\n",
    "plt.show()\n",
    "\n",
    "plt.figure(figsize=(16*2,3*2))\n",
    "plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n",
    "plt.colorbar()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class cvg:\n",
    "    def __init__(self, attributes, ground_truth, order_dim=0, n_test=2):      \n",
    "        self._tests_left = n_test\n",
    "        \n",
    "        if attributes.shape[0] != ground_truth.shape[0] or \\\n",
    "           attributes.shape[1] != ground_truth.shape[1] :\n",
    "                raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
    "        \n",
    "    def __iter__(self):\n",
    "        return self\n",
    "    \n",
    "    def __next__(self):\n",
    "        if self._tests_left == 0:\n",
    "            raise StopIteration\n",
    "        \n",
    "        train_filter = np.arange(attributes.shape) < (Y.size * .50)\n",
    "\n",
    "        Xtrain = 42\n",
    "        Xtest = 432\n",
    "        Ytrain = 12\n",
    "        Ytest = 123\n",
    "        \n",
    "        return (Xtrain, Xtest, Ytrain, Ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cvg(attributes, labels[:,:-1])"
   ]
  }
 ],
 "metadata": {