CVGenerator & Notebooks

This commit is contained in:
Florent Guiotte 2018-03-29 11:58:09 +02:00
parent c110ea1d27
commit 2ea561b4ba
3 changed files with 423 additions and 100 deletions

View File

@ -0,0 +1,106 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class cvg:\n",
" def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):\n",
" self._order = order_dim\n",
" self._ntests = n_test\n",
" self._actual_ntest = 0\n",
" self._size = attributes.shape[order_dim]\n",
" self._att = attributes\n",
" self._gt = ground_truth\n",
" \n",
" if attributes.shape[0] != ground_truth.shape[0] or \\\n",
" attributes.shape[1] != ground_truth.shape[1] :\n",
" raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
" \n",
" def __iter__(self):\n",
" return self\n",
" \n",
" def __next__(self):\n",
" if self._actual_ntest == self._ntests:\n",
" raise StopIteration\n",
" \n",
" step = self._size / self._ntests\n",
" train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step\n",
" \n",
" if self._order == 0:\n",
" Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])\n",
" Xtest = self._att[train_filter == False].reshape(-1, self._att.shape[2])\n",
" Ytrain = self._gt[train_filter].reshape(-1, 1)\n",
" Ytest = self._gt[train_filter == False].reshape(-1, 1)\n",
" else:\n",
" Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])\n",
" Xtest = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])\n",
" Ytrain = self._gt[:,train_filter].reshape(-1, 1)\n",
" Ytest = self._gt[:,train_filter == False].reshape(-1, 1)\n",
"\n",
" \n",
" self._actual_ntest += 1\n",
" \n",
" return (Xtrain, Xtest, Ytrain, Ytest, train_filter)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = np.arange(100*200*10).reshape(100, 200, 10)\n",
"Y = np.arange(100 * 200).reshape(100, 200)\n",
"\n",
"for xn, xt, yn, yt, t in cvg(X, Y, 10, 1):\n",
" disp = np.zeros(Y.shape)\n",
" disp[:,t] = 1.\n",
" plt.imshow(disp)\n",
" plt.show()\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from CrossValidationGenerator import CVG"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief Iterator yielding contiguous cross-validation splits of a raster
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 Mar 2018
#
# TODO details
import numpy as np
class CVG:
    """Iterator yielding contiguous cross-validation splits of a raster.

    The axis selected by ``order_dim`` is cut into ``n_test`` contiguous
    bands. On iteration ``k`` the ``k``-th band is returned as the "train"
    part and every other band as the "test" part.

    Each iteration yields ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)``:

    * ``Xtrain`` / ``Xtest``: attributes flattened to
      ``(n_pixels, attributes.shape[2])``.
    * ``Ytrain`` / ``Ytest``: ground truth flattened to ``(n_pixels,)``.
    * ``train_filter``: boolean mask of length ``attributes.shape[order_dim]``
      marking the band used for the "train" part.

    The object is its own iterator, so it can be consumed only once.

    Args:
        attributes: array with a ``shape`` of at least 3 dimensions; the
            third axis is kept as the feature axis.
        ground_truth: array whose first two dimensions match ``attributes``.
        n_test: number of bands, i.e. number of splits produced.
        order_dim: axis along which the bands are cut, 0 or 1.

    Raises:
        ValueError: if ``attributes`` has fewer than 3 dimensions, if the 2D
            shapes of ``attributes`` and ``ground_truth`` differ, or if
            ``order_dim`` is neither 0 nor 1.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        # Validate first so that bad input fails here with a clear message
        # instead of an IndexError later in the iteration.
        if len(attributes.shape) < 3:
            raise ValueError('attributes must have at least 3 dimensions')
        if tuple(attributes.shape[:2]) != tuple(ground_truth.shape[:2]):
            raise ValueError('attributes and ground_truth must have the same 2D shape')
        if order_dim not in (0, 1):
            raise ValueError('order_dim must be 0 or 1')

        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

    def __iter__(self):
        return self

    def __next__(self):
        # '>=' (not '==') so a negative or non-integer n_test cannot make the
        # iteration run forever.
        if self._actual_ntest >= self._ntests:
            raise StopIteration

        # Band k holds the indices i with floor(i * n_test / size) == k.
        # Integer arithmetic keeps the band boundaries exact; a float step
        # puts some indices in two bands when size is not a multiple of
        # n_test.
        band = (np.arange(self._size) * self._ntests) // self._size
        train_filter = band == self._actual_ntest
        test_filter = ~train_filter

        if self._order == 0:
            att_train = self._att[train_filter]
            att_test = self._att[test_filter]
            gt_train = self._gt[train_filter]
            gt_test = self._gt[test_filter]
        else:
            att_train = self._att[:, train_filter]
            att_test = self._att[:, test_filter]
            gt_train = self._gt[:, train_filter]
            gt_test = self._gt[:, test_filter]

        n_features = self._att.shape[2]
        Xtrain = att_train.reshape(-1, n_features)
        Xtest = att_test.reshape(-1, n_features)
        Ytrain = gt_train.reshape(-1)
        Ytest = gt_test.reshape(-1)

        self._actual_ntest += 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)

View File

@ -9,7 +9,6 @@
"import sys\n",
"from pathlib import Path\n",
"import numpy as np\n",
"import libtiff\n",
"import matplotlib.pyplot as plt\n",
"\n",
"triskele_path = Path('../triskele/python/')\n",
@ -37,7 +36,8 @@
" '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif'\n",
" '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif',\n",
" #'../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'\n",
"]"
]
},
@ -78,7 +78,7 @@
"\n",
"for file in layers_files:\n",
" print('Loading {}'.format(file))\n",
" layer = libtiff.TIFF.open(file).read_image()\n",
" layer = triskele.read(file)\n",
" DFC_filter(layer)\n",
" layers.append(layer)\n",
"\n",
@ -119,114 +119,281 @@
"metadata": {},
"outputs": [],
"source": [
"t = triskele.Triskele(layers_stack[0], verbose=False)\n",
"attributes = t.filter()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"area = np.array([10, 100, 1e3, 1e4, 1e5])\n",
"sd = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n",
"moi = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n",
"\n",
"t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n",
"attributes = t.filter(tree='tos-tree',\n",
" area=area,\n",
" standard_deviation=sd,\n",
" moment_of_inertia=moi\n",
" )\n",
"attributes.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"layers_stack.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
"test.write_image(np.rollaxis(layers_stack, 2).astype(np.float32))\n",
"test = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"layers_stack.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
"test.\n",
"#test = None\n",
"#test.write_tiles(layers_stack[:,:,0].astype(np.uint8), tile_width=layers_stack.shape[1], tile_height=layers_stack.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.rollaxis(layers_stack, 2).shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tiff = libtiff.TIFFimage(np.rollaxis(layers_stack, 1), description='BDQ')\n",
"tiff.write_file('../Res/test.tif')\n",
"del tiff"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif')\n",
"for image in test.iter_images():\n",
" print(image.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFFimage(layers_stack)\n",
"test.write_file('../Res/test.tif')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(attributes.shape[2]):\n",
"for i in range(attributes.shape[2]-1):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i])\n",
" plt.colorbar()\n",
" plt.title(layers_files[i])\n",
" plt.show()\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n",
" plt.colorbar()\n",
" #plt.title(layers_files[i])\n",
"plt.show()\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(attributes[:,:,-1])\n",
"plt.colorbar()\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification vectors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = attributes.reshape(-1, attributes.shape[2])\n",
"\n",
"(attributes[0,0] == X[0]).all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
"labels = triskele.read(labels_file)\n",
"display(labels.shape)\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Y = labels.reshape(-1)\n",
"\n",
"X.shape, Y.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest Classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import importlib\n",
"from sklearn import metrics\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import pickle\n",
"sys.path.insert(0, '..')\n",
"import CrossValidationGenerator as cvg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importlib.reload(cvg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import metrics\n",
"import pandas as pd\n",
"\n",
"\n",
"def scores(actual, prediction):\n",
" ct = pd.crosstab(prediction, actual,\n",
" rownames=['Prediction'], colnames=['Reference'],\n",
" margins=True, margins_name='Total',\n",
" normalize=False # all, index, columns\n",
" )\n",
" display(ct)\n",
" \n",
" scores = metrics.precision_recall_fscore_support(actual, prediction)\n",
" print(metrics.classification_report(actual, prediction)) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cv_labels = np.zeros(labels[:].shape)\n",
"\n",
"for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n",
" rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
" rfc.fit(xtrain, ytrain)\n",
" \n",
" ypred = rfc.predict(xtest)\n",
" \n",
" display(ytest.shape, ypred.shape)\n",
" \n",
" scores(ytest, ypred)\n",
" \n",
" cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show(im):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(im)\n",
" plt.colorbar()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"show(labels)\n",
"show(cv_labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.arange(238400).reshape(-1, 4768)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('../Res/classifier_0.pkl', 'wb') as f:\n",
" pickle.dump(rfc, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Yp = Y.copy()\n",
"\n",
"Yp[training == False] = rfc.predict(X[training == False])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Y.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class cvg:\n",
" def __init__(self, attributes, ground_truth, order_dim=0, n_test=2): \n",
" self._tests_left = n_test\n",
" \n",
" if attributes.shape[0] != ground_truth.shape[0] or \\\n",
" attributes.shape[1] != ground_truth.shape[1] :\n",
" raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
" \n",
" def __iter__(self):\n",
" return self\n",
" \n",
" def __next__(self):\n",
" if self._tests_left == 0:\n",
" raise StopIteration\n",
" \n",
" train_filter = np.arange(attributes.shape) < (Y.size * .50)\n",
"\n",
" Xtrain = 42\n",
" Xtest = 432\n",
" Ytrain = 12\n",
" Ytest = 123\n",
" \n",
" return (Xtrain, Xtest, Ytrain, Ytest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cvg(attributes, labels[:,:-1])"
]
}
],
"metadata": {