CVGenerator & Notebooks

This commit is contained in:
Florent Guiotte 2018-03-29 11:58:09 +02:00
parent c110ea1d27
commit 2ea561b4ba
3 changed files with 423 additions and 100 deletions

View File

@ -0,0 +1,106 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class cvg:\n",
" def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):\n",
" self._order = order_dim\n",
" self._ntests = n_test\n",
" self._actual_ntest = 0\n",
" self._size = attributes.shape[order_dim]\n",
" self._att = attributes\n",
" self._gt = ground_truth\n",
" \n",
" if attributes.shape[0] != ground_truth.shape[0] or \\\n",
" attributes.shape[1] != ground_truth.shape[1] :\n",
" raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
" \n",
" def __iter__(self):\n",
" return self\n",
" \n",
" def __next__(self):\n",
" if self._actual_ntest == self._ntests:\n",
" raise StopIteration\n",
" \n",
" step = self._size / self._ntests\n",
" train_filter = (np.arange(self._size) - step * self._actual_ntest) % self._size < step\n",
" \n",
" if self._order == 0:\n",
" Xtrain = self._att[train_filter].reshape(-1, self._att.shape[2])\n",
" Xtest = self._att[train_filter == False].reshape(-1, self._att.shape[2])\n",
" Ytrain = self._gt[train_filter].reshape(-1, 1)\n",
" Ytest = self._gt[train_filter == False].reshape(-1, 1)\n",
" else:\n",
" Xtrain = self._att[:,train_filter].reshape(-1, self._att.shape[2])\n",
" Xtest = self._att[:,train_filter == False].reshape(-1, self._att.shape[2])\n",
" Ytrain = self._gt[:,train_filter].reshape(-1, 1)\n",
" Ytest = self._gt[:,train_filter == False].reshape(-1, 1)\n",
"\n",
" \n",
" self._actual_ntest += 1\n",
" \n",
" return (Xtrain, Xtest, Ytrain, Ytest, train_filter)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = np.arange(100*200*10).reshape(100, 200, 10)\n",
"Y = np.arange(100 * 200).reshape(100, 200)\n",
"\n",
"for xn, xt, yn, yt, t in cvg(X, Y, 10, 1):\n",
" disp = np.zeros(Y.shape)\n",
" disp[:,t] = 1.\n",
" plt.imshow(disp)\n",
" plt.show()\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from CrossValidationGenerator import CVG"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file CrossValidationGenerator.py
# \brief Iterator yielding train/test splits of an attribute stack along one axis.
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 28 Mar 2018
#
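# Defines CVG, which cuts the attributes and ground-truth arrays into n_test
# contiguous slabs along axis 0 or 1 and yields one train/test split per slab.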
import numpy as np
class CVG:
    """Iterator yielding cross-validation splits cut along one axis.

    The positions along axis ``order_dim`` are divided into ``n_test``
    contiguous folds. Each iteration selects one fold: the selected slab is
    returned as the *train* part and everything else as the *test* part
    (this mirrors the variable naming of the original implementation).

    Parameters
    ----------
    attributes : numpy.ndarray
        Array indexed as ``[axis0, axis1, feature]``; ``shape[2]`` is used
        as the number of columns of the returned X matrices.
    ground_truth : numpy.ndarray
        Array whose first two dimensions match those of ``attributes``.
    n_test : int, optional
        Number of folds, i.e. number of splits yielded (default 2).
    order_dim : int, optional
        Axis along which the folds are cut, 0 or 1 (default 0).

    Raises
    ------
    ValueError
        If the first two dimensions of ``attributes`` and ``ground_truth``
        differ, if ``order_dim`` is not 0 or 1, or if ``n_test`` is negative.
    """

    def __init__(self, attributes, ground_truth, n_test=2, order_dim=0):
        # Validate before storing anything so a failed construction never
        # leaves a half-initialised object around.
        if attributes.shape[0] != ground_truth.shape[0] or \
                attributes.shape[1] != ground_truth.shape[1]:
            raise ValueError('attributes and ground_truth must have the same 2D shape')
        # Any value other than 0 used to fall into the "axis 1" branch while
        # the fold size was read from another axis; reject it explicitly.
        if order_dim not in (0, 1):
            raise ValueError('order_dim must be 0 or 1')
        # A negative fold count could never reach the stop condition.
        if n_test < 0:
            raise ValueError('n_test must not be negative')

        self._order = order_dim
        self._ntests = n_test
        self._actual_ntest = 0
        self._size = attributes.shape[order_dim]
        self._att = attributes
        self._gt = ground_truth

    def __iter__(self):
        """Return the generator itself (single-pass iterator)."""
        return self

    def __next__(self):
        """Return the next split.

        Returns
        -------
        tuple
            ``(Xtrain, Xtest, Ytrain, Ytest, train_filter)`` where the X
            arrays are reshaped to ``(-1, attributes.shape[2])``, the Y
            arrays are flattened, and ``train_filter`` is the boolean mask
            over axis ``order_dim`` selecting the current fold.

        Raises
        ------
        StopIteration
            Once ``n_test`` splits have been produced.
        """
        if self._actual_ntest >= self._ntests:
            raise StopIteration

        # Fold index of position i is floor(i * n_test / size). Integer
        # arithmetic gives the same folds as comparing against a float step,
        # without rounding errors dropping or duplicating a boundary index.
        fold_of = (np.arange(self._size) * self._ntests) // self._size
        train_filter = fold_of == self._actual_ntest
        test_filter = ~train_filter

        n_features = self._att.shape[2]
        if self._order == 0:
            Xtrain = self._att[train_filter].reshape(-1, n_features)
            Xtest = self._att[test_filter].reshape(-1, n_features)
            Ytrain = self._gt[train_filter].reshape(-1)
            Ytest = self._gt[test_filter].reshape(-1)
        else:
            Xtrain = self._att[:, train_filter].reshape(-1, n_features)
            Xtest = self._att[:, test_filter].reshape(-1, n_features)
            Ytrain = self._gt[:, train_filter].reshape(-1)
            Ytest = self._gt[:, test_filter].reshape(-1)

        self._actual_ntest += 1
        return (Xtrain, Xtest, Ytrain, Ytest, train_filter)

View File

@ -9,7 +9,6 @@
"import sys\n", "import sys\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"import numpy as np\n", "import numpy as np\n",
"import libtiff\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"\n", "\n",
"triskele_path = Path('../triskele/python/')\n", "triskele_path = Path('../triskele/python/')\n",
@ -37,7 +36,8 @@
" '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n", " '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n", " '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n", " '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif'\n", " '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif',\n",
" #'../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'\n",
"]" "]"
] ]
}, },
@ -78,7 +78,7 @@
"\n", "\n",
"for file in layers_files:\n", "for file in layers_files:\n",
" print('Loading {}'.format(file))\n", " print('Loading {}'.format(file))\n",
" layer = libtiff.TIFF.open(file).read_image()\n", " layer = triskele.read(file)\n",
" DFC_filter(layer)\n", " DFC_filter(layer)\n",
" layers.append(layer)\n", " layers.append(layer)\n",
"\n", "\n",
@ -119,114 +119,281 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"t = triskele.Triskele(layers_stack[0], verbose=False)\n", "area = np.array([10, 100, 1e3, 1e4, 1e5])\n",
"attributes = t.filter()" "sd = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n",
] "moi = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n",
}, "\n",
{ "t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n",
"cell_type": "code", "attributes = t.filter(tree='tos-tree',\n",
"execution_count": null, " area=area,\n",
"metadata": {}, " standard_deviation=sd,\n",
"outputs": [], " moment_of_inertia=moi\n",
"source": [ " )\n",
"attributes.shape" "attributes.shape"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"layers_stack.shape" "for i in range(attributes.shape[2]-1):\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
"test.write_image(np.rollaxis(layers_stack, 2).astype(np.float32))\n",
"test = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"layers_stack.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif', mode='w')\n",
"test.\n",
"#test = None\n",
"#test.write_tiles(layers_stack[:,:,0].astype(np.uint8), tile_width=layers_stack.shape[1], tile_height=layers_stack.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.rollaxis(layers_stack, 2).shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tiff = libtiff.TIFFimage(np.rollaxis(layers_stack, 1), description='BDQ')\n",
"tiff.write_file('../Res/test.tif')\n",
"del tiff"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFF.open('../Res/test.tif')\n",
"for image in test.iter_images():\n",
" print(image.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = libtiff.TIFFimage(layers_stack)\n",
"test.write_file('../Res/test.tif')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(attributes.shape[2]):\n",
" plt.figure(figsize=(16*2,3*2))\n", " plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i])\n", " plt.imshow(attributes[:,:,i])\n",
" plt.colorbar()\n", " plt.colorbar()\n",
" plt.title(layers_files[i])\n", " plt.show()\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n",
" plt.colorbar()\n",
" #plt.title(layers_files[i])\n",
"plt.show()\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(attributes[:,:,-1])\n",
"plt.colorbar()\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification vectors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = attributes.reshape(-1, attributes.shape[2])\n",
"\n",
"(attributes[0,0] == X[0]).all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
"labels = triskele.read(labels_file)\n",
"display(labels.shape)\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Y = labels.reshape(-1)\n",
"\n",
"X.shape, Y.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest Classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import importlib\n",
"from sklearn import metrics\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import pickle\n",
"sys.path.insert(0, '..')\n",
"import CrossValidationGenerator as cvg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importlib.reload(cvg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import metrics\n",
"import pandas as pd\n",
"\n",
"\n",
"def scores(actual, prediction):\n",
" ct = pd.crosstab(prediction, actual,\n",
" rownames=['Prediction'], colnames=['Reference'],\n",
" margins=True, margins_name='Total',\n",
" normalize=False # all, index, columns\n",
" )\n",
" display(ct)\n",
" \n",
" scores = metrics.precision_recall_fscore_support(actual, prediction)\n",
" print(metrics.classification_report(actual, prediction)) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cv_labels = np.zeros(labels[:].shape)\n",
"\n",
"for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n",
" rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
" rfc.fit(xtrain, ytrain)\n",
" \n",
" ypred = rfc.predict(xtest)\n",
" \n",
" display(ytest.shape, ypred.shape)\n",
" \n",
" scores(ytest, ypred)\n",
" \n",
" cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show(im):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(im)\n",
" plt.colorbar()\n",
" plt.show()" " plt.show()"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"show(labels)\n",
"show(cv_labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.arange(238400).reshape(-1, 4768)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('../Res/classifier_0.pkl', 'wb') as f:\n",
" pickle.dump(rfc, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Yp = Y.copy()\n",
"\n",
"Yp[training == False] = rfc.predict(X[training == False])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Y.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class cvg:\n",
" def __init__(self, attributes, ground_truth, order_dim=0, n_test=2): \n",
" self._tests_left = n_test\n",
" \n",
" if attributes.shape[0] != ground_truth.shape[0] or \\\n",
" attributes.shape[1] != ground_truth.shape[1] :\n",
" raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
" \n",
" def __iter__(self):\n",
" return self\n",
" \n",
" def __next__(self):\n",
" if self._tests_left == 0:\n",
" raise StopIteration\n",
" \n",
" train_filter = np.arange(attributes.shape) < (Y.size * .50)\n",
"\n",
" Xtrain = 42\n",
" Xtest = 432\n",
" Ytrain = 12\n",
" Ytest = 123\n",
" \n",
" return (Xtrain, Xtest, Ytrain, Ytest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cvg(attributes, labels[:,:-1])"
]
} }
], ],
"metadata": { "metadata": {