ld2daps/Notebooks/Attribute Profiles Classifier.ipynb

420 lines
10 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from pathlib import Path\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"triskele_path = Path('../triskele/python/')\n",
"sys.path.append(str(triskele_path.resolve()))\n",
"import triskele"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## List raster files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"layers_files = [\n",
" '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif',\n",
" '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif',\n",
" '../Data/phase1_rasters/DEM_C123_TLI/UH17_GEG05_TR.tif',\n",
" '../Data/phase1_rasters/DSM_C12/UH17c_GEF051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C2/UH17_GI2F051_TR.tif',\n",
" '../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif',\n",
" #'../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define dataset dependent raster filtering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def DFC_filter(raster):\n",
" ## Remove extrem values\n",
" #raster[raster == raster.max()] = raster[raster != raster.max()].max()\n",
" raster[raster > 1e4] = raster[raster < 1e4].max()\n",
" #raster[raster == np.finfo(raster.dtype).max] = raster[raster != raster.max()].max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load rasters data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"layers = list()\n",
"\n",
"for file in layers_files:\n",
" print('Loading {}'.format(file))\n",
" layer = triskele.read(file)\n",
" DFC_filter(layer)\n",
" layers.append(layer)\n",
"\n",
"layers_stack = np.stack(layers, axis=2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Display rasters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(layers_stack.shape[2]):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(layers_stack[:,:,i])\n",
" plt.colorbar()\n",
" plt.title(layers_files[i])\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Attributes filter with TRISKELE !"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"area = np.array([10, 100, 1e3, 1e4, 1e5])\n",
"sd = np.array([0.5,0.9,0.99,0.999,0.9999])#,1e4,1e5,5e5])\n",
"moi = np.array([0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99])\n",
"\n",
"t = triskele.Triskele(layers_stack[:,:,:], verbose=False)\n",
"attributes = t.filter(tree='tos-tree',\n",
" area=area,\n",
" standard_deviation=sd,\n",
" moment_of_inertia=moi\n",
" )\n",
"attributes.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"for i in range(attributes.shape[2]-1):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i])\n",
" plt.colorbar()\n",
" plt.show()\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(attributes[:,:,i+1].astype(np.float) - attributes[:,:,i])\n",
" plt.colorbar()\n",
" #plt.title(layers_files[i])\n",
"plt.show()\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(attributes[:,:,-1])\n",
"plt.colorbar()\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification vectors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = attributes.reshape(-1, attributes.shape[2])\n",
"\n",
"(attributes[0,0] == X[0]).all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
"labels = triskele.read(labels_file)\n",
"display(labels.shape)\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Y = labels.reshape(-1)\n",
"\n",
"X.shape, Y.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest Classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import importlib\n",
"from sklearn import metrics\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import pickle\n",
"sys.path.insert(0, '..')\n",
"import CrossValidationGenerator as cvg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importlib.reload(cvg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import metrics\n",
"import pandas as pd\n",
"\n",
"\n",
"def scores(actual, prediction):\n",
" ct = pd.crosstab(prediction, actual,\n",
" rownames=['Prediction'], colnames=['Reference'],\n",
" margins=True, margins_name='Total',\n",
" normalize=False # all, index, columns\n",
" )\n",
" display(ct)\n",
" \n",
" scores = metrics.precision_recall_fscore_support(actual, prediction)\n",
" print(metrics.classification_report(actual, prediction)) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cv_labels = np.zeros(labels[:].shape)\n",
"\n",
"for xtrain, xtest, ytrain, ytest, train_index in cvg.CVG(attributes[:], labels[:], 10, 1): \n",
" rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
" rfc.fit(xtrain, ytrain)\n",
" \n",
" ypred = rfc.predict(xtest)\n",
" \n",
" display(ytest.shape, ypred.shape)\n",
" \n",
" scores(ytest, ypred)\n",
" \n",
" cv_labels[:,train_index == False] = ypred.reshape(cv_labels.shape[0], -1)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show(im):\n",
" plt.figure(figsize=(16*2,3*2))\n",
" plt.imshow(im)\n",
" plt.colorbar()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"show(labels)\n",
"show(cv_labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.arange(238400).reshape(-1, 4768)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('../Res/classifier_0.pkl', 'wb') as f:\n",
" pickle.dump(rfc, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Yp = Y.copy()\n",
"\n",
"Yp[training == False] = rfc.predict(X[training == False])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Y.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape))\n",
"plt.colorbar()\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=(16*2,3*2))\n",
"plt.imshow(Yp.reshape(labels.shape).astype(np.float) - labels)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class cvg:\n",
" def __init__(self, attributes, ground_truth, order_dim=0, n_test=2): \n",
" self._tests_left = n_test\n",
" \n",
" if attributes.shape[0] != ground_truth.shape[0] or \\\n",
" attributes.shape[1] != ground_truth.shape[1] :\n",
" raise ValueError('attributes and ground_truth must have the same 2D shape')\n",
" \n",
" def __iter__(self):\n",
" return self\n",
" \n",
" def __next__(self):\n",
" if self._tests_left == 0:\n",
" raise StopIteration\n",
" \n",
" train_filter = np.arange(attributes.shape) < (Y.size * .50)\n",
"\n",
" Xtrain = 42\n",
" Xtest = 432\n",
" Ytrain = 12\n",
" Ytest = 123\n",
" \n",
" return (Xtrain, Xtest, Ytrain, Ytest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cvg(attributes, labels[:,:-1])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}