From ad68cafe1e80ec2123be1b9ec16acbc6649d0ebc Mon Sep 17 00:00:00 2001 From: Karamaz0V1 Date: Fri, 13 Jul 2018 17:01:09 +0200 Subject: [PATCH] WIP on serialization --- Descriptors/__init__.py | 0 Descriptors/dfc_aps.py | 30 ++ Notebooks/Attribute Profiles Classifier.ipynb | 46 +-- Notebooks/Classification Scores-Copy1.ipynb | 298 ++++++++++++++++++ Notebooks/Classification Scores.ipynb | 2 +- Notebooks/YAML Serialization.ipynb | 190 ++++++++++- test.yml | 44 ++- 7 files changed, 547 insertions(+), 63 deletions(-) create mode 100644 Descriptors/__init__.py create mode 100644 Descriptors/dfc_aps.py create mode 100644 Notebooks/Classification Scores-Copy1.ipynb diff --git a/Descriptors/__init__.py b/Descriptors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Descriptors/dfc_aps.py b/Descriptors/dfc_aps.py new file mode 100644 index 0000000..c632f9a --- /dev/null +++ b/Descriptors/dfc_aps.py @@ -0,0 +1,30 @@ +import numpy as np +import yaml + +import sys +sys.path.append('..') +import ld2dap + +def run(rasters, treshold=1e4, areas=None, sd=None, moi=None): + # Parse attribute type + treshold = float(treshold) + areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int) + sd = None if sd is None else np.array(sd).astype(np.float) + moi = None if moi is None else np.array(moi).astype(np.float) + + # APs Pipelines + loader = ld2dap.LoadTIFF(rasters) + dfc_filter = ld2dap.Treshold(treshold) + dfc_filter.input = loader + aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi) + aps.input = dfc_filter + out_vectors = ld2dap.RawOutput() + out_vectors.input = aps + + # Compute vectors + out_vectors.run() + + return out_vectors.data + +def version(): + return 'v0.0' \ No newline at end of file diff --git a/Notebooks/Attribute Profiles Classifier.ipynb b/Notebooks/Attribute Profiles Classifier.ipynb index b4ec61e..573d018 100644 --- a/Notebooks/Attribute Profiles Classifier.ipynb +++ b/Notebooks/Attribute Profiles Classifier.ipynb @@ -170,7 +170,7 @@ "outputs": [], "source": [ "areas = [10., 100.]\n", - "areas.extend([x * 1e3 for x in range(1,100,1)])\n", + "areas.extend([x * 1e3 for x in range(1,100,8)])\n", "plt.plot(areas, '.')\n", "plt.show()" ] @@ -267,11 +267,11 @@ "source": [ "prediction = np.zeros_like(gt)\n", "\n", - "for xt, xv, yt, yv, ti in APsCVG(gt, att, 5):\n", + "for xt, xv, yt, yv, ti in APsCVG(gt, att, 10):\n", " plt.imshow(ti * 1.)\n", " plt.show()\n", " \n", - " rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n", + " rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, min_samples_leaf=10, verbose=True)\n", " rfc.fit(xt, yt)\n", " \n", " ypred = rfc.predict(xv)\n", @@ -298,45 +298,7 @@ "source": [ "plt.imsave('../Res/tmppred.png', prediction)\n", "plt.imsave('../Res/gt.png', gt)\n", - "triskele.write('../Res/tmppred_8.tif', prediction)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X = attributes.reshape(-1, attributes.shape[2])\n", - "\n", - "(attributes[0,0] == X[0]).all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n", - "labels = triskele.read(labels_file)\n", - "display(labels.shape)\n", - "\n", - "plt.figure(figsize=(16*2,3*2))\n", - "plt.imshow(labels)\n", - "plt.colorbar()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Y = labels.reshape(-1)\n", - "\n", - "X.shape, Y.shape" + "triskele.write('../Res/tmppred_8_10pleaf_3cv.tif', prediction)" ] }, { diff --git a/Notebooks/Classification Scores-Copy1.ipynb b/Notebooks/Classification Scores-Copy1.ipynb new file mode 100644 index 0000000..2a97809 --- /dev/null +++ b/Notebooks/Classification Scores-Copy1.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generic Classification Scores for DFC 2018 [TESTING]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "# Triskele\n", + "import sys\n", + "from pathlib import Path\n", + "triskele_path = Path('../triskele/python')\n", + "sys.path.append(str(triskele_path.resolve()))\n", + "import triskele\n", + "\n", + "figsize = np.array((16, 9))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Classes Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_dfc_lbl = pd.read_csv('../labels.csv')\n", + "df_meta_idx = pd.read_csv('../metaclass_indexes.csv')\n", + "df_meta_lbl = pd.read_csv('../metaclass_labels.csv')\n", + "\n", + "df_dfc_lbl.merge(df_meta_idx).merge(df_meta_lbl)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "meta_idx = np.array(df_meta_idx['metaclass_index'], dtype=np.uint8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Ground Truth and Prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n", + "pred = triskele.read('../Res/tmppred_8_10pleaf_3cv.tif')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display Classes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n", + "ax_gt.imshow(gt)\n", + "ax_gt.set_title('Ground Truth')\n", + "ax_pred.imshow(pred)\n", + "ax_pred.set_title('Prediction')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display Meta Classes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n", + "ax_gt.imshow(meta_idx[gt])\n", + "ax_gt.set_title('Ground Truth')\n", + "ax_pred.imshow(meta_idx[pred])\n", + "ax_pred.set_title('Prediction')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics\n", + "\n", + "### Classes\n", + "\n", + "#### Confusion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f = np.nonzero(pred)\n", + "pred_s = pred[f].flatten()\n", + "gt_s = gt[f].flatten()\n", + "\n", + "ct = pd.crosstab(gt_s, pred_s,\n", + " rownames=['Prediction'], colnames=['Reference'],\n", + " margins=True, margins_name='Total',\n", + " normalize=False # all, index, columns\n", + " )\n", + "ct" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scores\n", + "\n", + "##### Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.accuracy_score(gt_s, pred_s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Kappa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.cohen_kappa_score(gt_s, pred_s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Precision, Recall, f1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.precision_recall_fscore_support(gt_s, pred_s)\n", + "print(metrics.classification_report(gt_s, pred_s))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Meta Classes\n", + "\n", + "#### Confusion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f = np.nonzero(pred)\n", + "m_pred_s = meta_idx[pred_s]\n", + "m_gt_s = meta_idx[gt_s]\n", + "\n", + "ct = pd.crosstab(m_gt_s, m_pred_s,\n", + " rownames=['Prediction'], colnames=['Reference'],\n", + " margins=True, margins_name='Total',\n", + " normalize=False # all, index, columns\n", + " )\n", + "ct" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scores\n", + "\n", + "##### Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.accuracy_score(m_gt_s, m_pred_s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Kappa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.cohen_kappa_score(m_gt_s, m_pred_s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Precision, Recall, f1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics.precision_recall_fscore_support(m_gt_s, m_pred_s)\n", + "print(metrics.classification_report(m_gt_s, m_pred_s))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Notebooks/Classification Scores.ipynb b/Notebooks/Classification Scores.ipynb index 376f74f..18f465d 100644 --- a/Notebooks/Classification Scores.ipynb +++ b/Notebooks/Classification Scores.ipynb @@ -71,7 +71,7 @@ "outputs": [], "source": [ "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n", - "pred = triskele.read('../Res/tmppred.tif')" + "pred = triskele.read('../Res/tmppred_8.tif')" ] }, { diff --git a/Notebooks/YAML Serialization.ipynb b/Notebooks/YAML Serialization.ipynb index de721cb..8dd087c 100644 --- a/Notebooks/YAML Serialization.ipynb +++ b/Notebooks/YAML Serialization.ipynb @@ -1,12 +1,27 @@ { "cells": [ { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "import yaml" + "# Serialize Attribute Profiles Classification\n", + "\n", + "- [X] Read a YAML recipe\n", + "- [X] Brew recipe\n", + "- [] Compute hashes\n", + "- [] Write hashes\n", + "- [] Time metrics\n", + "- [] Result metrics\n", + "- [] Write metrics\n", + "- [] Write/move results\n", + "- [] Watch folder\n", + "- [] Main loop\n", + "- [] Logs\n", + "- [] Catch errors\n", + "- [] Custom CVG\n", + "\n", + "\n", + "## Init" ] }, { @@ -15,9 +30,126 @@ "metadata": {}, "outputs": [], "source": [ + "import yaml\n", + "import numpy as np\n", + "import importlib\n", + "import sys\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "sys.path.append('..')\n", + "import Descriptors\n", + "from CrossValidationGenerator import APsCVG\n", + "\n", + "sys.path.append('../triskele/python')\n", + "import triskele" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Serial Classifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", "with open('../test.yml') as f:\n", - " expe = yaml.safe_load(f)\n", - "expe" + " expe = yaml.safe_load(f)['expe']\n", + "display(expe)\n", + "\n", + "# Ground truth\n", + "gt = triskele.read(expe['ground_truth'])\n", + "\n", + "# Descriptors\n", + "script = expe['descriptors_script']\n", + "desc = importlib.import_module(script['name'], package=Descriptors)\n", + "importlib.reload(Descriptors)\n", + "att = desc.run(**script['parameters'])\n", + "\n", + "# CrossVal and ML\n", + "cv = expe['cross_validation']\n", + "cl = expe['classifier']\n", + "\n", + "prediction = np.zeros_like(gt)\n", + "\n", + "for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n", + " rfc = RandomForestClassifier(**cl['parameters'])\n", + " rfc.fit(xt, yt)\n", + " \n", + " ypred = rfc.predict(xv)\n", + " \n", + " prediction[ti] = ypred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(16, 9))\n", + "plt.imshow(prediction)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import from string module, class and instantiate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import importlib\n", + "module = importlib.import_module(module_name)\n", + "class_ = getattr(module, class_name)\n", + "instance = class_()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):\n", + " treshold = float(treshold)\n", + " areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)\n", + " sd = None if sd is None else np.array(sd).astype(np.float)\n", + " moi = None if moi is None else np.array(moi).astype(np.float)\n", + " return treshold, areas, sd, moi\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run(**expe['descriptors_param'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "desc." ] }, { @@ -56,6 +188,52 @@ "sorted(expe.items())" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(expe['descriptors_param']['areas']).astype(np.float).astype(np.int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(None).astype(np.float).astype(np.int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "desc = importlib.import_module(expe['descriptors_script']['path'])\n", + "desc.run(**expe['descriptors_param'])" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/test.yml b/test.yml index bcc0fdc..4b6747d 100644 --- a/test.yml +++ b/test.yml @@ -1,18 +1,34 @@ expe: name: Première expérience date: 9 juillet 2018 - rasters: - - './Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif' - - './Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif' - descriptors: - type: Attribute Profiles - areas: - - 10 - - 100 - - 1000 - moi: [.5, .7, .9] + priority: 1 + ground_truth: '../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif' + descriptors_script: + name: Descriptors.dfc_aps + parameters: + treshold: 1e4 + rasters: + - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif' + - '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif' + areas: + - 10 + - 100 + - 1e4 + moi: [.5, .7, .9] + cross_validation: + name: CrossValidationGenerator.APsCVG + parameters: + n_test: 5 classifier: - name: Random Forest - cvsplit: 5 - hash: 000 - + name: sklearn.ensemble.RandomForestClassifier + parameters: + n_jobs: -1 + random_state: 0 + n_estimators: 100 + min_samples_leaf: 10 +expe_hashes: + ground_truth: XXX + descriptors_script: XXX + cross_validation: XXX + classifier: XXX + global: XXX \ No newline at end of file