From ad68cafe1e80ec2123be1b9ec16acbc6649d0ebc Mon Sep 17 00:00:00 2001
From: Karamaz0V1 <florent.guiotte@gmail.com>
Date: Fri, 13 Jul 2018 17:01:09 +0200
Subject: [PATCH] WIP on serialization

---
 Descriptors/__init__.py                       |   0
 Descriptors/dfc_aps.py                        |  30 ++
 Notebooks/Attribute Profiles Classifier.ipynb |  46 +--
 Notebooks/Classification Scores-Copy1.ipynb   | 298 ++++++++++++++++++
 Notebooks/Classification Scores.ipynb         |   2 +-
 Notebooks/YAML Serialization.ipynb            | 190 ++++++++++-
 test.yml                                      |  44 ++-
 7 files changed, 547 insertions(+), 63 deletions(-)
 create mode 100644 Descriptors/__init__.py
 create mode 100644 Descriptors/dfc_aps.py
 create mode 100644 Notebooks/Classification Scores-Copy1.ipynb

diff --git a/Descriptors/__init__.py b/Descriptors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Descriptors/dfc_aps.py b/Descriptors/dfc_aps.py
new file mode 100644
index 0000000..c632f9a
--- /dev/null
+++ b/Descriptors/dfc_aps.py
@@ -0,0 +1,30 @@
+import numpy as np
+import yaml
+
+import sys
+sys.path.append('..')
+import ld2dap
+
+def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
+    """Chain LoadTIFF > Treshold > AttributeProfiles > RawOutput, run it and return the output data."""
+    # Builtin float/int replace np.float/np.int (removed in NumPy 1.24); float first, presumably so '1e4'-style strings parse.
+    treshold = float(treshold)
+    areas = None if areas is None else np.array(areas).astype(float).astype(int)
+    sd = None if sd is None else np.array(sd).astype(float)
+    moi = None if moi is None else np.array(moi).astype(float)
+
+    # APs Pipelines
+    loader = ld2dap.LoadTIFF(rasters)
+    dfc_filter = ld2dap.Treshold(treshold)
+    dfc_filter.input = loader
+    aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi)
+    aps.input = dfc_filter
+    out_vectors = ld2dap.RawOutput()
+    out_vectors.input = aps
+
+    # Compute vectors
+    out_vectors.run()
+    return out_vectors.data
+
+def version():  # Returns the hard-coded version string of this module.
+    return 'v0.0'
\ No newline at end of file
diff --git a/Notebooks/Attribute Profiles Classifier.ipynb b/Notebooks/Attribute Profiles Classifier.ipynb
index b4ec61e..573d018 100644
--- a/Notebooks/Attribute Profiles Classifier.ipynb	
+++ b/Notebooks/Attribute Profiles Classifier.ipynb	
@@ -170,7 +170,7 @@
    "outputs": [],
    "source": [
     "areas = [10., 100.]\n",
-    "areas.extend([x * 1e3 for x in range(1,100,1)])\n",
+    "areas.extend([x * 1e3 for x in range(1,100,8)])\n",
     "plt.plot(areas, '.')\n",
     "plt.show()"
    ]
@@ -267,11 +267,11 @@
    "source": [
     "prediction = np.zeros_like(gt)\n",
     "\n",
-    "for xt, xv, yt, yv, ti in APsCVG(gt, att, 5):\n",
+    "for xt, xv, yt, yv, ti in APsCVG(gt, att, 10):\n",
     "    plt.imshow(ti * 1.)\n",
     "    plt.show()\n",
     "    \n",
-    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
+    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, min_samples_leaf=10, verbose=True)\n",
     "    rfc.fit(xt, yt)\n",
     "    \n",
     "    ypred = rfc.predict(xv)\n",
@@ -298,45 +298,7 @@
    "source": [
     "plt.imsave('../Res/tmppred.png', prediction)\n",
     "plt.imsave('../Res/gt.png', gt)\n",
-    "triskele.write('../Res/tmppred_8.tif', prediction)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X = attributes.reshape(-1, attributes.shape[2])\n",
-    "\n",
-    "(attributes[0,0] == X[0]).all()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
-    "labels = triskele.read(labels_file)\n",
-    "display(labels.shape)\n",
-    "\n",
-    "plt.figure(figsize=(16*2,3*2))\n",
-    "plt.imshow(labels)\n",
-    "plt.colorbar()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "Y = labels.reshape(-1)\n",
-    "\n",
-    "X.shape, Y.shape"
+    "triskele.write('../Res/tmppred_8_10pleaf_3cv.tif', prediction)"
    ]
   },
   {
diff --git a/Notebooks/Classification Scores-Copy1.ipynb b/Notebooks/Classification Scores-Copy1.ipynb
new file mode 100644
index 0000000..2a97809
--- /dev/null
+++ b/Notebooks/Classification Scores-Copy1.ipynb	
@@ -0,0 +1,298 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generic Classification Scores for DFC 2018 [TESTING]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from sklearn import metrics\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Triskele\n",
+    "import sys\n",
+    "from pathlib import Path\n",
+    "triskele_path = Path('../triskele/python')\n",
+    "sys.path.append(str(triskele_path.resolve()))\n",
+    "import triskele\n",
+    "\n",
+    "figsize = np.array((16, 9))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Classes Metadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_dfc_lbl  = pd.read_csv('../labels.csv')\n",
+    "df_meta_idx = pd.read_csv('../metaclass_indexes.csv')\n",
+    "df_meta_lbl = pd.read_csv('../metaclass_labels.csv')\n",
+    "\n",
+    "df_dfc_lbl.merge(df_meta_idx).merge(df_meta_lbl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "meta_idx = np.array(df_meta_idx['metaclass_index'], dtype=np.uint8)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Ground Truth and Prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
+    "pred = triskele.read('../Res/tmppred_8_10pleaf_3cv.tif')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Display Classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n",
+    "ax_gt.imshow(gt)\n",
+    "ax_gt.set_title('Ground Truth')\n",
+    "ax_pred.imshow(pred)\n",
+    "ax_pred.set_title('Prediction')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Display Meta Classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n",
+    "ax_gt.imshow(meta_idx[gt])\n",
+    "ax_gt.set_title('Ground Truth')\n",
+    "ax_pred.imshow(meta_idx[pred])\n",
+    "ax_pred.set_title('Prediction')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Metrics\n",
+    "\n",
+    "### Classes\n",
+    "\n",
+    "#### Confusion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = np.nonzero(pred)  # indices of pixels whose prediction is nonzero\n",
+    "pred_s = pred[f].flatten()\n",
+    "gt_s = gt[f].flatten()\n",
+    "# crosstab(index, columns): rows come from gt_s, columns from pred_s\n",
+    "ct = pd.crosstab(gt_s, pred_s,\n",
+    "        rownames=['Reference'], colnames=['Prediction'],\n",
+    "        margins=True, margins_name='Total',\n",
+    "        normalize=False # all, index, columns\n",
+    "        )\n",
+    "ct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Scores\n",
+    "\n",
+    "##### Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.accuracy_score(gt_s, pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Kappa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.cohen_kappa_score(gt_s, pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Precision, Recall, f1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.precision_recall_fscore_support(gt_s, pred_s)\n",
+    "print(metrics.classification_report(gt_s, pred_s))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Meta Classes\n",
+    "\n",
+    "#### Confusion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = np.nonzero(pred)  # not used below; pred_s / gt_s come from the class-level cell\n",
+    "m_pred_s = meta_idx[pred_s]\n",
+    "m_gt_s = meta_idx[gt_s]\n",
+    "# crosstab(index, columns): rows come from m_gt_s, columns from m_pred_s\n",
+    "ct = pd.crosstab(m_gt_s, m_pred_s,\n",
+    "        rownames=['Reference'], colnames=['Prediction'],\n",
+    "        margins=True, margins_name='Total',\n",
+    "        normalize=False # all, index, columns\n",
+    "        )\n",
+    "ct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Scores\n",
+    "\n",
+    "##### Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.accuracy_score(m_gt_s, m_pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Kappa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.cohen_kappa_score(m_gt_s, m_pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Precision, Recall, f1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.precision_recall_fscore_support(m_gt_s, m_pred_s)\n",
+    "print(metrics.classification_report(m_gt_s, m_pred_s))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Notebooks/Classification Scores.ipynb b/Notebooks/Classification Scores.ipynb
index 376f74f..18f465d 100644
--- a/Notebooks/Classification Scores.ipynb	
+++ b/Notebooks/Classification Scores.ipynb	
@@ -71,7 +71,7 @@
    "outputs": [],
    "source": [
     "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
-    "pred = triskele.read('../Res/tmppred.tif')"
+    "pred = triskele.read('../Res/tmppred_8.tif')"
    ]
   },
   {
diff --git a/Notebooks/YAML Serialization.ipynb b/Notebooks/YAML Serialization.ipynb
index de721cb..8dd087c 100644
--- a/Notebooks/YAML Serialization.ipynb	
+++ b/Notebooks/YAML Serialization.ipynb	
@@ -1,12 +1,27 @@
 {
  "cells": [
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "import yaml"
+    "# Serialize Attribute Profiles Classification\n",
+    "\n",
+    "- [X] Read a YAML recipe\n",
+    "- [X] Brew recipe\n",
+    "- [ ] Compute hashes\n",
+    "- [ ] Write hashes\n",
+    "- [ ] Time metrics\n",
+    "- [ ] Result metrics\n",
+    "- [ ] Write metrics\n",
+    "- [ ] Write/move results\n",
+    "- [ ] Watch folder\n",
+    "- [ ] Main loop\n",
+    "- [ ] Logs\n",
+    "- [ ] Catch errors\n",
+    "- [ ] Custom CVG\n",
+    "\n",
+    "\n",
+    "## Init"
    ]
   },
   {
@@ -15,9 +30,126 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import yaml\n",
+    "import numpy as np\n",
+    "import importlib\n",
+    "import sys\n",
+    "\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "sys.path.append('..')\n",
+    "import Descriptors\n",
+    "from CrossValidationGenerator import APsCVG\n",
+    "\n",
+    "sys.path.append('../triskele/python')\n",
+    "import triskele"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Serial Classifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
     "with open('../test.yml') as f:\n",
-    "    expe = yaml.safe_load(f)\n",
-    "expe"
+    "    expe = yaml.safe_load(f)['expe']\n",
+    "display(expe)\n",
+    "\n",
+    "# Ground truth\n",
+    "gt = triskele.read(expe['ground_truth'])\n",
+    "\n",
+    "# Descriptors\n",
+    "script = expe['descriptors_script']\n",
+    "desc = importlib.import_module(script['name'], package=Descriptors)\n",
+    "importlib.reload(Descriptors)\n",
+    "att = desc.run(**script['parameters'])\n",
+    "\n",
+    "# CrossVal and ML\n",
+    "cv = expe['cross_validation']\n",
+    "cl = expe['classifier']\n",
+    "\n",
+    "prediction = np.zeros_like(gt)\n",
+    "\n",
+    "for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
+    "    rfc = RandomForestClassifier(**cl['parameters'])\n",
+    "    rfc.fit(xt, yt)\n",
+    "    \n",
+    "    ypred = rfc.predict(xv)\n",
+    "    \n",
+    "    prediction[ti] = ypred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.figure(figsize=(16, 9))\n",
+    "plt.imshow(prediction)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import from string module, class and instantiate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib\n",
+    "module = importlib.import_module(module_name)\n",
+    "class_ = getattr(module, class_name)\n",
+    "instance = class_()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):\n",
+    "    treshold = float(treshold)\n",
+    "    areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)\n",
+    "    sd = None if sd is None else np.array(sd).astype(np.float)\n",
+    "    moi = None if moi is None else np.array(moi).astype(np.float)\n",
+    "    return treshold, areas, sd, moi\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run(**expe['descriptors_param'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "desc."
    ]
   },
   {
@@ -56,6 +188,52 @@
     "sorted(expe.items())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.array(expe['descriptors_param']['areas']).astype(np.float).astype(np.int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.array(None).astype(np.float).astype(np.int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "desc = importlib.import_module(expe['descriptors_script']['path'])\n",
+    "desc.run(**expe['descriptors_param'])"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/test.yml b/test.yml
index bcc0fdc..4b6747d 100644
--- a/test.yml
+++ b/test.yml
@@ -1,18 +1,34 @@
 expe:
     name: Première expérience
     date: 9 juillet 2018
-    rasters:
-        - './Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
-        - './Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
-    descriptors: 
-        type: Attribute Profiles
-        areas:
-            - 10
-            - 100
-            - 1000
-        moi: [.5, .7, .9]
+    priority: 1
+    ground_truth: '../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'
+    descriptors_script:
+        name: Descriptors.dfc_aps
+        parameters: 
+            treshold: 1e4
+            rasters:
+                - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
+                - '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
+            areas:
+                - 10
+                - 100
+                - 1e4
+            moi: [.5, .7, .9]
+    cross_validation:
+        name: CrossValidationGenerator.APsCVG
+        parameters:
+            n_test: 5
     classifier:
-        name: Random Forest
-        cvsplit: 5
-    hash: 000
-
+        name: sklearn.ensemble.RandomForestClassifier
+        parameters:
+            n_jobs: -1
+            random_state: 0
+            n_estimators: 100
+            min_samples_leaf: 10
+expe_hashes:
+    ground_truth: XXX
+    descriptors_script: XXX
+    cross_validation: XXX
+    classifier: XXX
+    global: XXX
\ No newline at end of file