WIP on serialization

2018-07-13 17:01:09 +02:00 · 2018-07-13 17:01:09 +02:00 · ad68cafe1e
commit ad68cafe1e
parent 9e9dbb440e
7 changed files with 547 additions and 63 deletions
--- a/Descriptors/init.py
+++ b/Descriptors/init.py
--- a/Descriptors/dfc_aps.py
+++ b/Descriptors/dfc_aps.py
@ -0,0 +1,30 @@
+import numpy as np
+import yaml
+
+import sys
+sys.path.append('..')
+import ld2dap
+
+def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):
+    # Parse attribute type
+    treshold = float(treshold)
+    areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)
+    sd = None if sd is None else np.array(sd).astype(np.float)
+    moi = None if moi is None else np.array(moi).astype(np.float)
+
+    # APs Pipelines
+    loader = ld2dap.LoadTIFF(rasters)
+    dfc_filter = ld2dap.Treshold(treshold)
+    dfc_filter.input = loader
+    aps = ld2dap.AttributeProfiles(area=areas, sd=sd, moi=moi)
+    aps.input = dfc_filter
+    out_vectors = ld2dap.RawOutput()
+    out_vectors.input = aps
+
+    # Compute vectors
+    out_vectors.run()
+    
+    return out_vectors.data
+
+def version():
+    return 'v0.0'
--- a/Notebooks/Attribute
+++ b/Notebooks/Attribute
@ -170,7 +170,7 @@
   "outputs": [],
   "source": [
    "areas = [10., 100.]\n",
-    "areas.extend([x * 1e3 for x in range(1,100,1)])\n",
+    "areas.extend([x * 1e3 for x in range(1,100,8)])\n",
    "plt.plot(areas, '.')\n",
    "plt.show()"
   ]
@ -267,11 +267,11 @@
   "source": [
    "prediction = np.zeros_like(gt)\n",
    "\n",
-    "for xt, xv, yt, yv, ti in APsCVG(gt, att, 5):\n",
+    "for xt, xv, yt, yv, ti in APsCVG(gt, att, 10):\n",
    "    plt.imshow(ti * 1.)\n",
    "    plt.show()\n",
    "    \n",
-    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, verbose=True)\n",
+    "    rfc = RandomForestClassifier(n_jobs=-1, random_state=0, n_estimators=100, min_samples_leaf=10, verbose=True)\n",
    "    rfc.fit(xt, yt)\n",
    "    \n",
    "    ypred = rfc.predict(xv)\n",
@ -298,45 +298,7 @@
   "source": [
    "plt.imsave('../Res/tmppred.png', prediction)\n",
    "plt.imsave('../Res/gt.png', gt)\n",
-    "triskele.write('../Res/tmppred_8.tif', prediction)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X = attributes.reshape(-1, attributes.shape[2])\n",
-    "\n",
-    "(attributes[0,0] == X[0]).all()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "labels_file = Path('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
-    "labels = triskele.read(labels_file)\n",
-    "display(labels.shape)\n",
-    "\n",
-    "plt.figure(figsize=(16*2,3*2))\n",
-    "plt.imshow(labels)\n",
-    "plt.colorbar()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "Y = labels.reshape(-1)\n",
-    "\n",
-    "X.shape, Y.shape"
+    "triskele.write('../Res/tmppred_8_10pleaf_3cv.tif', prediction)"
   ]
  },
  {
--- a/Notebooks/Classification
+++ b/Notebooks/Classification
@ -0,0 +1,298 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generic Classification Scores for DFC 2018 [TESTING]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from sklearn import metrics\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Triskele\n",
+    "import sys\n",
+    "from pathlib import Path\n",
+    "triskele_path = Path('../triskele/python')\n",
+    "sys.path.append(str(triskele_path.resolve()))\n",
+    "import triskele\n",
+    "\n",
+    "figsize = np.array((16, 9))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Classes Metadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_dfc_lbl  = pd.read_csv('../labels.csv')\n",
+    "df_meta_idx = pd.read_csv('../metaclass_indexes.csv')\n",
+    "df_meta_lbl = pd.read_csv('../metaclass_labels.csv')\n",
+    "\n",
+    "df_dfc_lbl.merge(df_meta_idx).merge(df_meta_lbl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "meta_idx = np.array(df_meta_idx['metaclass_index'], dtype=np.uint8)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Ground Truth and Prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
+    "pred = triskele.read('../Res/tmppred_8_10pleaf_3cv.tif')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Display Classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n",
+    "ax_gt.imshow(gt)\n",
+    "ax_gt.set_title('Ground Truth')\n",
+    "ax_pred.imshow(pred)\n",
+    "ax_pred.set_title('Prediction')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Display Meta Classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax_gt, ax_pred) = plt.subplots(2, figsize=figsize * 2)\n",
+    "ax_gt.imshow(meta_idx[gt])\n",
+    "ax_gt.set_title('Ground Truth')\n",
+    "ax_pred.imshow(meta_idx[pred])\n",
+    "ax_pred.set_title('Prediction')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Metrics\n",
+    "\n",
+    "### Classes\n",
+    "\n",
+    "#### Confusion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = np.nonzero(pred)\n",
+    "pred_s = pred[f].flatten()\n",
+    "gt_s = gt[f].flatten()\n",
+    "\n",
+    "ct = pd.crosstab(gt_s, pred_s,\n",
+    "        rownames=['Prediction'], colnames=['Reference'],\n",
+    "        margins=True, margins_name='Total',\n",
+    "        normalize=False # all, index, columns\n",
+    "        )\n",
+    "ct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Scores\n",
+    "\n",
+    "##### Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.accuracy_score(gt_s, pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Kappa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.cohen_kappa_score(gt_s, pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Precision, Recall, f1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.precision_recall_fscore_support(gt_s, pred_s)\n",
+    "print(metrics.classification_report(gt_s, pred_s))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Meta Classes\n",
+    "\n",
+    "#### Confusion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = np.nonzero(pred)\n",
+    "m_pred_s = meta_idx[pred_s]\n",
+    "m_gt_s = meta_idx[gt_s]\n",
+    "\n",
+    "ct = pd.crosstab(m_gt_s, m_pred_s,\n",
+    "        rownames=['Prediction'], colnames=['Reference'],\n",
+    "        margins=True, margins_name='Total',\n",
+    "        normalize=False # all, index, columns\n",
+    "        )\n",
+    "ct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Scores\n",
+    "\n",
+    "##### Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.accuracy_score(m_gt_s, m_pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Kappa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.cohen_kappa_score(m_gt_s, m_pred_s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Precision, Recall, f1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metrics.precision_recall_fscore_support(m_gt_s, m_pred_s)\n",
+    "print(metrics.classification_report(m_gt_s, m_pred_s))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Notebooks/Classification
+++ b/Notebooks/Classification
@ -71,7 +71,7 @@
   "outputs": [],
   "source": [
    "gt = triskele.read('../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif')\n",
-    "pred = triskele.read('../Res/tmppred.tif')"
+    "pred = triskele.read('../Res/tmppred_8.tif')"
   ]
  },
  {
--- a/Serialization.ipynb
+++ b/Serialization.ipynb
@ -1,12 +1,27 @@
 {
 "cells": [
  {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [],
   "source": [
-    "import yaml"
+    "# Serialize Attribute Profiles Classification\n",
+    "\n",
+    "- [X] Read a YAML recipe\n",
+    "- [X] Brew recipe\n",
+    "- [ ] Compute hashes\n",
+    "- [ ] Write hashes\n",
+    "- [ ] Time metrics\n",
+    "- [ ] Result metrics\n",
+    "- [ ] Write metrics\n",
+    "- [ ] Write/move results\n",
+    "- [ ] Watch folder\n",
+    "- [ ] Main loop\n",
+    "- [ ] Logs\n",
+    "- [ ] Catch errors\n",
+    "- [ ] Custom CVG\n",
+    "\n",
+    "\n",
+    "## Init"
   ]
  },
  {
@ -15,9 +30,126 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "import yaml\n",
+    "import numpy as np\n",
+    "import importlib\n",
+    "import sys\n",
+    "\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "sys.path.append('..')\n",
+    "import Descriptors\n",
+    "from CrossValidationGenerator import APsCVG\n",
+    "\n",
+    "sys.path.append('../triskele/python')\n",
+    "import triskele"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Serial Classifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
    "with open('../test.yml') as f:\n",
-    "    expe = yaml.safe_load(f)\n",
-    "expe"
+    "    expe = yaml.safe_load(f)['expe']\n",
+    "display(expe)\n",
+    "\n",
+    "# Ground truth\n",
+    "gt = triskele.read(expe['ground_truth'])\n",
+    "\n",
+    "# Descriptors\n",
+    "script = expe['descriptors_script']\n",
+    "desc = importlib.import_module(script['name'], package=Descriptors)\n",
+    "importlib.reload(Descriptors)\n",
+    "att = desc.run(**script['parameters'])\n",
+    "\n",
+    "# CrossVal and ML\n",
+    "cv = expe['cross_validation']\n",
+    "cl = expe['classifier']\n",
+    "\n",
+    "prediction = np.zeros_like(gt)\n",
+    "\n",
+    "for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
+    "    rfc = RandomForestClassifier(**cl['parameters'])\n",
+    "    rfc.fit(xt, yt)\n",
+    "    \n",
+    "    ypred = rfc.predict(xv)\n",
+    "    \n",
+    "    prediction[ti] = ypred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.figure(figsize=(16, 9))\n",
+    "plt.imshow(prediction)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import from string module, class and instantiate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib\n",
+    "module = importlib.import_module(module_name)\n",
+    "class_ = getattr(module, class_name)\n",
+    "instance = class_()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):\n",
+    "    treshold = float(treshold)\n",
+    "    areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)\n",
+    "    sd = None if sd is None else np.array(sd).astype(np.float)\n",
+    "    moi = None if moi is None else np.array(moi).astype(np.float)\n",
+    "    return treshold, areas, sd, moi\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run(**expe['descriptors_param'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "desc."
   ]
  },
  {
@ -56,6 +188,52 @@
    "sorted(expe.items())"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.array(expe['descriptors_param']['areas']).astype(np.float).astype(np.int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.array(None).astype(np.float).astype(np.int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "desc = importlib.import_module(expe['descriptors_script']['path'])\n",
+    "desc.run(**expe['descriptors_param'])"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/test.yml
+++ b/test.yml
@ -1,18 +1,34 @@
 expe:
    name: Première expérience
    date: 9 juillet 2018
+    priority: 1
+    ground_truth: '../Data/ground_truth/2018_IEEE_GRSS_DFC_GT_TR.tif'
+    descriptors_script:
+        name: Descriptors.dfc_aps
+        parameters: 
+            treshold: 1e4
            rasters:
-        - './Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
-        - './Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
-    descriptors: 
-        type: Attribute Profiles
+                - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
+                - '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
            areas:
                - 10
                - 100
-            - 1000
+                - 1e4
            moi: [.5, .7, .9]
+    cross_validation:
+        name: CrossValidationGenerator.APsCVG
+        parameters:
+            n_test: 5
    classifier:
-        name: Random Forest
-        cvsplit: 5
-    hash: 000
-
+        name: sklearn.ensemble.RandomForestClassifier
+        parameters:
+            n_jobs: -1
+            random_state: 0
+            n_estimators: 100
+            min_samples_leaf: 10
+expe_hashes:
+    ground_truth: XXX
+    descriptors_script: XXX
+    cross_validation: XXX
+    classifier: XXX
+    global: XXX