From 22399f96186ca4a7d649b596704a87d4fded97b3 Mon Sep 17 00:00:00 2001
From: Karamaz0V1
Date: Fri, 31 Aug 2018 18:33:05 +0200
Subject: [PATCH] Refactor Supervisor with logs and error management

---
 .../CrossValidationGenerator.py         |   0
 CVGenerators/__init__.py                |   1 +
 Notebooks/Raster DFC Tresholds C3.ipynb | 664 ++++++++++++++++++
 Notebooks/YAML Serialization.ipynb      | 244 +++---
 logger.py                               |  51 ++
 logging.yaml                            |   2 +-
 supervisor.py                           | 114 ++-
 7 files changed, 932 insertions(+), 144 deletions(-)
 rename CrossValidationGenerator.py => CVGenerators/CrossValidationGenerator.py (100%)
 create mode 100644 CVGenerators/__init__.py
 create mode 100644 Notebooks/Raster DFC Tresholds C3.ipynb
 create mode 100644 logger.py

diff --git a/CrossValidationGenerator.py b/CVGenerators/CrossValidationGenerator.py
similarity index 100%
rename from CrossValidationGenerator.py
rename to CVGenerators/CrossValidationGenerator.py
diff --git a/CVGenerators/__init__.py b/CVGenerators/__init__.py
new file mode 100644
index 0000000..47d2d0c
--- /dev/null
+++ b/CVGenerators/__init__.py
@@ -0,0 +1 @@
+from .CrossValidationGenerator import CVG_legacy, APsCVG
\ No newline at end of file
diff --git a/Notebooks/Raster DFC Tresholds C3.ipynb b/Notebooks/Raster DFC Tresholds C3.ipynb
new file mode 100644
index 0000000..5803289
--- /dev/null
+++ b/Notebooks/Raster DFC Tresholds C3.ipynb
@@ -0,0 +1,664 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "sys.path.append(\"..\")\n",
+    "import rasterizer\n",
+    "import raster_assistant as ra\n",
+    "\n",
+    "sys.path.append('../triskele/python/')\n",
+    "import triskele\n",
+    "\n",
+    "figsize = np.array((16, 3)) * 1.5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Thresholds for Custom Raster from DFC LiDAR data\n",
+    "\n",
+    "Compare our results with the DFC rasters and set the thresholds for the raster factory.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load DFC raster"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dfc_raster = triskele.read('../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(dfc_raster)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The rasters from the DFC dataset are corrupted by high-value noise. We need to filter out the high values. We empirically set the threshold to 1e4."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.clip(dfc_raster, dfc_raster.min(), 1e4, out=dfc_raster)\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(dfc_raster)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set filtering and clipping thresholds to process rasters from LiDAR"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data without filtering or clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_c0)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We also get high-value noise, but far less than in the DFC raster."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data without filtering and minimal clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.01, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_c0_01)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Clipping does not remove the unwanted high-value noise."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data with minimal filtering and no clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Filtering removes the high-value noise, but the tone mapping is bad (too dark)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data with filtering and no clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.1, clip_treshold=0, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_1_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_1_c0)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tone mapping is correct, but interpolation artifacts appear where too many points are removed by filtering (e.g. in the stadium)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data without filtering and with clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.1, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_c0_1)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tone mapping is correct with no interpolation artifacts, but high-value noise speckles the result."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data with minimal filtering and minimal clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0.01, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_01)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tone mapping is not correct."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data with filtering and normal clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.2, clip_treshold=0.1, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_1 = ra.rasterize_cache('z', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_1)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tone mapping is correct, with no interpolation artifacts and little high-value noise in the result. From now on we will use these threshold values."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load C123 data with averaged filtering and normal clipping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = (0.01 + 0.01 + 0.2) / 3\n",
+    "f"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C123 = ra.bulk_load('../Data/lidar/', 'C123', filter_treshold=0.08, clip_treshold=0.1, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we process the raster at the same resolution with nearest-neighbour interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_C123 = ra.rasterize_cache('z', C123, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_C123)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tone mapping is correct, with no interpolation artifacts and little high-value noise in the result. From now on we will use these threshold values."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare interpolation methods\n",
+    "\n",
+    "### Nearest neighbour"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_1_nearest = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_1_nearest)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Linear interpolation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'linear', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_1)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Cubic interpolation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'cubic', False, cache_dir='../Res/enrichment_rasters')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_1)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cubic interpolation seems to create negative values, maybe at the same spots as the DFC high-value noise?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((raster_f0_01_c0_1 < 0) * 1.)\n",
+    "plt.colorbar()\n",
+    "plt.title('Cubic low noise')\n",
+    "plt.show()\n",
+    "\n",
+    "dfc_raster_raw = triskele.read('../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif')\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 < 0)) * 1)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise and Cubic low noise')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 < 0) * 1)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise minus Cubic low noise')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are numerous noise pixels common to the DFC noise and our cubic interpolation.\n",
+    "\n",
+    "Let's try with our high-value noise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1.)\n",
+    "plt.colorbar()\n",
+    "plt.title('Cubic high noise')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max())) * 1)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise and Cubic high noise')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1)\n",
+    "plt.colorbar()\n",
+    "plt.title('DFC high noise minus Cubic high noise')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There is very little correlation between our raster and the DFC high-value noise.\n",
+    "\n",
+    "### Filter low and high interpolated values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raster_f0_01_c0_1_postprocess = np.clip(raster_f0_01_c0_1, C3.intensity.min(), C3.intensity.max())\n",
+    "\n",
+    "plt.figure(figsize=figsize)\n",
+    "plt.imshow(raster_f0_01_c0_1_postprocess)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TMP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp = ra.rasterize_cache('intensity', C3, .5, 'cubic-clip', False, cache_dir='../Res/enrichment_rasters')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C12 = ra.bulk_load(['../Data/lidar/C1', '../Data/lidar/C2'], 'C12', filter_treshold=1., clip_treshold=0.1, dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Notebooks/YAML Serialization.ipynb b/Notebooks/YAML Serialization.ipynb
index f68222b..3e6ddc8 100644
--- a/Notebooks/YAML Serialization.ipynb
+++ b/Notebooks/YAML Serialization.ipynb
@@ -13,9 +13,9 @@
     "- [X] Time metrics\n",
     "- [X] Result metrics\n",
     "- [X] Write metrics\n",
-    "- [ ] Write/move results\n",
-    "- [ ] Watch folder\n",
-    "- [ ] Main loop\n",
+    "- [X] Write/move results\n",
+    "- [X] Watch folder\n",
+    "- [X] Main loop\n",
     "- [ ] Logs\n",
     "- [ ] Catch errors\n",
     "- [ ] Custom CVG\n",
@@ -194,123 +194,15 @@
     "def compute_descriptors(expe):\n",
     "    \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
     "    script = expe['descriptors_script']\n",
-    "    desc = importlib.import_module(script['name'], package=Descriptors)\n",
+    "    desc = importlib.import_module(script['name'])\n",
     "    #importlib.reload(Descriptors)\n",
     "    att = desc.run(**script['parameters'])\n",
     "    \n",
     "    return att\n",
     "\n",
     "att = compute_descriptors(expe)\n",
-    "kronos.time('description')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def cast_expand_i(X, dtype):\n",
-    "    return ((X - X.min()) / (X.max() - X.min()) * np.iinfo(dtype).max).astype(dtype)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "att.shape, att.dtype"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "att = cast_expand_i(att, np.uint8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "res = np.var(att, axis=-1)\n",
-    "res.shape, res.dtype"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "view = "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "(res - res.min()) / (res.max() - res.min())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "resd = ((res - res.min()) / (res.max() - res.min()) * np.iinfo(np.uint16).max).astype(np.uint16, casting='unsafe')\n",
-    "\n",
-    "plt.figure(figsize=(40,6))\n",
-    "plt.imshow(resd)\n",
-    "plt.colorbar()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "resd = res.astype(att.dtype, casting='unsafe')\n",
-    "\n",
-    "plt.figure(figsize=(40,6))\n",
-    "plt.imshow(resd)\n",
-    "plt.colorbar()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.imsave('../Res/glitch.png', resd)"
+    "kronos.time('description')\n",
+    "att.shape"
    ]
   },
   {
@@ -336,17 +228,23 @@
     "    cv = expe['cross_validation']\n",
     "    cl = expe['classifier']\n",
     "\n",
+    "    cross_val = getattr(importlib.import_module(cv['package']), cv['name'])\n",
+    "    classifier = getattr(importlib.import_module(cl['package']), cl['name'])\n",
+    "    \n",
     "    prediction = np.zeros_like(gt)\n",
     "\n",
-    "    for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
-    "        rfc = RandomForestClassifier(**cl['parameters'])\n",
+    "    for xt, xv, yt, yv, ti in cross_val(gt, att, **cv['parameters']):\n",
+    "        rfc = classifier(**cl['parameters'])\n",
     "        rfc.fit(xt, yt)\n",
     "\n",
    
     "        ypred = rfc.predict(xv)\n",
     "\n",
     "        prediction[ti] = ypred\n",
     "    \n",
-    "    return prediction"
+    "    return prediction\n",
+    "\n",
+    "classification = compute_classification(expe, att)\n",
+    "kronos.time('classification')"
    ]
   },
@@ -355,8 +253,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "classification = compute_classification(expe, att)\n",
-    "kronos.time('classification')"
+    "import sklearn.ensemble.RandomForestClassifier"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "importlib.import_module('RandomForestClassifier', package='sklearn.ensemble')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "getattr(importlib.import_module('sklearn.ensemble'), 'RandomForestClassifier')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -704,6 +619,97 @@
     "E / ('qwer' + '.tif')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "P = Path('../Enrichment/Tests/')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len([f for f in P.iterdir()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(P.glob('*_checkpointwes'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not []:\n",
+    "    print('yay')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = list()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "E = Exception('Nonte')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "str(E.with_traceback())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(I)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "I[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "yaml.dump({'test': 'I dont care\\\\n lel'}, open('../bdq.yml', 'w'))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
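The scratch cells above probe three ways of turning the `classifier` and `cross_validation` recipe entries into callables. The first two fail: an `import` statement and `importlib.import_module()` can only import modules, never a class inside one. Only the `getattr(importlib.import_module(...), name)` form works, and it is the one the refactored `compute_classification` adopts. A minimal sketch of the pattern (the recipe values shown are illustrative):

```python
# Resolve a class from an expe recipe entry at runtime. import_module()
# imports the containing module; getattr() then fetches the class by name.
import importlib

def load_class(package, name):
    return getattr(importlib.import_module(package), name)

# e.g. with a recipe entry {'package': 'sklearn.ensemble',
#                           'name': 'RandomForestClassifier'}:
RandomForestClassifier = load_class('sklearn.ensemble', 'RandomForestClassifier')
clf = RandomForestClassifier(n_estimators=100)
```

This is what lets the supervisor swap classifiers and cross-validation generators per experiment without hard-coded imports.
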
diff --git a/logger.py b/logger.py
new file mode 100644
index 0000000..14441fa
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# \file logger.py
+# \brief TODO
+# \author Florent Guiotte
+# \version 0.1
+# \date 24 April 2018
+#
+# from https://fangpenlin.com/posts/2012/08/26/good-logging-practice-in-python/
+
+import os
+import logging.config
+from pathlib import Path
+
+import yaml
+
+def setup_logging(
+    default_path='logging.yaml',
+    default_level=logging.WARN,
+    env_key='LOG_CFG'
+):
+    """Setup logging configuration
+
+    """
+    path = default_path
+    value = os.getenv(env_key, None)
+    if value:
+        path = value
+    if os.path.exists(path):
+        with open(path, 'rt') as f:
+            config = yaml.safe_load(f.read())
+        makedirs(config)
+        logging.config.dictConfig(config)
+    else:
+        logging.basicConfig(level=default_level)
+
+
+def makedirs(dic):
+    files = finddirs(dic)
+    for f in files:
+        d = Path(*f.parts[:-1])
+        d.mkdir(parents=True, exist_ok=True)
+
+def finddirs(dic, key='filename'):
+    r = list()
+    value = dic.get(key)
+    if value: r.append(Path(value))
+    for k, v in dic.items():
+        if isinstance(v, dict):
+            r.extend(finddirs(v))
+    return r
diff --git a/logging.yaml b/logging.yaml
index cf50544..b18e6ed 100644
--- a/logging.yaml
+++ b/logging.yaml
@@ -7,7 +7,7 @@ formatters:
 handlers:
   console:
     class: logging.StreamHandler
-    level: DEBUG
+    level: INFO
     formatter: simple
     stream: ext://sys.stdout
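`setup_logging()` loads `logging.yaml` (or whatever file the `LOG_CFG` environment variable points at), walks the config with `finddirs()` to pre-create the directories of any `filename:` entries used by file handlers, and falls back to `logging.basicConfig(level=WARN)` when no config file is found. A minimal usage sketch, mirroring what supervisor.py's `__main__` block does below:

```python
# Minimal usage of logger.py, as supervisor.py does at startup.
import logging

import logger

logger.setup_logging()            # reads logging.yaml, creates log dirs
log = logging.getLogger(__name__)
log.info('visible: the console handler now sits at INFO')
log.debug('filtered out: below the INFO level set in logging.yaml')
```

Raising the console handler from DEBUG to INFO keeps the supervisor's per-step progress messages visible while silencing debug chatter.
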
diff --git a/supervisor.py b/supervisor.py
index 3c243a6..daad0fd 100644
--- a/supervisor.py
+++ b/supervisor.py
@@ -20,28 +20,44 @@ import datetime
 from sklearn import metrics
 from pathlib import Path
 from operator import itemgetter
+import traceback
 
 from sklearn.ensemble import RandomForestClassifier
 
-#sys.path.append('.')
-import Descriptors
-from CrossValidationGenerator import APsCVG
-
 sys.path.append('./triskele/python')
 import triskele
+import logging
+import logger
 
-### Keep yaml ordered
+log = logging.getLogger('Supervisor [{}]'.format(os.uname()[1]))
+
+### Keep yaml ordered, newline string
 def setup_yaml():
-    """ https://stackoverflow.com/a/8661021 """
-    represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
-    yaml.add_representer(OrderedDict, represent_dict_order)
+    """ https://stackoverflow.com/a/8661021 """
+    represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
+    yaml.add_representer(OrderedDict, represent_dict_order)
+
+    """ https://stackoverflow.com/a/24291536 """
+    yaml.Dumper.org_represent_str = yaml.Dumper.represent_str
+    yaml.add_representer(str, repr_str, Dumper=yaml.Dumper)
+
+def repr_str(dumper, data):
+    if '\n' in data:
+        return dumper.represent_scalar(u'tag:yaml.org,2002:str', data, style='|')
+    return dumper.org_represent_str(data)
+
+
 setup_yaml()
 
 enrichment_dir = Path('./Enrichment/')
 test_dir = enrichment_dir / 'Tests'
 staging_dir = enrichment_dir / 'Staging'
 result_dir = enrichment_dir / 'Results'
+failed_dir = enrichment_dir / 'Failed'
+
+class TestError(Exception):
+    pass
 
 def update_queue():
     tmp_queue = list()
@@ -60,6 +76,7 @@ def get_priority(yml_file):
 
 
 def run(expe_file):
+    log.info('Run test {}'.format(expe_file))
     with open(expe_file) as f:
         expe = OrderedDict(yaml.safe_load(f)['expe'])
 
@@ -67,6 +84,7 @@
     kronos = Kronos()
 
     ### Compute hashes
+    log.info('Computing hashes')
     expe_hashes = compute_hashes(expe)
 
     ### Create output names
@@ -78,32 +96,67 @@
     expe_report = create_report(kronos)
 
     ### Stage expe
+    log.info('Staging test')
     write_expe_file(staging_dir / oname_yml, expe, expe_hashes, expe_report)
     expe_file.unlink()
 
     ### Compute descriptors
-    descriptors = compute_descriptors(expe)
+    log.info('Compute descriptors')
+    try:
+        descriptors = compute_descriptors(expe)
+    except Exception as e:
+        kronos.time('description')
+        expe_report = create_report(kronos)
+        (staging_dir / oname_yml).unlink()
+        write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'description', e)
+        raise TestError('Error occurred during description')
     kronos.time('description')
 
     ### Compute classification
-    classification = compute_classification(expe, descriptors)
+    log.info('Classify data')
+    try:
+        classification = compute_classification(expe, descriptors)
+    except Exception as e:
+        kronos.time('classification')
+        expe_report = create_report(kronos)
+        (staging_dir / oname_yml).unlink()
+        write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'classification', e)
+        raise TestError('Error occurred during classification')
     kronos.time('classification')
 
     ### Metrics
-    metrics = run_metrics(expe, classification)
+    log.info('Run initial metrics')
+    metrics = run_metrics(expe, classification, descriptors)
     kronos.time('metrics')
 
     ### Create complete report
+    log.info('Write complete report')
     expe_report = create_report(kronos)
-    (staging_dir / oname_yml).unlink()
 
     ### Name and write prediction
     triskele.write(result_dir / oname_tif, classification)
 
     ### Write report and results
+    (staging_dir / oname_yml).unlink()
     write_expe_file(result_dir / oname_yml, expe, expe_hashes, expe_report, oname_tif, metrics)
+
+    log.info('Test complete')
 
+def write_error(file, expe, hashes=None, report=None, when='', e=Exception):
+    error = OrderedDict()
+    error['when'] = when
+    error['what'] = str(e)
+    error['where'] = traceback.format_exc()
+    with open(file, 'w') as of:
+        yaml.dump(OrderedDict({'expe': expe,
+                               'expe_hashes': hashes,
+                               'expe_report': report,
+                               'expe_error': error}),
+                  of, default_flow_style=False, encoding=None, allow_unicode=True)
+
 def write_expe_file(file, expe, hashes=None, report=None, classification=None, results=None):
     with open(file, 'w') as of:
         yaml.dump(OrderedDict({'expe': expe,
@@ -130,7 +183,7 @@ def compute_hashes(expe):
 def compute_descriptors(expe):
     """Compute descriptors from a standard expe recipe"""
     script = expe['descriptors_script']
-    desc = importlib.import_module(script['name'], package=Descriptors)
+    desc = importlib.import_module(script['name'])
     #importlib.reload(Descriptors)
     att = desc.run(**script['parameters'])
 
@@ -147,10 +200,13 @@
     cv = expe['cross_validation']
     cl = expe['classifier']
 
+    cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
+    classifier = getattr(importlib.import_module(cl['package']), cl['name'])
+
     prediction = np.zeros_like(gt)
 
-    for xt, xv, yt, yv, ti in APsCVG(gt, descriptors, **cv['parameters']):
-        rfc = RandomForestClassifier(**cl['parameters'])
+    for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
+        rfc = classifier(**cl['parameters'])
         rfc.fit(xt, yt)
 
         ypred = rfc.predict(xv)
@@ -160,25 +216,26 @@
 
     return prediction
 
-def compute_metrics(ground_truth, classification):
+def compute_metrics(ground_truth, classification, descriptors):
     """Return dict of metrics for ground_truth and classification prediction in parameters"""
     f = np.nonzero(classification)
     pred = classification[f].ravel()
     gt = ground_truth[f].ravel()
 
     results = OrderedDict()
+    results['dimension'] = descriptors.shape[-1]
     results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
     results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))
 
     return results
 
-def run_metrics(expe, classification):
+def run_metrics(expe, classification, descriptors):
     """Compute the metrics from a standard expe recipe and a given classification"""
     ### Extensible: meta-classes
     gt = triskele.read(expe['ground_truth'])
 
-    return compute_metrics(gt, classification)
+    return compute_metrics(gt, classification, descriptors)
 
 
 def create_report(kronos):
@@ -219,24 +276,33 @@ class Kronos(object):
 
 
 def watch_folder():
-    time.sleep(10)
-
+    log.info('Waiting for test')
+    while not list(test_dir.glob('*.yml')):
+        time.sleep(10)
 
 def main():
     while(True):
         try:
             queue = update_queue()
         except Exception:
-            print('ERROR: while updating work queue. Resuming.')
-            continue
+            log.error('Critical exception while updating work queue')
+            log.error(traceback.format_exc())
+            log.warning('Resuming')
+            continue
 
         if not queue:
             watch_folder()
             continue
 
         try:
            run(queue.pop()['expe_file'])
+        except TestError:
+            log.warning('Test failed, error logged. Resuming')
        except Exception:
-            print('ERROR: while running test. Resuming.')
+            log.error('Critical exception while running test')
+            log.error(traceback.format_exc())
+            log.warning('Resuming')
            continue
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    logger.setup_logging()
+    log.info('Starting supervisor')
+    main()
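The `repr_str` representer registered by `setup_yaml()` is what keeps the tracebacks that `write_error()` stores under `expe_error['where']` readable in the failed-test YAML files: multi-line strings are emitted as block scalars instead of one escaped line. A standalone check of that behaviour, reusing the same representer code as the patch (the sample traceback text is illustrative):

```python
# Multi-line strings dump as YAML block scalars once repr_str is registered.
import yaml

def repr_str(dumper, data):
    if '\n' in data:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.org_represent_str(data)

yaml.Dumper.org_represent_str = yaml.Dumper.represent_str
yaml.add_representer(str, repr_str, Dumper=yaml.Dumper)

print(yaml.dump({'where': 'Traceback (most recent call last):\n  ValueError: boom'}))
# where: |-
#   Traceback (most recent call last):
#     ValueError: boom
```

Combined with the `TestError` wrapper, this gives the supervisor a clean failure path: a broken experiment lands in `Enrichment/Failed/` with its recipe, timing report and traceback, while the main loop keeps serving the queue.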