Refactor Supervisor with logs and error management

Florent Guiotte 2018-08-31 18:33:05 +02:00
parent 62cac9f534
commit 22399f9618
7 changed files with 932 additions and 144 deletions

CVGenerators/__init__.py Normal file

@@ -0,0 +1 @@
from .CrossValidationGenerator import CVG_legacy, APsCVG
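This new package lets the Supervisor select the cross-validation generator from the expe recipe instead of importing APsCVG directly (see the Supervisor.py diff below). A minimal sketch of that lookup, with an illustrative recipe dict and empty parameters:

# Sketch only: the cross_validation keys mirror the expe recipe used by the
# Supervisor below; the concrete values here are illustrative.
import importlib

cv = {'package': 'CVGenerators', 'name': 'APsCVG', 'parameters': {}}
cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
# cross_val yields (xt, xv, yt, yv, ti) folds when called as
# cross_val(gt, att, **cv['parameters']), as in compute_classification().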


@@ -0,0 +1,664 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"sys.path.append(\"..\")\n",
"import rasterizer\n",
"import raster_assistant as ra\n",
"\n",
"sys.path.append('../triskele/python/')\n",
"import triskele\n",
"\n",
"figsize = np.array((16, 3)) * 1.5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tresholds for Custom Raster from DFC LiDAR data\n",
"\n",
"Compare our results with the DFC rasters and set the tresholds for the raster factory.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load DFC raster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfc_raster = triskele.read('../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(dfc_raster)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The raster from DFC dataset are noised with high value noise. We need to filter high values. We empirically set the treshold to 1e4."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.clip(dfc_raster, dfc_raster.min(), 1e4, out=dfc_raster)\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(dfc_raster)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set filtering and clipping treshold to process rasters from LiDAR"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering or clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also have high value noise, but far better than the DFC noise."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering and minimal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.01, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0_01)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Clipping does not remove unwanted high value noise."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and no clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Filtering remove high value noise, but the tone mapping is bad (too dark)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with filtering and no clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.1, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_1_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_1_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone mapping is correct, but interpolation artifacts appears where too much points are removed from filtering (e.g. in the stadium)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering and with clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact but high noise sparkle the result."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and minimal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0.01, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_01)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is not correct."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and normal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.2, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('z', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact and low high noise in the result. We will now on choose "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load C123 data with minimal filtering and normal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f = (0.01 + 0.01 + 0.2) / 3\n",
"f"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C123 = ra.bulk_load('../Data/lidar/', 'C123', filter_treshold=0.08, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_C123 = ra.rasterize_cache('z', C123, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact and low high noise in the result. We will now on choose "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare interpolation method\n",
"\n",
"### Nearest neighbour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1_nearest = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1_nearest)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Linear interpolation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'linear', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cubic interpolation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'cubic', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cubic interpolation seems to create negative values, maybe at the same spots of the DFC high noise ?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=figsize)\n",
"plt.imshow((raster_f0_01_c0_1 < 0) * 1.)\n",
"plt.colorbar()\n",
"plt.title('Cubic low noise')\n",
"plt.show()\n",
"\n",
"dfc_raster_raw = triskele.read('../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 < 0)) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise and Cubic low noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 < 0) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise minus Cubic low noise')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Numerous common noise pixel between DFC noise and our cubic interpolation.\n",
"\n",
"Let's try with our high noise."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=figsize)\n",
"plt.imshow((raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1.)\n",
"plt.colorbar()\n",
"plt.title('Cubic high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max())) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise and Cubic low noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise minus Cubic low noise')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Very low correlation between our raster and the DFC high noise.\n",
"\n",
"### Filter low and high interpolated values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1_postprocess = np.clip(raster_f0_01_c0_1, C3.intensity.min(), C3.intensity.max())\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1_postprocess)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TMP"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tmp = ra.rasterize_cache('intensity', C3, .5, 'cubic-clip', False, cache_dir='../Res/enrichment_rasters')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C12 = ra.bulk_load(['../Data/lidar/C1', '../Data/lidar/C2'], 'C12', filter_treshold=1., clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -13,9 +13,9 @@
"- [X] Time metrics\n",
"- [X] Result metrics\n",
"- [X] Write metrics\n",
"- [ ] Write/move results\n",
"- [ ] Watch folder\n",
"- [ ] Main loop\n",
"- [X] Write/move results\n",
"- [X] Watch folder\n",
"- [X] Main loop\n",
"- [ ] Logs\n",
"- [ ] Catch errors\n",
"- [ ] Custom CVG\n",
@@ -194,123 +194,15 @@
"def compute_descriptors(expe):\n",
" \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
" script = expe['descriptors_script']\n",
" desc = importlib.import_module(script['name'], package=Descriptors)\n",
" desc = importlib.import_module(script['name'])\n",
" #importlib.reload(Descriptors)\n",
" att = desc.run(**script['parameters'])\n",
" \n",
" return att\n",
"\n",
"att = compute_descriptors(expe)\n",
"kronos.time('description')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def cast_expand_i(X, dtype):\n",
" return ((X - X.min()) / (X.max() - X.min()) * np.iinfo(dtype).max).astype(dtype)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"att.shape, att.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"att = cast_expand_i(att, np.uint8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = np.var(att, axis=-1)\n",
"res.shape, res.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"view = "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"(res - res.min()) / (res.max() - res.min())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resd = ((res - res.min()) / (res.max() - res.min()) * np.iinfo(np.uint16).max).astype(np.uint16, casting='unsafe')\n",
"\n",
"plt.figure(figsize=(40,6))\n",
"plt.imshow(resd)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resd = res.astype(att.dtype, casting='unsafe')\n",
"\n",
"plt.figure(figsize=(40,6))\n",
"plt.imshow(resd)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.imsave('../Res/glitch.png', resd)"
"kronos.time('description')\n",
"att.shape"
]
},
{
@@ -336,17 +228,23 @@
" cv = expe['cross_validation']\n",
" cl = expe['classifier']\n",
"\n",
" cross_val = getattr(importlib.import_module(cv['package']), cv['name'])\n",
" classifier = getattr(importlib.import_module(cl['package']), cl['name'])\n",
" \n",
" prediction = np.zeros_like(gt)\n",
"\n",
" for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
" rfc = RandomForestClassifier(**cl['parameters'])\n",
" for xt, xv, yt, yv, ti in cross_val(gt, att, **cv['parameters']):\n",
" rfc = classifier(**cl['parameters'])\n",
" rfc.fit(xt, yt)\n",
"\n",
" ypred = rfc.predict(xv)\n",
"\n",
" prediction[ti] = ypred\n",
" \n",
" return prediction"
" return prediction\n",
"\n",
"classification = compute_classification(expe, att)\n",
"kronos.time('classification')"
]
},
{
@@ -355,8 +253,25 @@
"metadata": {},
"outputs": [],
"source": [
"classification = compute_classification(expe, att)\n",
"kronos.time('classification')"
"import sklearn.ensemble.RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importlib.import_module('RandomForestClassifier', package='sklearn.ensemble')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"getattr(importlib.import_module('sklearn.ensemble'), 'RandomForestClassifier')"
]
},
{
@@ -704,6 +619,97 @@
"E / ('qwer' + '.tif')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"P = Path('../Enrichment/Tests/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len([f for f in P.iterdir()])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(P.glob('*_checkpointwes'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not []:\n",
" print('yay')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"l = list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"E = Exception('Nonte')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"str(E.with_traceback())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(I)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"I[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yaml.dump({'test': 'I dont care\\\\n lel'}, open('../bdq.yml', 'w'))"
]
},
{
"cell_type": "code",
"execution_count": null,

logger.py Normal file

@@ -0,0 +1,51 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file logger.py
# \brief Logging setup helpers for the Supervisor
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 24 April 2018
#
# from https://fangpenlin.com/posts/2012/08/26/good-logging-practice-in-python/
import os
import logging.config
from pathlib import Path
import yaml


def setup_logging(
        default_path='logging.yaml',
        default_level=logging.WARN,
        env_key='LOG_CFG'
):
    """Setup logging configuration."""
    path = default_path
    value = os.getenv(env_key, None)
    if value:
        path = value
    if os.path.exists(path):
        with open(path, 'rt') as f:
            config = yaml.safe_load(f.read())
        makedirs(config)
        logging.config.dictConfig(config)
    else:
        logging.basicConfig(level=default_level)


def makedirs(dic):
    files = finddirs(dic)
    for f in files:
        d = Path(*f.parts[:-1])
        d.mkdir(parents=True, exist_ok=True)


def finddirs(dic, key='filename'):
    r = list()
    value = dic.get(key)
    if value:
        r.append(Path(value))
    for k, v in dic.items():
        if isinstance(v, dict):
            r.extend(finddirs(v))
    return r
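
A minimal usage sketch for this helper (hedged: the logger name below is illustrative; the Supervisor.py diff further down wires it up the same way, using the host name):

import logging
import logger

logger.setup_logging()              # reads logging.yaml, or the file named in $LOG_CFG;
                                    # falls back to basicConfig(WARN) if no config file exists
log = logging.getLogger('Supervisor [demo-host]')   # 'demo-host' stands in for os.uname()[1]
log.info('logging configured')      # handled by the handlers declared in the YAML config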


@@ -7,7 +7,7 @@ formatters:
handlers:
console:
class: logging.StreamHandler
level: DEBUG
level: INFO
formatter: simple
stream: ext://sys.stdout


@@ -20,28 +20,44 @@ import datetime
from sklearn import metrics
from pathlib import Path
from operator import itemgetter
import traceback
from sklearn.ensemble import RandomForestClassifier
#sys.path.append('.')
import Descriptors
from CrossValidationGenerator import APsCVG
sys.path.append('./triskele/python')
import triskele
import logging
import logger
### Keep yaml ordered
log = logging.getLogger('Supervisor [{}]'.format(os.uname()[1]))
### Keep yaml ordered, newline string
def setup_yaml():
""" https://stackoverflow.com/a/8661021 """
represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
yaml.add_representer(OrderedDict, represent_dict_order)
""" https://stackoverflow.com/a/8661021 """
represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
yaml.add_representer(OrderedDict, represent_dict_order)
""" https://stackoverflow.com/a/24291536 """
yaml.Dumper.org_represent_str = yaml.Dumper.represent_str
yaml.add_representer(str, repr_str, Dumper=yaml.Dumper)
def repr_str(dumper, data):
if '\n' in data:
return dumper.represent_scalar(u'tag:yaml.org,2002:str', data, style='|')
return dumper.org_represent_str(data)
setup_yaml()
enrichment_dir = Path('./Enrichment/')
test_dir = enrichment_dir / 'Tests'
staging_dir = enrichment_dir / 'Staging'
result_dir = enrichment_dir / 'Results'
failed_dir = enrichment_dir / 'Failed'
class TestError(Exception):
pass
def update_queue():
tmp_queue = list()
@@ -60,6 +76,7 @@ def get_priority(yml_file):
def run(expe_file):
log.info('Run test {}'.format(expe_file))
with open(expe_file) as f:
expe = OrderedDict(yaml.safe_load(f)['expe'])
@@ -67,6 +84,7 @@ def run(expe_file):
kronos = Kronos()
### Compute hashes
log.info('Computing hashes')
expe_hashes = compute_hashes(expe)
### Create output names
@@ -78,32 +96,67 @@ def run(expe_file):
expe_report = create_report(kronos)
### Stage expe
log.info('Staging test')
write_expe_file(staging_dir / oname_yml, expe, expe_hashes, expe_report)
expe_file.unlink()
### Compute descriptors
descriptors = compute_descriptors(expe)
log.info('Compute descriptors')
try:
descriptors = compute_descriptors(expe)
except Exception as e:
kronos.time('description')
expe_report = create_report(kronos)
(staging_dir / oname_yml).unlink()
write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'description', e)
raise TestError('Error occurred during description')
kronos.time('description')
### Compute classification
classification = compute_classification(expe, descriptors)
log.info('Classify data')
try:
classification = compute_classification(expe, descriptors)
except Exception as e:
kronos.time('classification')
expe_report = create_report(kronos)
(staging_dir / oname_yml).unlink()
write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'classification', e)
raise TestError('Error occurred during classification')
kronos.time('classification')
### Metrics
metrics = run_metrics(expe, classification)
log.info('Run initial metrics')
metrics = run_metrics(expe, classification, descriptors)
kronos.time('metrics')
### Create complete report
log.info('Write complete report')
expe_report = create_report(kronos)
(staging_dir / oname_yml).unlink()
### Name and write prediction
triskele.write(result_dir / oname_tif, classification)
### Write report and results
(staging_dir / oname_yml).unlink()
write_expe_file(result_dir / oname_yml, expe, expe_hashes, expe_report, oname_tif, metrics)
log.info('Test complete')
def write_error(file, expe, hashes=None, report=None, when='', e=Exception):
error = OrderedDict()
error['when'] = when
error['what'] = str(e)
error['where'] = traceback.format_exc()
with open(file, 'w') as of:
yaml.dump(OrderedDict({'expe': expe,
'expe_hashes': hashes,
'expe_report': report,
'expe_error': error}),
of, default_flow_style=False, encoding=None, allow_unicode=True)
def write_expe_file(file, expe, hashes=None, report=None, classification=None, results=None):
with open(file, 'w') as of:
yaml.dump(OrderedDict({'expe': expe,
@@ -130,7 +183,7 @@ def compute_hashes(expe):
def compute_descriptors(expe):
"""Compute descriptors from a standard expe recipe"""
script = expe['descriptors_script']
desc = importlib.import_module(script['name'], package=Descriptors)
desc = importlib.import_module(script['name'])
#importlib.reload(Descriptors)
att = desc.run(**script['parameters'])
@@ -147,10 +200,13 @@ def compute_classification(expe, descriptors):
cv = expe['cross_validation']
cl = expe['classifier']
cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
classifier = getattr(importlib.import_module(cl['package']), cl['name'])
prediction = np.zeros_like(gt)
for xt, xv, yt, yv, ti in APsCVG(gt, descriptors, **cv['parameters']):
rfc = RandomForestClassifier(**cl['parameters'])
for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
rfc = classifier(**cl['parameters'])
rfc.fit(xt, yt)
ypred = rfc.predict(xv)
@@ -160,25 +216,26 @@ def compute_classification(expe, descriptors):
return prediction
def compute_metrics(ground_truth, classification):
def compute_metrics(ground_truth, classification, descriptors):
"""Return dict of metrics for ground_truth and classification prediction in parameters"""
f = np.nonzero(classification)
pred = classification[f].ravel()
gt = ground_truth[f].ravel()
results = OrderedDict()
results['dimension'] = descriptors.shape[-1]
results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))
return results
def run_metrics(expe, classification):
def run_metrics(expe, classification, descriptors):
"""Compute the metrics from a standard expe recipe and an given classification"""
### Extensible: meta-classes
gt = triskele.read(expe['ground_truth'])
return compute_metrics(gt, classification)
return compute_metrics(gt, classification, descriptors)
def create_report(kronos):
@@ -219,24 +276,33 @@ def create_report(kronos):
def watch_folder():
time.sleep(10)
log.info('Waiting for test')
while not list(test_dir.glob('*.yml')):
time.sleep(10)
def main():
while(True):
try:
queue = update_queue()
except Exception:
print('ERROR: while updating work queue. Resuming.')
continue
log.error('Critical exception while updating work queue')
log.error(traceback.format_exc())
log.warning('Resuming')
continue
if not queue:
watch_folder()
continue
try:
run(queue.pop()['expe_file'])
except TestError:
log.warning('Test failed, error logged. Resuming')
except Exception:
print('ERROR: while running test. Resuming.')
log.error('Critical exception while running test. Resuming')
log.error(traceback.format_exc())
log.warning('Resuming')
continue
if __name__ == '__main__':
main()
logger.setup_logging()
log.info('Starting supervisor')
main()
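
For clarity, the dynamic classifier lookup that replaces the hardcoded RandomForestClassifier boils down to the pattern below. The recipe keys (package, name, parameters) come from the diff above; the concrete parameter values are only illustrative:

import importlib

cl = {'package': 'sklearn.ensemble',
      'name': 'RandomForestClassifier',
      'parameters': {'n_estimators': 100, 'n_jobs': -1}}   # illustrative values

classifier = getattr(importlib.import_module(cl['package']), cl['name'])
rfc = classifier(**cl['parameters'])   # equivalent to the former hardcoded RandomForestClassifier(...)
# rfc is then fit on each training fold and used to predict the validation fold,
# exactly as in compute_classification() above.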