Refactor Supervisor with logs and error management

Florent Guiotte 2018-08-31 18:33:05 +02:00
parent 62cac9f534
commit 22399f9618
7 changed files with 932 additions and 144 deletions

CVGenerators/__init__.py Normal file
View File

@ -0,0 +1 @@
from .CrossValidationGenerator import CVG_legacy, APsCVG
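For context, the Supervisor's classification loop (see the Supervisor.py diff below) fixes the interface these generators expose. A minimal sketch with the same yield signature, assuming a 2D ground-truth raster and per-pixel descriptors; dummy_cvg and its fold split are illustrative stand-ins, not the actual APsCVG:

import numpy as np

def dummy_cvg(gt, att, folds=2):
    """Yield xt, xv, yt, yv, ti like APsCVG: train/validation features and
    labels, plus the validation indices used to write predictions back."""
    X = att.reshape(-1, att.shape[-1])   # (pixels, descriptors)
    y = gt.ravel()
    labelled = np.flatnonzero(y)         # split only the labelled pixels
    for k in range(folds):
        vi = labelled[k::folds]          # validation pixels of this fold
        tr = np.setdiff1d(labelled, vi)  # the rest trains the classifier
        yield X[tr], X[vi], y[tr], y[vi], np.unravel_index(vi, gt.shape)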

View File

@ -0,0 +1,664 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"sys.path.append(\"..\")\n",
"import rasterizer\n",
"import raster_assistant as ra\n",
"\n",
"sys.path.append('../triskele/python/')\n",
"import triskele\n",
"\n",
"figsize = np.array((16, 3)) * 1.5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tresholds for Custom Raster from DFC LiDAR data\n",
"\n",
"Compare our results with the DFC rasters and set the tresholds for the raster factory.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load DFC raster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfc_raster = triskele.read('../Data/phase1_rasters/Intensity_C3/UH17_GI3F051_TR.tif')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(dfc_raster)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The raster from DFC dataset are noised with high value noise. We need to filter high values. We empirically set the treshold to 1e4."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.clip(dfc_raster, dfc_raster.min(), 1e4, out=dfc_raster)\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(dfc_raster)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set filtering and clipping treshold to process rasters from LiDAR"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering or clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also have high value noise, but far better than the DFC noise."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering and minimal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.01, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0_01)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Clipping does not remove unwanted high value noise."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and no clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Filtering remove high value noise, but the tone mapping is bad (too dark)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with filtering and no clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.1, clip_treshold=0, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_1_c0 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_1_c0)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone mapping is correct, but interpolation artifacts appears where too much points are removed from filtering (e.g. in the stadium)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data without filtering and with clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact but high noise sparkle the result."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and minimal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C1', 'C3', filter_treshold=0.01, clip_treshold=0.01, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_01 = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_01)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is not correct."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data with minimal filtering and normal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C3 = ra.bulk_load('../Data/lidar/C3', 'C3', filter_treshold=0.2, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('z', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact and low high noise in the result. We will now on choose "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load C123 data with minimal filtering and normal clipping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f = (0.01 + 0.01 + 0.2) / 3\n",
"f"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C123 = ra.bulk_load('../Data/lidar/', 'C123', filter_treshold=0.08, clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we process the raster with the same resolution and a nearest interpolation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_C123 = ra.rasterize_cache('z', C123, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The tone map is correct, no interpolation artifact and low high noise in the result. We will now on choose "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare interpolation method\n",
"\n",
"### Nearest neighbour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1_nearest = ra.rasterize_cache('intensity', C3, .5, 'nearest', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1_nearest)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Linear interpolation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'linear', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cubic interpolation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1 = ra.rasterize_cache('intensity', C3, .5, 'cubic', False, cache_dir='../Res/enrichment_rasters')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cubic interpolation seems to create negative values, maybe at the same spots of the DFC high noise ?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=figsize)\n",
"plt.imshow((raster_f0_01_c0_1 < 0) * 1.)\n",
"plt.colorbar()\n",
"plt.title('Cubic low noise')\n",
"plt.show()\n",
"\n",
"dfc_raster_raw = triskele.read('../Data/phase1_rasters/Intensity_C1/UH17_GI1F051_TR.tif')\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 < 0)) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise and Cubic low noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 < 0) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise minus Cubic low noise')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Numerous common noise pixel between DFC noise and our cubic interpolation.\n",
"\n",
"Let's try with our high noise."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=figsize)\n",
"plt.imshow((raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1.)\n",
"plt.colorbar()\n",
"plt.title('Cubic high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1.)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(np.logical_and((dfc_raster_raw > 1e4), (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max())) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise and Cubic low noise')\n",
"plt.show()\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow((dfc_raster_raw > 1e4) * 1 - (raster_f0_01_c0_1 > raster_f0_01_c0_1_nearest.max()) * 1)\n",
"plt.colorbar()\n",
"plt.title('DFC high noise minus Cubic low noise')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Very low correlation between our raster and the DFC high noise.\n",
"\n",
"### Filter low and high interpolated values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raster_f0_01_c0_1_postprocess = np.clip(raster_f0_01_c0_1, C3.intensity.min(), C3.intensity.max())\n",
"\n",
"plt.figure(figsize=figsize)\n",
"plt.imshow(raster_f0_01_c0_1_postprocess)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TMP"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tmp = ra.rasterize_cache('intensity', C3, .5, 'cubic-clip', False, cache_dir='../Res/enrichment_rasters')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"C12 = ra.bulk_load(['../Data/lidar/C1', '../Data/lidar/C2'], 'C12', filter_treshold=1., clip_treshold=0.1, dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
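To take stock outside the notebook: a short consolidation of the raster-factory settings the cells above converge on, restated with the same raster_assistant calls (nothing here is new; the 0.08 filter threshold is the average computed in the C123 section):

import numpy as np
import raster_assistant as ra

# Averaged filter threshold (0.01 + 0.01 + 0.2) / 3 ~ 0.08 and clip
# threshold 0.1, with nearest interpolation; cubic interpolation is only
# usable with its output clipped back to the input range, as in the
# post-processing cell above.
C123 = ra.bulk_load('../Data/lidar/', 'C123', filter_treshold=0.08,
                    clip_treshold=0.1, dtype=np.float32)
raster = ra.rasterize_cache('intensity', C123, .5, 'nearest', False,
                            cache_dir='../Res/enrichment_rasters')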

View File

@ -13,9 +13,9 @@
"- [X] Time metrics\n", "- [X] Time metrics\n",
"- [X] Result metrics\n", "- [X] Result metrics\n",
"- [X] Write metrics\n", "- [X] Write metrics\n",
"- [ ] Write/move results\n", "- [X] Write/move results\n",
"- [ ] Watch folder\n", "- [X] Watch folder\n",
"- [ ] Main loop\n", "- [X] Main loop\n",
"- [ ] Logs\n", "- [ ] Logs\n",
"- [ ] Catch errors\n", "- [ ] Catch errors\n",
"- [ ] Custom CVG\n", "- [ ] Custom CVG\n",
@ -194,123 +194,15 @@
"def compute_descriptors(expe):\n", "def compute_descriptors(expe):\n",
" \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n", " \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
" script = expe['descriptors_script']\n", " script = expe['descriptors_script']\n",
" desc = importlib.import_module(script['name'], package=Descriptors)\n", " desc = importlib.import_module(script['name'])\n",
" #importlib.reload(Descriptors)\n", " #importlib.reload(Descriptors)\n",
" att = desc.run(**script['parameters'])\n", " att = desc.run(**script['parameters'])\n",
" \n", " \n",
" return att\n", " return att\n",
"\n", "\n",
"att = compute_descriptors(expe)\n", "att = compute_descriptors(expe)\n",
"kronos.time('description')" "kronos.time('description')\n",
] "att.shape"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def cast_expand_i(X, dtype):\n",
" return ((X - X.min()) / (X.max() - X.min()) * np.iinfo(dtype).max).astype(dtype)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"att.shape, att.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"att = cast_expand_i(att, np.uint8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = np.var(att, axis=-1)\n",
"res.shape, res.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"view = "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"(res - res.min()) / (res.max() - res.min())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resd = ((res - res.min()) / (res.max() - res.min()) * np.iinfo(np.uint16).max).astype(np.uint16, casting='unsafe')\n",
"\n",
"plt.figure(figsize=(40,6))\n",
"plt.imshow(resd)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resd = res.astype(att.dtype, casting='unsafe')\n",
"\n",
"plt.figure(figsize=(40,6))\n",
"plt.imshow(resd)\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.imsave('../Res/glitch.png', resd)"
] ]
}, },
{ {
@ -336,17 +228,23 @@
" cv = expe['cross_validation']\n", " cv = expe['cross_validation']\n",
" cl = expe['classifier']\n", " cl = expe['classifier']\n",
"\n", "\n",
" cross_val = getattr(importlib.import_module(cv['package']), cv['name'])\n",
" classifier = getattr(importlib.import_module(cl['package']), cl['name'])\n",
" \n",
" prediction = np.zeros_like(gt)\n", " prediction = np.zeros_like(gt)\n",
"\n", "\n",
" for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n", " for xt, xv, yt, yv, ti in cross_val(gt, att, **cv['parameters']):\n",
" rfc = RandomForestClassifier(**cl['parameters'])\n", " rfc = classifier(**cl['parameters'])\n",
" rfc.fit(xt, yt)\n", " rfc.fit(xt, yt)\n",
"\n", "\n",
" ypred = rfc.predict(xv)\n", " ypred = rfc.predict(xv)\n",
"\n", "\n",
" prediction[ti] = ypred\n", " prediction[ti] = ypred\n",
" \n", " \n",
" return prediction" " return prediction\n",
"\n",
"classification = compute_classification(expe, att)\n",
"kronos.time('classification')"
] ]
}, },
{ {
@ -355,8 +253,25 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"classification = compute_classification(expe, att)\n", "import sklearn.ensemble.RandomForestClassifier"
"kronos.time('classification')" ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importlib.import_module('RandomForestClassifier', package='sklearn.ensemble')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"getattr(importlib.import_module('sklearn.ensemble'), 'RandomForestClassifier')"
] ]
}, },
{ {
@ -704,6 +619,97 @@
"E / ('qwer' + '.tif')" "E / ('qwer' + '.tif')"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"P = Path('../Enrichment/Tests/')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len([f for f in P.iterdir()])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(P.glob('*_checkpointwes'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not []:\n",
" print('yay')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"l = list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"E = Exception('Nonte')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"str(E.with_traceback())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(I)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"I[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yaml.dump({'test': 'I dont care\\\\n lel'}, open('../bdq.yml', 'w'))"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

logger.py Normal file
View File

@ -0,0 +1,51 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file logger.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 24 April 2018
#
# from https://fangpenlin.com/posts/2012/08/26/good-logging-practice-in-python/

import os
import logging.config
from pathlib import Path
import yaml


def setup_logging(default_path='logging.yaml',
                  default_level=logging.WARN,
                  env_key='LOG_CFG'):
    """Setup logging configuration."""
    path = default_path
    value = os.getenv(env_key, None)
    if value:
        path = value
    if os.path.exists(path):
        with open(path, 'rt') as f:
            config = yaml.safe_load(f.read())
        # Create the log directories before dictConfig() opens file handlers
        makedirs(config)
        logging.config.dictConfig(config)
    else:
        logging.basicConfig(level=default_level)


def makedirs(dic):
    """Create the parent directory of every 'filename' in the config."""
    files = finddirs(dic)
    for f in files:
        d = Path(*f.parts[:-1])
        d.mkdir(parents=True, exist_ok=True)


def finddirs(dic, key='filename'):
    """Recursively collect the paths stored under `key` in a nested dict."""
    r = list()
    value = dic.get(key)
    if value:
        r.append(Path(value))
    for k, v in dic.items():
        if isinstance(v, dict):
            r.extend(finddirs(v))
    return r
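For reference, a sketch of a config this loader accepts; the handler names and the Logs/ path are illustrative assumptions, not part of this commit (only the 'simple' formatter and console handler appear in the logging.yaml diff below). finddirs() collects every 'filename' value from the nested dict and makedirs() creates the parent directories so dictConfig() can open its file handlers; setting the LOG_CFG environment variable points setup_logging() at an alternative file:

# Hypothetical dict, i.e. what yaml.safe_load() would return for a
# logging.yaml with one console and one file handler.
config = {
    'version': 1,
    'formatters': {
        'simple': {'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'},
    },
    'handlers': {
        'console': {'class': 'logging.StreamHandler', 'level': 'INFO',
                    'formatter': 'simple', 'stream': 'ext://sys.stdout'},
        'file': {'class': 'logging.FileHandler', 'level': 'DEBUG',
                 'formatter': 'simple', 'filename': 'Logs/supervisor.log'},
    },
    'root': {'level': 'DEBUG', 'handlers': ['console', 'file']},
}
# finddirs(config) -> [Path('Logs/supervisor.log')]; makedirs(config)
# then creates Logs/ before logging.config.dictConfig(config) runs.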

View File

@ -7,7 +7,7 @@ formatters:
 handlers:
   console:
     class: logging.StreamHandler
-    level: DEBUG
+    level: INFO
     formatter: simple
     stream: ext://sys.stdout

View File

@ -20,28 +20,44 @@ import datetime
 from sklearn import metrics
 from pathlib import Path
 from operator import itemgetter
+import traceback

 from sklearn.ensemble import RandomForestClassifier

-#sys.path.append('.')
-import Descriptors
-from CrossValidationGenerator import APsCVG
 sys.path.append('./triskele/python')
 import triskele

+import logging
+import logger
+
+log = logging.getLogger('Supervisor [{}]'.format(os.uname()[1]))
+

-### Keep yaml ordered
+### Keep yaml ordered, newline string
 def setup_yaml():
     """ https://stackoverflow.com/a/8661021 """
     represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
     yaml.add_representer(OrderedDict, represent_dict_order)
+    """ https://stackoverflow.com/a/24291536 """
+    yaml.Dumper.org_represent_str = yaml.Dumper.represent_str
+    yaml.add_representer(str, repr_str, Dumper=yaml.Dumper)
+
+
+def repr_str(dumper, data):
+    if '\n' in data:
+        return dumper.represent_scalar(u'tag:yaml.org,2002:str', data, style='|')
+    return dumper.org_represent_str(data)

 setup_yaml()

 enrichment_dir = Path('./Enrichment/')
 test_dir = enrichment_dir / 'Tests'
 staging_dir = enrichment_dir / 'Staging'
 result_dir = enrichment_dir / 'Results'
+failed_dir = enrichment_dir / 'Failed'
+
+
+class TestError(Exception):
+    pass


 def update_queue():
     tmp_queue = list()
@ -60,6 +76,7 @@ def get_priority(yml_file):
 def run(expe_file):
+    log.info('Run test {}'.format(expe_file))
     with open(expe_file) as f:
         expe = OrderedDict(yaml.safe_load(f)['expe'])
@ -67,6 +84,7 @@ def run(expe_file):
     kronos = Kronos()

     ### Compute hashes
+    log.info('Computing hashes')
     expe_hashes = compute_hashes(expe)

     ### Create output names
@ -78,32 +96,67 @@ def run(expe_file):
     expe_report = create_report(kronos)

     ### Stage expe
+    log.info('Staging test')
     write_expe_file(staging_dir / oname_yml, expe, expe_hashes, expe_report)
     expe_file.unlink()

     ### Compute descriptors
-    descriptors = compute_descriptors(expe)
+    log.info('Compute descriptors')
+    try:
+        descriptors = compute_descriptors(expe)
+    except Exception as e:
+        kronos.time('description')
+        expe_report = create_report(kronos)
+        (staging_dir / oname_yml).unlink()
+        write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'description', e)
+        raise TestError('Error occurred during description')
     kronos.time('description')

     ### Compute classification
-    classification = compute_classification(expe, descriptors)
+    log.info('Classify data')
+    try:
+        classification = compute_classification(expe, descriptors)
+    except Exception as e:
+        kronos.time('classification')
+        expe_report = create_report(kronos)
+        (staging_dir / oname_yml).unlink()
+        write_error(failed_dir / oname_yml, expe, expe_hashes, expe_report, 'classification', e)
+        raise TestError('Error occurred during classification')
     kronos.time('classification')

     ### Metrics
-    metrics = run_metrics(expe, classification)
+    log.info('Run initial metrics')
+    metrics = run_metrics(expe, classification, descriptors)
     kronos.time('metrics')

     ### Create complete report
+    log.info('Write complete report')
     expe_report = create_report(kronos)
+    (staging_dir / oname_yml).unlink()

     ### Name and write prediction
     triskele.write(result_dir / oname_tif, classification)

     ### Write report and results
-    (staging_dir / oname_yml).unlink()
     write_expe_file(result_dir / oname_yml, expe, expe_hashes, expe_report, oname_tif, metrics)
+    log.info('Test complete')
+
+
+def write_error(file, expe, hashes=None, report=None, when='', e=Exception):
+    error = OrderedDict()
+    error['when'] = when
+    error['what'] = str(e)
+    error['where'] = traceback.format_exc()
+
+    with open(file, 'w') as of:
+        yaml.dump(OrderedDict({'expe': expe,
+                               'expe_hashes': hashes,
+                               'expe_report': report,
+                               'expe_error': error}),
+                  of, default_flow_style=False, encoding=None, allow_unicode=True)


 def write_expe_file(file, expe, hashes=None, report=None, classification=None, results=None):
     with open(file, 'w') as of:
         yaml.dump(OrderedDict({'expe': expe,
@ -130,7 +183,7 @@ def compute_hashes(expe):
 def compute_descriptors(expe):
     """Compute descriptors from a standard expe recipe"""
     script = expe['descriptors_script']
-    desc = importlib.import_module(script['name'], package=Descriptors)
+    desc = importlib.import_module(script['name'])
     #importlib.reload(Descriptors)
     att = desc.run(**script['parameters'])
@ -147,10 +200,13 @@ def compute_classification(expe, descriptors):
     cv = expe['cross_validation']
     cl = expe['classifier']

+    cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
+    classifier = getattr(importlib.import_module(cl['package']), cl['name'])
+
     prediction = np.zeros_like(gt)

-    for xt, xv, yt, yv, ti in APsCVG(gt, descriptors, **cv['parameters']):
-        rfc = RandomForestClassifier(**cl['parameters'])
+    for xt, xv, yt, yv, ti in cross_val(gt, descriptors, **cv['parameters']):
+        rfc = classifier(**cl['parameters'])
         rfc.fit(xt, yt)

         ypred = rfc.predict(xv)
@ -160,25 +216,26 @@ def compute_classification(expe, descriptors):
     return prediction


-def compute_metrics(ground_truth, classification):
+def compute_metrics(ground_truth, classification, descriptors):
     """Return dict of metrics for ground_truth and classification prediction in parameters"""
     f = np.nonzero(classification)
     pred = classification[f].ravel()
     gt = ground_truth[f].ravel()

     results = OrderedDict()
+    results['dimension'] = descriptors.shape[-1]
     results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
     results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

     return results


-def run_metrics(expe, classification):
+def run_metrics(expe, classification, descriptors):
     """Compute the metrics from a standard expe recipe and a given classification"""
     ### Extensible: meta-classes
     gt = triskele.read(expe['ground_truth'])
-    return compute_metrics(gt, classification)
+    return compute_metrics(gt, classification, descriptors)


 def create_report(kronos):
@ -219,24 +276,33 @@ class Kronos(object):
 def watch_folder():
-    time.sleep(10)
+    log.info('Waiting for test')
+    while not list(test_dir.glob('*.yml')):
+        time.sleep(10)


 def main():
     while(True):
         try:
             queue = update_queue()
         except Exception:
-            print('ERROR: while updating work queue. Resuming.')
+            log.error('Critical exception while updating work queue')
+            log.error(traceback.format_exc())
+            log.warning('Resuming')
             continue

         if not queue:
             watch_folder()
             continue

         try:
             run(queue.pop()['expe_file'])
+        except TestError:
+            log.warning('Test failed, error logged. Resuming')
         except Exception:
-            print('ERROR: while running test. Resuming.')
+            log.error('Critical exception while running test. Resuming')
+            log.error(traceback.format_exc())
+            log.warning('Resuming')
             continue


 if __name__ == '__main__':
-    main()
+    logger.setup_logging()
+    log.info('Starting supervisor')
+    main()
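To make the new dynamic loading concrete, a sketch of the recipe fields compute_classification() now reads; the 'package'/'name'/'parameters' keys are the ones used above, while the concrete values (and the RandomForest parameters) are only an example consistent with the scratch cells in the notebook:

import importlib

expe = {
    'cross_validation': {'package': 'CVGenerators', 'name': 'APsCVG',
                         'parameters': {}},
    'classifier': {'package': 'sklearn.ensemble',
                   'name': 'RandomForestClassifier',
                   'parameters': {'n_estimators': 100}},  # example values
}

cv, cl = expe['cross_validation'], expe['classifier']
cross_val = getattr(importlib.import_module(cv['package']), cv['name'])
classifier = getattr(importlib.import_module(cl['package']), cl['name'])
# Any classifier with fit()/predict() and any generator with the CVG yield
# signature can now be swapped in from the recipe without editing Supervisor.py.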