Supervisor WIP

This commit is contained in:
Florent Guiotte 2018-07-25 17:29:17 +02:00
parent ad68cafe1e
commit 578249f644
3 changed files with 487 additions and 33 deletions

View File

@ -8,11 +8,11 @@
"\n", "\n",
"- [X] Read a YAML recipe\n", "- [X] Read a YAML recipe\n",
"- [X] Brew recipe\n", "- [X] Brew recipe\n",
"- [] Compute hashes\n", "- [X] Compute hashes\n",
"- [] Write hashes\n", "- [X] Write hashes\n",
"- [] Time metrics\n", "- [X] Time metrics\n",
"- [] Result metrics\n", "- [X] Result metrics\n",
"- [] Write metrics\n", "- [X] Write metrics\n",
"- [] Write/move results\n", "- [] Write/move results\n",
"- [] Watch folder\n", "- [] Watch folder\n",
"- [] Main loop\n", "- [] Main loop\n",
@ -34,6 +34,13 @@
"import numpy as np\n", "import numpy as np\n",
"import importlib\n", "import importlib\n",
"import sys\n", "import sys\n",
"import hashlib\n",
"from collections import OrderedDict\n",
"import time\n",
"import os\n",
"import datetime\n",
"from sklearn import metrics\n",
"from pathlib import Path\n",
"\n", "\n",
"from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n",
"\n", "\n",
@ -45,6 +52,21 @@
"import triskele" "import triskele"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### Keep yaml ordered\n",
"\n",
"def setup_yaml():\n",
" \"\"\" https://stackoverflow.com/a/8661021 \"\"\"\n",
" represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())\n",
" yaml.add_representer(OrderedDict, represent_dict_order) \n",
"setup_yaml()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -52,6 +74,16 @@
"## Serial Classifier" "## Serial Classifier"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"expe_in = '../test.yml'\n",
"expe_out = '../test_out.yml'"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -60,32 +92,301 @@
"source": [ "source": [
"%load_ext autoreload\n", "%load_ext autoreload\n",
"%autoreload 2\n", "%autoreload 2\n",
"with open('../test.yml') as f:\n", "with open(expe_in) as f:\n",
" expe = yaml.safe_load(f)['expe']\n", " expe = OrderedDict(yaml.safe_load(f)['expe'])\n",
"display(expe)\n", "display(expe)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Compute hashes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_hashes(expe):\n",
" glob = hashlib.sha1()\n",
"\n", "\n",
"# Ground truth\n", " expe_hashes = OrderedDict()\n",
"gt = triskele.read(expe['ground_truth'])\n",
"\n", "\n",
"# Descriptors\n", " for k in ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']:\n",
"script = expe['descriptors_script']\n", " v = str(expe[k]).encode('utf-8')\n",
"desc = importlib.import_module(script['name'], package=Descriptors)\n", " expe_hashes[k] = hashlib.sha1(v).hexdigest()\n",
"importlib.reload(Descriptors)\n", " glob.update(v)\n",
"att = desc.run(**script['parameters'])\n", " expe_hashes['global'] = glob.hexdigest()\n",
" return expe_hashes\n",
"\n", "\n",
"# CrossVal and ML\n", "expe_hashes = compute_hashes(expe)\n",
"cv = expe['cross_validation']\n", "expe_hashes"
"cl = expe['classifier']\n", ]
"\n", },
"prediction = np.zeros_like(gt)\n", {
"\n", "cell_type": "markdown",
"for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n", "metadata": {},
" rfc = RandomForestClassifier(**cl['parameters'])\n", "source": [
" rfc.fit(xt, yt)\n", "### Write hashes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(expe_out, 'w') as of:\n",
" yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Keep track of time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Kronos(object):\n",
" def __init__(self):\n",
" self._pt = time.process_time()\n",
" self._times = OrderedDict()\n",
" \n",
" def time(self, name):\n",
" self._times[name + '_process_time'] = time.process_time() - self._pt\n",
" self._pt = time.process_time()\n",
" \n",
" def get_times(self):\n",
" return self._times\n",
" \n", " \n",
" ypred = rfc.predict(xv)\n", "kronos = Kronos()\n",
"start_time = time.time()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Compute descriptors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_descriptors(expe):\n",
" \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
" script = expe['descriptors_script']\n",
" desc = importlib.import_module(script['name'], package=Descriptors)\n",
" #importlib.reload(Descriptors)\n",
" att = desc.run(**script['parameters'])\n",
" \n", " \n",
" prediction[ti] = ypred" " return att\n",
"\n",
"att = compute_descriptors(expe)\n",
"kronos.time('description')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Compute classification"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_classification(expe, att):\n",
" \"\"\"Read a standard expe recipe and attributes, return the result classification\"\"\"\n",
" # Ground truth\n",
" gt = triskele.read(expe['ground_truth'])\n",
"\n",
"\n",
" # CrossVal and ML\n",
" cv = expe['cross_validation']\n",
" cl = expe['classifier']\n",
"\n",
" prediction = np.zeros_like(gt)\n",
"\n",
" for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
" rfc = RandomForestClassifier(**cl['parameters'])\n",
" rfc.fit(xt, yt)\n",
"\n",
" ypred = rfc.predict(xv)\n",
"\n",
" prediction[ti] = ypred\n",
" \n",
" return prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"classification = compute_classification(expe, att)\n",
"kronos.time('classification')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_metrics(ground_truth, classication):\n",
" \"\"\"Return dict of metrics for ground_truth and classification prediction in parameters\"\"\"\n",
" f = np.nonzero(classification)\n",
" pred = classification[f].ravel()\n",
" gt = ground_truth[f].ravel()\n",
" \n",
" results = OrderedDict() \n",
" results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n",
" results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n",
" \n",
" return results\n",
"\n",
"def run_metrics(expe, classification):\n",
" \"\"\"Compute the metrics from a standard expe recipe and an given classification\"\"\"\n",
" \n",
" ### Extensible: meta-classes\n",
" gt = triskele.read(expe['ground_truth'])\n",
" return compute_metrics(gt, classification)\n",
"\n",
"expe_results = run_metrics(expe, classification)\n",
"expe_results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"kronos.time('metrics')\n",
"end_time = time.time()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def create_report(kronos):\n",
" expe_report = OrderedDict()\n",
"\n",
" expe_report['supervisor'] = os.uname()[1]\n",
"\n",
" for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):\n",
" expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n",
"\n",
" ressources = kronos.get_times()\n",
" ressources['ram'] = None\n",
"\n",
" expe_report['ressources'] = ressources\n",
" return expe_report\n",
"\n",
"expe_report = create_report(kronos)\n",
"expe_report"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**TODO**\n",
"\n",
"améliorer kronos pour le start et le end time (build and get_times)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Name and write prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n",
"oname_tif = oname + '.tif'\n",
"oname_yml = oname + '.yml'\n",
"\n",
"triskele.write(oname_tif, classification)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Write report and results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(oname_yml, 'w') as of:\n",
" yaml.dump(OrderedDict({'expe': expe, \n",
" 'expe_hashes': expe_hashes, \n",
" 'expe_report': expe_report,\n",
" 'expe_classification': oname_tif,\n",
" 'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"att.dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import watchdog"
] ]
}, },
{ {

156
supervisor.py Normal file
View File

@ -0,0 +1,156 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file supervisor.py
# \brief TODO
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.1
# \date 25 juil. 2018
#
# TODO details
import yaml
import numpy as np
import importlib
import sys
import hashlib
from collections import OrderedDict
import time
import os
import datetime
from sklearn import metrics
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
#sys.path.append('.')
import Descriptors
from CrossValidationGenerator import APsCVG
sys.path.append('./triskele/python')
import triskele
# Keep YAML output ordered
def setup_yaml():
    """Register a YAML representer for ``OrderedDict``.

    ``OrderedDict`` instances are dumped as regular YAML mappings
    (``tag:yaml.org,2002:map``) built from ``data.items()``.

    Adapted from https://stackoverflow.com/a/8661021
    """
    def represent_ordered(dumper, data):
        # Hand the items view (not the dict itself) to the representer.
        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())

    yaml.add_representer(OrderedDict, represent_ordered)


# Install the representer as soon as the module is loaded.
setup_yaml()
def run(expe_file):
    """Run one experiment described by a YAML recipe and build its report.

    The recipe is read from the ``expe`` entry of ``expe_file``. Descriptors,
    classification and metrics are computed in that order, and the CPU time
    of each phase is recorded with a ``Kronos`` instance.

    Args:
        expe_file: Path of the YAML file holding the ``expe`` recipe.

    Returns:
        An ``OrderedDict`` with the keys ``expe`` (the recipe),
        ``expe_hashes``, ``expe_report`` and ``expe_results``.
    """
    with open(expe_file) as f:
        expe = OrderedDict(yaml.safe_load(f)['expe'])

    # Compute hashes
    expe_hashes = compute_hashes(expe)

    # Keep track of time
    kronos = Kronos()
    start_time = time.time()

    # Compute descriptors
    descriptors = compute_descriptors(expe)
    kronos.time('description')

    # Compute classification
    classification = compute_classification(expe, descriptors)
    kronos.time('classification')

    # Metrics. The result is not named `metrics` so that the module-level
    # `sklearn.metrics` import is not shadowed inside this function.
    expe_results = run_metrics(expe, classification)
    kronos.time('metrics')

    # The wall-clock end must be captured before the report is built; it was
    # previously read without ever being assigned.
    end_time = time.time()

    # Create report
    expe_report = OrderedDict()
    expe_report['supervisor'] = os.uname()[1]

    for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):
        stamp = datetime.datetime.fromtimestamp(timev)
        expe_report[datek] = stamp.strftime('Le %d/%m/%Y à %H:%M:%S')

    # Copy the timings so the placeholder below does not leak into Kronos.
    ressources = OrderedDict(kronos.get_times())
    ressources['ram'] = None  # placeholder: memory usage is not measured
    expe_report['ressources'] = ressources

    return OrderedDict([
        ('expe', expe),
        ('expe_hashes', expe_hashes),
        ('expe_report', expe_report),
        ('expe_results', expe_results),
    ])
def compute_hashes(expe):
    """Return SHA-1 digests identifying the sections of an expe recipe.

    Each of the ``ground_truth``, ``descriptors_script``,
    ``cross_validation`` and ``classifier`` entries is converted with
    ``str()``, UTF-8 encoded and hashed on its own. A ``global`` digest is
    computed over the same encoded sections fed in that order.

    Args:
        expe: Mapping holding at least the four sections listed above.

    Returns:
        An ``OrderedDict`` of hexadecimal digests: one per section, followed
        by ``global``.
    """
    hashed_sections = ('ground_truth', 'descriptors_script',
                       'cross_validation', 'classifier')

    overall = hashlib.sha1()
    digests = OrderedDict()

    for section in hashed_sections:
        payload = str(expe[section]).encode('utf-8')
        overall.update(payload)
        digests[section] = hashlib.sha1(payload).hexdigest()

    digests['global'] = overall.hexdigest()
    return digests
def compute_descriptors(expe):
    """Compute descriptors from a standard expe recipe.

    The module named by ``expe['descriptors_script']['name']`` is imported
    and its ``run`` function is called with the recipe's ``parameters`` as
    keyword arguments.

    Args:
        expe: Recipe mapping with a ``descriptors_script`` entry holding
            ``name`` and ``parameters``.

    Returns:
        Whatever the descriptor script's ``run`` function returns.
    """
    script = expe['descriptors_script']
    # `package` is the anchor for relative module names and must be the
    # package *name* (a str); passing the module object raises TypeError
    # as soon as a relative name is used.
    desc = importlib.import_module(script['name'],
                                   package=Descriptors.__name__)
    return desc.run(**script['parameters'])
def compute_classification(expe, descriptors):
    """Classify the descriptors of an expe recipe and return the prediction.

    The ground truth is read with ``triskele.read``. For every split yielded
    by ``APsCVG`` a fresh ``RandomForestClassifier`` is fitted on the first
    sample/label pair and its prediction for the second sample set is stored
    in the output at the index yielded with the split.

    Args:
        expe: Recipe mapping with ``ground_truth``, ``cross_validation`` and
            ``classifier`` entries.
        descriptors: Descriptors handed to the cross-validation generator.

    Returns:
        An array shaped like the ground truth, zero wherever no prediction
        was written.
    """
    ground_truth = triskele.read(expe['ground_truth'])

    # Cross-validation and classifier recipes
    cv_recipe = expe['cross_validation']
    clf_recipe = expe['classifier']

    prediction = np.zeros_like(ground_truth)

    splits = APsCVG(ground_truth, descriptors, **cv_recipe['parameters'])
    for x_fit, x_eval, y_fit, _y_eval, eval_index in splits:
        forest = RandomForestClassifier(**clf_recipe['parameters'])
        forest.fit(x_fit, y_fit)
        prediction[eval_index] = forest.predict(x_eval)

    return prediction
def compute_metrics(ground_truth, classication):
    """Return metrics comparing a classification with its ground truth.

    Only the positions where the classification is non-zero are scored
    (presumably zero marks entries that received no prediction -- confirm
    against the cross-validation generator).

    Args:
        ground_truth: Array of reference labels.
        classication: Array of predicted labels, indexable like
            ``ground_truth``. The misspelled parameter name is kept so that
            existing keyword callers keep working.

    Returns:
        An ``OrderedDict`` with ``overall_accuracy`` and ``cohen_kappa`` as
        plain floats.
    """
    # The body used to read a global named `classification`, which does not
    # exist in this module; use the argument that was actually passed in.
    scored = np.nonzero(classication)
    pred = classication[scored].ravel()
    gt = ground_truth[scored].ravel()

    results = OrderedDict()
    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))
    return results
def run_metrics(expe, classification):
    """Compute the metrics of a classification for a standard expe recipe.

    Args:
        expe: Recipe mapping; its ``ground_truth`` entry is read with
            ``triskele.read``.
        classification: Predicted labels to score against the ground truth.

    Returns:
        The result of ``compute_metrics`` for the ground truth and the
        given classification.
    """
    # Extension point: meta-classes
    reference = triskele.read(expe['ground_truth'])
    scores = compute_metrics(reference, classification)
    return scores
class Kronos(object):
    """Record the CPU process time spent between successive marks."""

    def __init__(self):
        # Process-time reading taken at the previous mark (or at creation).
        self._pt = time.process_time()
        # Recorded durations, in the order they were marked.
        self._times = OrderedDict()

    def time(self, name):
        """Store the process time elapsed since the previous mark.

        The duration is saved under the key ``name + '_process_time'`` and
        the reference point is moved to the current reading.

        Args:
            name: Label of the phase that just finished.
        """
        # Read the clock once: a second read to reset the reference would
        # drop the CPU time spent between the two reads from every phase.
        now = time.process_time()
        self._times[name + '_process_time'] = now - self._pt
        self._pt = now

    def get_times(self):
        """Return the ``OrderedDict`` of recorded durations (not a copy)."""
        return self._times

View File

@ -10,21 +10,18 @@ expe:
rasters: rasters:
- '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif' - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
- '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif' - '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
areas: areas: [100, 1000]
- 10 moi: [.5, .9]
- 100
- 1e4
moi: [.5, .7, .9]
cross_validation: cross_validation:
name: CrossValidationGenerator.APsCVG name: CrossValidationGenerator.APsCVG
parameters: parameters:
n_test: 5 n_test: 2
classifier: classifier:
name: sklearn.ensemble.RandomForestClassifier name: sklearn.ensemble.RandomForestClassifier
parameters: parameters:
n_jobs: -1 n_jobs: -1
random_state: 0 random_state: 0
n_estimators: 100 n_estimators: 50
min_samples_leaf: 10 min_samples_leaf: 10
expe_hashes: expe_hashes:
ground_truth: XXX ground_truth: XXX