Supervisor WIP
This commit is contained in:
parent
ad68cafe1e
commit
578249f644
@ -8,11 +8,11 @@
|
||||
"\n",
|
||||
"- [X] Read a YAML recipe\n",
|
||||
"- [X] Brew recipe\n",
|
||||
"- [] Compute hashes\n",
|
||||
"- [] Write hashes\n",
|
||||
"- [] Time metrics\n",
|
||||
"- [] Result metrics\n",
|
||||
"- [] Write metrics\n",
|
||||
"- [X] Compute hashes\n",
|
||||
"- [X] Write hashes\n",
|
||||
"- [X] Time metrics\n",
|
||||
"- [X] Result metrics\n",
|
||||
"- [X] Write metrics\n",
|
||||
"- [] Write/move results\n",
|
||||
"- [] Watch folder\n",
|
||||
"- [] Main loop\n",
|
||||
@ -34,6 +34,13 @@
|
||||
"import numpy as np\n",
|
||||
"import importlib\n",
|
||||
"import sys\n",
|
||||
"import hashlib\n",
|
||||
"from collections import OrderedDict\n",
|
||||
"import time\n",
|
||||
"import os\n",
|
||||
"import datetime\n",
|
||||
"from sklearn import metrics\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"\n",
|
||||
@ -45,6 +52,21 @@
|
||||
"import triskele"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Keep yaml ordered\n",
|
||||
"\n",
|
||||
"def setup_yaml():\n",
|
||||
" \"\"\" https://stackoverflow.com/a/8661021 \"\"\"\n",
|
||||
" represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())\n",
|
||||
" yaml.add_representer(OrderedDict, represent_dict_order) \n",
|
||||
"setup_yaml()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -52,6 +74,16 @@
|
||||
"## Serial Classifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"expe_in = '../test.yml'\n",
|
||||
"expe_out = '../test_out.yml'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -60,32 +92,301 @@
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"with open('../test.yml') as f:\n",
|
||||
" expe = yaml.safe_load(f)['expe']\n",
|
||||
"display(expe)\n",
|
||||
"with open(expe_in) as f:\n",
|
||||
" expe = OrderedDict(yaml.safe_load(f)['expe'])\n",
|
||||
"display(expe)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compute hashes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_hashes(expe):\n",
|
||||
" glob = hashlib.sha1()\n",
|
||||
"\n",
|
||||
"# Ground truth\n",
|
||||
"gt = triskele.read(expe['ground_truth'])\n",
|
||||
" expe_hashes = OrderedDict()\n",
|
||||
"\n",
|
||||
"# Descriptors\n",
|
||||
"script = expe['descriptors_script']\n",
|
||||
"desc = importlib.import_module(script['name'], package=Descriptors)\n",
|
||||
"importlib.reload(Descriptors)\n",
|
||||
"att = desc.run(**script['parameters'])\n",
|
||||
" for k in ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']:\n",
|
||||
" v = str(expe[k]).encode('utf-8')\n",
|
||||
" expe_hashes[k] = hashlib.sha1(v).hexdigest()\n",
|
||||
" glob.update(v)\n",
|
||||
" expe_hashes['global'] = glob.hexdigest()\n",
|
||||
" return expe_hashes\n",
|
||||
"\n",
|
||||
"# CrossVal and ML\n",
|
||||
"cv = expe['cross_validation']\n",
|
||||
"cl = expe['classifier']\n",
|
||||
"\n",
|
||||
"prediction = np.zeros_like(gt)\n",
|
||||
"\n",
|
||||
"for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
|
||||
" rfc = RandomForestClassifier(**cl['parameters'])\n",
|
||||
" rfc.fit(xt, yt)\n",
|
||||
"expe_hashes = compute_hashes(expe)\n",
|
||||
"expe_hashes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Write hashes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(expe_out, 'w') as of:\n",
|
||||
" yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Keep track of time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Kronos(object):\n",
|
||||
" def __init__(self):\n",
|
||||
" self._pt = time.process_time()\n",
|
||||
" self._times = OrderedDict()\n",
|
||||
" \n",
|
||||
" def time(self, name):\n",
|
||||
" self._times[name + '_process_time'] = time.process_time() - self._pt\n",
|
||||
" self._pt = time.process_time()\n",
|
||||
" \n",
|
||||
" def get_times(self):\n",
|
||||
" return self._times\n",
|
||||
" \n",
|
||||
" ypred = rfc.predict(xv)\n",
|
||||
"kronos = Kronos()\n",
|
||||
"start_time = time.time()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compute descriptors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_descriptors(expe):\n",
|
||||
" \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
|
||||
" script = expe['descriptors_script']\n",
|
||||
" desc = importlib.import_module(script['name'], package=Descriptors)\n",
|
||||
" #importlib.reload(Descriptors)\n",
|
||||
" att = desc.run(**script['parameters'])\n",
|
||||
" \n",
|
||||
" prediction[ti] = ypred"
|
||||
" return att\n",
|
||||
"\n",
|
||||
"att = compute_descriptors(expe)\n",
|
||||
"kronos.time('description')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compute classification"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_classification(expe, att):\n",
|
||||
" \"\"\"Read a standard expe recipe and attributes, return the result classification\"\"\"\n",
|
||||
" # Ground truth\n",
|
||||
" gt = triskele.read(expe['ground_truth'])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # CrossVal and ML\n",
|
||||
" cv = expe['cross_validation']\n",
|
||||
" cl = expe['classifier']\n",
|
||||
"\n",
|
||||
" prediction = np.zeros_like(gt)\n",
|
||||
"\n",
|
||||
" for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
|
||||
" rfc = RandomForestClassifier(**cl['parameters'])\n",
|
||||
" rfc.fit(xt, yt)\n",
|
||||
"\n",
|
||||
" ypred = rfc.predict(xv)\n",
|
||||
"\n",
|
||||
" prediction[ti] = ypred\n",
|
||||
" \n",
|
||||
" return prediction"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"classification = compute_classification(expe, att)\n",
|
||||
"kronos.time('classification')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compute_metrics(ground_truth, classication):\n",
|
||||
" \"\"\"Return dict of metrics for ground_truth and classification prediction in parameters\"\"\"\n",
|
||||
" f = np.nonzero(classification)\n",
|
||||
" pred = classification[f].ravel()\n",
|
||||
" gt = ground_truth[f].ravel()\n",
|
||||
" \n",
|
||||
" results = OrderedDict() \n",
|
||||
" results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n",
|
||||
" results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n",
|
||||
" \n",
|
||||
" return results\n",
|
||||
"\n",
|
||||
"def run_metrics(expe, classification):\n",
|
||||
" \"\"\"Compute the metrics from a standard expe recipe and an given classification\"\"\"\n",
|
||||
" \n",
|
||||
" ### Extensible: meta-classes\n",
|
||||
" gt = triskele.read(expe['ground_truth'])\n",
|
||||
" return compute_metrics(gt, classification)\n",
|
||||
"\n",
|
||||
"expe_results = run_metrics(expe, classification)\n",
|
||||
"expe_results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"kronos.time('metrics')\n",
|
||||
"end_time = time.time()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Report"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_report(kronos):\n",
|
||||
" expe_report = OrderedDict()\n",
|
||||
"\n",
|
||||
" expe_report['supervisor'] = os.uname()[1]\n",
|
||||
"\n",
|
||||
" for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):\n",
|
||||
" expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n",
|
||||
"\n",
|
||||
" ressources = kronos.get_times()\n",
|
||||
" ressources['ram'] = None\n",
|
||||
"\n",
|
||||
" expe_report['ressources'] = ressources\n",
|
||||
" return expe_report\n",
|
||||
"\n",
|
||||
"expe_report = create_report(kronos)\n",
|
||||
"expe_report"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**TODO**\n",
|
||||
"\n",
|
||||
"améliorer kronos pour le start et le end time (build and get_times)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Name and write prediction"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n",
|
||||
"oname_tif = oname + '.tif'\n",
|
||||
"oname_yml = oname + '.yml'\n",
|
||||
"\n",
|
||||
"triskele.write(oname_tif, classification)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Write report and results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(oname_yml, 'w') as of:\n",
|
||||
" yaml.dump(OrderedDict({'expe': expe, \n",
|
||||
" 'expe_hashes': expe_hashes, \n",
|
||||
" 'expe_report': expe_report,\n",
|
||||
" 'expe_classification': oname_tif,\n",
|
||||
" 'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"att.dtype"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import watchdog"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
156
supervisor.py
Normal file
156
supervisor.py
Normal file
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# \file supervisor.py
|
||||
# \brief TODO
|
||||
# \author Florent Guiotte <florent.guiotte@gmail.com>
|
||||
# \version 0.1
|
||||
# \date 25 juil. 2018
|
||||
#
|
||||
# TODO details
|
||||
|
||||
import yaml
|
||||
import numpy as np
|
||||
import importlib
|
||||
import sys
|
||||
import hashlib
|
||||
from collections import OrderedDict
|
||||
import time
|
||||
import os
|
||||
import datetime
|
||||
from sklearn import metrics
|
||||
from pathlib import Path
|
||||
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
|
||||
#sys.path.append('.')
|
||||
import Descriptors
|
||||
from CrossValidationGenerator import APsCVG
|
||||
|
||||
sys.path.append('./triskele/python')
|
||||
import triskele
|
||||
|
||||
|
||||
### Keep yaml ordered
|
||||
def setup_yaml():
    """Register a YAML representer for ``OrderedDict``.

    The representer emits an ``OrderedDict`` as a standard YAML map
    (``tag:yaml.org,2002:map``) built from its ``items()``.
    Recipe from https://stackoverflow.com/a/8661021
    """
    def represent_dict_order(self, data):
        # Hand the items view (not the dict itself) to the representer.
        return self.represent_mapping('tag:yaml.org,2002:map', data.items())

    yaml.add_representer(OrderedDict, represent_dict_order)


# Install the representer once, when the module is imported.
setup_yaml()
|
||||
|
||||
|
||||
def run(expe_file):
    """Run the experiment described by the YAML recipe *expe_file*.

    The recipe's ``expe`` mapping is loaded, hashed, and used to compute the
    descriptors, the classification and the metrics.  A report holding the
    host name, the start/end dates and the process time of each step is
    then assembled.

    NOTE: work in progress -- the hashes, the metrics and the report are
    built but not yet written to disk nor returned (the function returns
    ``None``).
    """
    with open(expe_file) as f:
        expe = OrderedDict(yaml.safe_load(f)['expe'])

    ### Compute hashes
    expe_hashes = compute_hashes(expe)

    ### Keep track of time
    kronos = Kronos()
    start_time = time.time()

    ### Compute descriptors
    descriptors = compute_descriptors(expe)
    kronos.time('description')

    ### Compute classification
    classification = compute_classification(expe, descriptors)
    kronos.time('classification')

    ### Metrics
    # Named expe_results (not `metrics`) so the sklearn `metrics` module
    # imported at file level is not shadowed inside this function.
    expe_results = run_metrics(expe, classification)
    kronos.time('metrics')
    # end_time was read below without ever being assigned (NameError).
    end_time = time.time()

    ### Create report (work in progress)
    expe_report = OrderedDict()

    # os.uname()[1] is the node (host) name of the machine running the job.
    expe_report['supervisor'] = os.uname()[1]

    for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):
        expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')

    # get_times() hands back Kronos' own mapping, so the 'ram' placeholder
    # is added to that same object.
    ressources = kronos.get_times()
    ressources['ram'] = None

    expe_report['ressources'] = ressources
|
||||
|
||||
|
||||
def compute_hashes(expe):
    """Return the SHA-1 digests of the main sections of a recipe.

    Each of the ``ground_truth``, ``descriptors_script``,
    ``cross_validation`` and ``classifier`` entries of *expe* is converted
    with ``str()``, encoded as UTF-8 and hashed on its own.  The ``global``
    entry is the digest of the four encoded sections fed, in that order,
    into a single SHA-1.

    Returns an ``OrderedDict`` of hexadecimal digests: the four section
    names followed by ``global``.
    """
    hashed_sections = ('ground_truth', 'descriptors_script',
                       'cross_validation', 'classifier')

    overall = hashlib.sha1()
    digests = OrderedDict()

    for section in hashed_sections:
        payload = str(expe[section]).encode('utf-8')
        overall.update(payload)
        digests[section] = hashlib.sha1(payload).hexdigest()

    digests['global'] = overall.hexdigest()
    return digests
|
||||
|
||||
|
||||
def compute_descriptors(expe):
    """Compute descriptors from a standard expe recipe.

    ``expe['descriptors_script']`` must provide ``name``, the module to
    import, and ``parameters``, the keyword arguments forwarded to that
    module's ``run`` function.

    Returns whatever the script's ``run(**parameters)`` returns.
    """
    script = expe['descriptors_script']
    # import_module() expects the anchor *package name* (a string), not the
    # module object: relative names such as '.foo' are resolved against it,
    # and it is ignored for absolute names.
    desc = importlib.import_module(script['name'], package=Descriptors.__name__)
    att = desc.run(**script['parameters'])

    return att
|
||||
|
||||
|
||||
def compute_classification(expe, descriptors):
    """Classify the descriptors following a standard expe recipe.

    The ground truth is read from ``expe['ground_truth']``.  For every
    split yielded by ``APsCVG`` a fresh ``RandomForestClassifier`` is
    built from the recipe's classifier parameters, fitted on the first and
    third items of the split, and its prediction for the second item is
    stored in the result at the index given by the fifth item.

    Returns an array shaped like the ground truth (``np.zeros_like``),
    holding zeros wherever no prediction was written.
    """
    # Ground truth
    ground_truth = triskele.read(expe['ground_truth'])

    # CrossVal and ML
    cross_val = expe['cross_validation']
    classifier = expe['classifier']

    result = np.zeros_like(ground_truth)

    splits = APsCVG(ground_truth, descriptors, **cross_val['parameters'])
    for x_train, x_test, y_train, _y_test, test_index in splits:
        forest = RandomForestClassifier(**classifier['parameters'])
        forest.fit(x_train, y_train)
        result[test_index] = forest.predict(x_test)

    return result
|
||||
|
||||
|
||||
def compute_metrics(ground_truth, classication):
    """Return a dict of metrics for a prediction against the ground truth.

    Only the positions where the prediction is non-zero are scored.

    ground_truth -- reference labels, indexable with the output of
                    ``np.nonzero`` applied to the prediction.
    classication -- predicted labels.  The parameter keeps its historical
                    spelling so existing callers are unaffected.

    Returns an ``OrderedDict`` with ``overall_accuracy`` and
    ``cohen_kappa``, both as plain floats.
    """
    # The body used to read `classification`, a name that is not defined
    # in this function (NameError); bind it to the actual parameter.
    classification = classication

    scored = np.nonzero(classification)
    pred = classification[scored].ravel()
    gt = ground_truth[scored].ravel()

    results = OrderedDict()
    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

    return results
|
||||
|
||||
|
||||
def run_metrics(expe, classification):
    """Compute the metrics from a standard expe recipe and a given classification.

    The ground truth is read from ``expe['ground_truth']`` and compared
    with *classification* by ``compute_metrics``, whose result is returned.
    """
    ### Extensible: meta-classes
    reference = triskele.read(expe['ground_truth'])
    results = compute_metrics(reference, classification)
    return results
|
||||
|
||||
|
||||
|
||||
class Kronos(object):
    """Record the process time spent in successive named steps."""

    def __init__(self):
        """Start the clock and create the empty, ordered record of steps."""
        self._pt = time.process_time()
        self._times = OrderedDict()

    def time(self, name):
        """Store the process time elapsed since the previous mark.

        The duration is saved under ``name + '_process_time'`` and the
        reference point is then reset for the next step.
        """
        elapsed = time.process_time() - self._pt
        key = name + '_process_time'
        self._times[key] = elapsed
        self._pt = time.process_time()

    def get_times(self):
        """Return the internal mapping of recorded durations (not a copy)."""
        return self._times
|
||||
11
test.yml
11
test.yml
@ -10,21 +10,18 @@ expe:
|
||||
rasters:
|
||||
- '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
|
||||
- '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
|
||||
areas:
|
||||
- 10
|
||||
- 100
|
||||
- 1e4
|
||||
moi: [.5, .7, .9]
|
||||
areas: [100, 1000]
|
||||
moi: [.5, .9]
|
||||
cross_validation:
|
||||
name: CrossValidationGenerator.APsCVG
|
||||
parameters:
|
||||
n_test: 5
|
||||
n_test: 2
|
||||
classifier:
|
||||
name: sklearn.ensemble.RandomForestClassifier
|
||||
parameters:
|
||||
n_jobs: -1
|
||||
random_state: 0
|
||||
n_estimators: 100
|
||||
n_estimators: 50
|
||||
min_samples_leaf: 10
|
||||
expe_hashes:
|
||||
ground_truth: XXX
|
||||
|
||||
Loading…
Reference in New Issue
Block a user