diff --git a/Notebooks/YAML Serialization.ipynb b/Notebooks/YAML Serialization.ipynb index 8dd087c..b1fa07e 100644 --- a/Notebooks/YAML Serialization.ipynb +++ b/Notebooks/YAML Serialization.ipynb @@ -8,11 +8,11 @@ "\n", "- [X] Read a YAML recipe\n", "- [X] Brew recipe\n", - "- [] Compute hashes\n", - "- [] Write hashes\n", - "- [] Time metrics\n", - "- [] Result metrics\n", - "- [] Write metrics\n", + "- [X] Compute hashes\n", + "- [X] Write hashes\n", + "- [X] Time metrics\n", + "- [X] Result metrics\n", + "- [X] Write metrics\n", "- [] Write/move results\n", "- [] Watch folder\n", "- [] Main loop\n", @@ -34,6 +34,13 @@ "import numpy as np\n", "import importlib\n", "import sys\n", + "import hashlib\n", + "from collections import OrderedDict\n", + "import time\n", + "import os\n", + "import datetime\n", + "from sklearn import metrics\n", + "from pathlib import Path\n", "\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", @@ -45,6 +52,21 @@ "import triskele" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Keep yaml ordered\n", + "\n", + "def setup_yaml():\n", + " \"\"\" https://stackoverflow.com/a/8661021 \"\"\"\n", + " represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())\n", + " yaml.add_representer(OrderedDict, represent_dict_order) \n", + "setup_yaml()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -52,6 +74,16 @@ "## Serial Classifier" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expe_in = '../test.yml'\n", + "expe_out = '../test_out.yml'" + ] + }, { "cell_type": "code", "execution_count": null, @@ -60,32 +92,301 @@ "source": [ "%load_ext autoreload\n", "%autoreload 2\n", - "with open('../test.yml') as f:\n", - " expe = yaml.safe_load(f)['expe']\n", - "display(expe)\n", + "with open(expe_in) as f:\n", + " expe = OrderedDict(yaml.safe_load(f)['expe'])\n", + 
"display(expe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compute hashes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_hashes(expe):\n", + " glob = hashlib.sha1()\n", "\n", - "# Ground truth\n", - "gt = triskele.read(expe['ground_truth'])\n", + " expe_hashes = OrderedDict()\n", "\n", - "# Descriptors\n", - "script = expe['descriptors_script']\n", - "desc = importlib.import_module(script['name'], package=Descriptors)\n", - "importlib.reload(Descriptors)\n", - "att = desc.run(**script['parameters'])\n", + " for k in ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']:\n", + " v = str(expe[k]).encode('utf-8')\n", + " expe_hashes[k] = hashlib.sha1(v).hexdigest()\n", + " glob.update(v)\n", + " expe_hashes['global'] = glob.hexdigest()\n", + " return expe_hashes\n", "\n", - "# CrossVal and ML\n", - "cv = expe['cross_validation']\n", - "cl = expe['classifier']\n", - "\n", - "prediction = np.zeros_like(gt)\n", - "\n", - "for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n", - " rfc = RandomForestClassifier(**cl['parameters'])\n", - " rfc.fit(xt, yt)\n", + "expe_hashes = compute_hashes(expe)\n", + "expe_hashes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Write hashes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(expe_out, 'w') as of:\n", + " yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Keep track of time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Kronos(object):\n", + " def __init__(self):\n", + " self._pt = time.process_time()\n", + " self._times = OrderedDict()\n", + " \n", + " def 
time(self, name):\n", + " self._times[name + '_process_time'] = time.process_time() - self._pt\n", + " self._pt = time.process_time()\n", + " \n", + " def get_times(self):\n", + " return self._times\n", " \n", - " ypred = rfc.predict(xv)\n", + "kronos = Kronos()\n", + "start_time = time.time()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compute descriptors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_descriptors(expe):\n", + " \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n", + " script = expe['descriptors_script']\n", + " desc = importlib.import_module(script['name'], package=Descriptors)\n", + " #importlib.reload(Descriptors)\n", + " att = desc.run(**script['parameters'])\n", " \n", - " prediction[ti] = ypred" + " return att\n", + "\n", + "att = compute_descriptors(expe)\n", + "kronos.time('description')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compute classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_classification(expe, att):\n", + " \"\"\"Read a standard expe recipe and attributes, return the result classification\"\"\"\n", + " # Ground truth\n", + " gt = triskele.read(expe['ground_truth'])\n", + "\n", + "\n", + " # CrossVal and ML\n", + " cv = expe['cross_validation']\n", + " cl = expe['classifier']\n", + "\n", + " prediction = np.zeros_like(gt)\n", + "\n", + " for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n", + " rfc = RandomForestClassifier(**cl['parameters'])\n", + " rfc.fit(xt, yt)\n", + "\n", + " ypred = rfc.predict(xv)\n", + "\n", + " prediction[ti] = ypred\n", + " \n", + " return prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "classification = compute_classification(expe, att)\n", + 
"kronos.time('classification')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_metrics(ground_truth, classication):\n", + " \"\"\"Return dict of metrics for ground_truth and classification prediction in parameters\"\"\"\n", + " f = np.nonzero(classification)\n", + " pred = classification[f].ravel()\n", + " gt = ground_truth[f].ravel()\n", + " \n", + " results = OrderedDict() \n", + " results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n", + " results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n", + " \n", + " return results\n", + "\n", + "def run_metrics(expe, classification):\n", + " \"\"\"Compute the metrics from a standard expe recipe and an given classification\"\"\"\n", + " \n", + " ### Extensible: meta-classes\n", + " gt = triskele.read(expe['ground_truth'])\n", + " return compute_metrics(gt, classification)\n", + "\n", + "expe_results = run_metrics(expe, classification)\n", + "expe_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kronos.time('metrics')\n", + "end_time = time.time()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_report(kronos):\n", + " expe_report = OrderedDict()\n", + "\n", + " expe_report['supervisor'] = os.uname()[1]\n", + "\n", + " for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):\n", + " expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n", + "\n", + " ressources = kronos.get_times()\n", + " ressources['ram'] = None\n", + "\n", + " expe_report['ressources'] = ressources\n", + " return expe_report\n", + "\n", + "expe_report = 
create_report(kronos)\n", + "expe_report" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**TODO**\n", + "\n", + "améliorer kronos pour le start et le end time (build and get_times)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Name and write prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n", + "oname_tif = oname + '.tif'\n", + "oname_yml = oname + '.yml'\n", + "\n", + "triskele.write(oname_tif, classification)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Write report and results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(oname_yml, 'w') as of:\n", + " yaml.dump(OrderedDict({'expe': expe, \n", + " 'expe_hashes': expe_hashes, \n", + " 'expe_report': expe_report,\n", + " 'expe_classification': oname_tif,\n", + " 'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "att.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import watchdog" ] }, { diff --git a/supervisor.py b/supervisor.py new file mode 100644 index 0000000..0a5b7ab --- /dev/null +++ b/supervisor.py @@ -0,0 +1,156 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# \file supervisor.py +# \brief TODO +# \author Florent Guiotte +# \version 0.1 +# \date 25 juil. 
# 2018  (tail of the "\date" header line that starts on the previous source line)
#
# Supervisor: read a YAML experiment recipe, compute the descriptors, run the
# cross-validated classification, score it and build a small run report.

import yaml
import numpy as np
import importlib
import sys
import hashlib
from collections import OrderedDict
import time
import os
import datetime
from sklearn import metrics
from pathlib import Path

from sklearn.ensemble import RandomForestClassifier

#sys.path.append('.')
import Descriptors
from CrossValidationGenerator import APsCVG

# Must stay before `import triskele`: the bindings are looked up on this path.
sys.path.append('./triskele/python')
import triskele


### Keep yaml ordered
def setup_yaml():
    """Make PyYAML dump ``OrderedDict`` as a plain mapping, in insertion order.

    See https://stackoverflow.com/a/8661021
    """
    def represent_dict_order(dumper, data):
        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())

    yaml.add_representer(OrderedDict, represent_dict_order)


# Registered at import time so every later yaml.dump() keeps key order.
setup_yaml()


def run(expe_file):
    """Run the experiment described by a YAML recipe file.

    Parameters
    ----------
    expe_file : path of a YAML file whose top-level ``expe`` mapping holds
        the recipe (``ground_truth``, ``descriptors_script``,
        ``cross_validation`` and ``classifier`` entries are read).

    Returns
    -------
    (summary, classification)
        ``summary`` is an ``OrderedDict`` with the keys ``expe``,
        ``expe_hashes``, ``expe_report`` and ``expe_results``;
        ``classification`` is the array returned by
        ``compute_classification``.
    """
    # Explicit encoding so the recipe is read the same way on every locale.
    with open(expe_file, encoding='utf-8') as f:
        expe = OrderedDict(yaml.safe_load(f)['expe'])

    ### Compute hashes
    expe_hashes = compute_hashes(expe)

    ### Keep track of time
    kronos = Kronos()
    start_time = time.time()

    ### Compute descriptors
    descriptors = compute_descriptors(expe)
    kronos.time('description')

    ### Compute classification
    classification = compute_classification(expe, descriptors)
    kronos.time('classification')

    ### Metrics
    # Deliberately not called `metrics`: that name is the sklearn module.
    expe_results = run_metrics(expe, classification)
    kronos.time('metrics')
    end_time = time.time()

    ### Create report
    expe_report = _create_report(kronos, start_time, end_time)

    summary = OrderedDict()
    summary['expe'] = expe
    summary['expe_hashes'] = expe_hashes
    summary['expe_report'] = expe_report
    summary['expe_results'] = expe_results
    return summary, classification


def _create_report(kronos, start_time, end_time):
    """Build the run report: host name, start/end dates and step timings.

    ``start_time`` and ``end_time`` are ``time.time()`` timestamps; ``kronos``
    provides the per-step timings through ``get_times()``.
    """
    expe_report = OrderedDict()

    # Node name of the machine running the experiment (Unix-only call).
    expe_report['supervisor'] = os.uname()[1]

    for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):
        expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')

    # Copy, so adding 'ram' does not alter the dict owned by kronos.
    ressources = OrderedDict(kronos.get_times())
    ressources['ram'] = None  # placeholder: memory usage is not measured yet

    expe_report['ressources'] = ressources
    return expe_report


def compute_hashes(expe):
    """Return SHA-1 digests identifying each part of the recipe.

    One hex digest per recipe entry, computed from the UTF-8 encoded ``str()``
    of that entry, plus a ``global`` digest fed with all entries in the same
    fixed order.
    """
    hashed_keys = ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']
    glob = hashlib.sha1()

    expe_hashes = OrderedDict()

    for k in hashed_keys:
        v = str(expe[k]).encode('utf-8')
        expe_hashes[k] = hashlib.sha1(v).hexdigest()
        glob.update(v)
    expe_hashes['global'] = glob.hexdigest()
    return expe_hashes
def compute_descriptors(expe):
    """Compute descriptors from a standard expe recipe.

    Imports the module named by ``expe['descriptors_script']['name']`` and
    returns the result of its ``run`` function, called with the recipe's
    ``parameters`` mapping as keyword arguments.
    """
    script = expe['descriptors_script']
    # `package` must be the anchor package *name* (a string), not the module
    # object; it is only consulted when `name` is a relative import.
    desc = importlib.import_module(script['name'], package=Descriptors.__name__)
    return desc.run(**script['parameters'])


def compute_classification(expe, descriptors):
    """Read a standard expe recipe and descriptors, return the result classification.

    The returned array has the shape and dtype of the ground truth; entries
    that no cross-validation fold assigns stay at 0.
    """
    # Ground truth
    gt = triskele.read(expe['ground_truth'])

    # CrossVal and ML
    cv = expe['cross_validation']
    cl = expe['classifier']

    prediction = np.zeros_like(gt)

    # NOTE(review): the 5-tuple presumably is (train X, test X, train y,
    # test y, test index) -- confirm against APsCVG. `yv` is unused here.
    for xt, xv, yt, yv, ti in APsCVG(gt, descriptors, **cv['parameters']):
        # A fresh classifier is trained for every fold.
        rfc = RandomForestClassifier(**cl['parameters'])
        rfc.fit(xt, yt)

        prediction[ti] = rfc.predict(xv)

    return prediction


def compute_metrics(ground_truth, classification):
    """Return dict of metrics for ground_truth and classification prediction in parameters.

    Only the positions where ``classification`` is non-zero are scored. The
    result is an ``OrderedDict`` with ``overall_accuracy`` and ``cohen_kappa``
    as plain floats.
    """
    f = np.nonzero(classification)
    pred = classification[f].ravel()
    gt = ground_truth[f].ravel()

    results = OrderedDict()
    # float(): store plain Python floats instead of numpy scalars.
    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))

    return results


def run_metrics(expe, classification):
    """Compute the metrics from a standard expe recipe and a given classification."""

    ### Extensible: meta-classes
    gt = triskele.read(expe['ground_truth'])
    return compute_metrics(gt, classification)


class Kronos(object):
    """Stopwatch recording the process (CPU) time spent between checkpoints."""

    def __init__(self):
        # Process-time reading taken at the previous checkpoint.
        self._pt = time.process_time()
        # Checkpoint label -> elapsed process time, in call order.
        self._times = OrderedDict()

    def time(self, name):
        """Store the process time elapsed since the previous checkpoint.

        The value is saved under ``name + '_process_time'`` and the
        checkpoint is moved to now.
        """
        # One clock reading for both uses, so no time falls between them.
        now = time.process_time()
        self._times[name + '_process_time'] = now - self._pt
        self._pt = now

    def get_times(self):
        """Return the recorded timings (the internal dict, not a copy)."""
        return self._times


# NOTE(review): on this source line the flattened patch goes on with the start
# of the test.yml hunk. It is not Python; it is kept verbatim below so that no
# patch content is lost:
#   diff --git a/test.yml b/test.yml index 4b6747d..f57c103 100644
#   --- a/test.yml +++ b/test.yml @@ -10,21 +10,18 @@ expe: rasters:
#   - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif' -
'../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif' - areas: - - 10 - - 100 - - 1e4 - moi: [.5, .7, .9] + areas: [100, 1000] + moi: [.5, .9] cross_validation: name: CrossValidationGenerator.APsCVG parameters: - n_test: 5 + n_test: 2 classifier: name: sklearn.ensemble.RandomForestClassifier parameters: n_jobs: -1 random_state: 0 - n_estimators: 100 + n_estimators: 50 min_samples_leaf: 10 expe_hashes: ground_truth: XXX