Supervisor WIP

2018-07-25 17:29:17 +02:00 · 2018-07-25 17:29:17 +02:00 · 578249f644
commit 578249f644
parent ad68cafe1e
3 changed files with 487 additions and 33 deletions
--- a/Serialization.ipynb
+++ b/Serialization.ipynb
@ -8,11 +8,11 @@
    "\n",
    "- [X] Read a YAML recipe\n",
    "- [X] Brew recipe\n",
-    "- [] Compute hashes\n",
-    "- [] Write hashes\n",
-    "- [] Time metrics\n",
-    "- [] Result metrics\n",
-    "- [] Write metrics\n",
+    "- [X] Compute hashes\n",
+    "- [X] Write hashes\n",
+    "- [X] Time metrics\n",
+    "- [X] Result metrics\n",
+    "- [X] Write metrics\n",
    "- [] Write/move results\n",
    "- [] Watch folder\n",
    "- [] Main loop\n",
@ -34,6 +34,13 @@
    "import numpy as np\n",
    "import importlib\n",
    "import sys\n",
+    "import hashlib\n",
+    "from collections import OrderedDict\n",
+    "import time\n",
+    "import os\n",
+    "import datetime\n",
+    "from sklearn import metrics\n",
+    "from pathlib import Path\n",
    "\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
@ -45,6 +52,21 @@
    "import triskele"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Keep yaml ordered\n",
+    "\n",
+    "def setup_yaml():\n",
+    "  \"\"\" https://stackoverflow.com/a/8661021 \"\"\"\n",
+    "  represent_dict_order = lambda self, data:  self.represent_mapping('tag:yaml.org,2002:map', data.items())\n",
+    "  yaml.add_representer(OrderedDict, represent_dict_order)    \n",
+    "setup_yaml()"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@ -52,6 +74,16 @@
    "## Serial Classifier"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expe_in = '../test.yml'\n",
+    "expe_out = '../test_out.yml'"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -60,32 +92,301 @@
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
-    "with open('../test.yml') as f:\n",
-    "    expe = yaml.safe_load(f)['expe']\n",
-    "display(expe)\n",
+    "with open(expe_in) as f:\n",
+    "    expe = OrderedDict(yaml.safe_load(f)['expe'])\n",
+    "display(expe)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Compute hashes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_hashes(expe):\n",
+    "    glob = hashlib.sha1()\n",
    "\n",
-    "# Ground truth\n",
-    "gt = triskele.read(expe['ground_truth'])\n",
+    "    expe_hashes = OrderedDict()\n",
    "\n",
-    "# Descriptors\n",
-    "script = expe['descriptors_script']\n",
-    "desc = importlib.import_module(script['name'], package=Descriptors)\n",
-    "importlib.reload(Descriptors)\n",
-    "att = desc.run(**script['parameters'])\n",
+    "    for k in ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']:\n",
+    "        v = str(expe[k]).encode('utf-8')\n",
+    "        expe_hashes[k] = hashlib.sha1(v).hexdigest()\n",
+    "        glob.update(v)\n",
+    "    expe_hashes['global'] = glob.hexdigest()\n",
+    "    return expe_hashes\n",
    "\n",
-    "# CrossVal and ML\n",
-    "cv = expe['cross_validation']\n",
-    "cl = expe['classifier']\n",
-    "\n",
-    "prediction = np.zeros_like(gt)\n",
-    "\n",
-    "for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
-    "    rfc = RandomForestClassifier(**cl['parameters'])\n",
-    "    rfc.fit(xt, yt)\n",
+    "expe_hashes = compute_hashes(expe)\n",
+    "expe_hashes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Write hashes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(expe_out, 'w') as of:\n",
+    "    yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Keep track of time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Kronos(object):\n",
+    "    def __init__(self):\n",
+    "        self._pt = time.process_time()\n",
+    "        self._times = OrderedDict()\n",
+    "        \n",
+    "    def time(self, name):\n",
+    "        self._times[name + '_process_time'] = time.process_time() - self._pt\n",
+    "        self._pt = time.process_time()\n",
+    "        \n",
+    "    def get_times(self):\n",
+    "        return self._times\n",
    "    \n",
-    "    ypred = rfc.predict(xv)\n",
+    "kronos = Kronos()\n",
+    "start_time = time.time()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Compute descriptors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_descriptors(expe):\n",
+    "    \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
+    "    script = expe['descriptors_script']\n",
+    "    desc = importlib.import_module(script['name'], package=Descriptors)\n",
+    "    #importlib.reload(Descriptors)\n",
+    "    att = desc.run(**script['parameters'])\n",
    "    \n",
-    "    prediction[ti] = ypred"
+    "    return att\n",
+    "\n",
+    "att = compute_descriptors(expe)\n",
+    "kronos.time('description')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Compute classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_classification(expe, att):\n",
+    "    \"\"\"Read a standard expe recipe and attributes, return the result classification\"\"\"\n",
+    "    # Ground truth\n",
+    "    gt = triskele.read(expe['ground_truth'])\n",
+    "\n",
+    "\n",
+    "    # CrossVal and ML\n",
+    "    cv = expe['cross_validation']\n",
+    "    cl = expe['classifier']\n",
+    "\n",
+    "    prediction = np.zeros_like(gt)\n",
+    "\n",
+    "    for xt, xv, yt, yv, ti in APsCVG(gt, att, **cv['parameters']):\n",
+    "        rfc = RandomForestClassifier(**cl['parameters'])\n",
+    "        rfc.fit(xt, yt)\n",
+    "\n",
+    "        ypred = rfc.predict(xv)\n",
+    "\n",
+    "        prediction[ti] = ypred\n",
+    "        \n",
+    "    return prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classification = compute_classification(expe, att)\n",
+    "kronos.time('classification')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_metrics(ground_truth, classication):\n",
+    "    \"\"\"Return dict of metrics for ground_truth and classification prediction in parameters\"\"\"\n",
+    "    f = np.nonzero(classification)\n",
+    "    pred = classification[f].ravel()\n",
+    "    gt = ground_truth[f].ravel()\n",
+    "    \n",
+    "    results = OrderedDict() \n",
+    "    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n",
+    "    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n",
+    "    \n",
+    "    return results\n",
+    "\n",
+    "def run_metrics(expe, classification):\n",
+    "    \"\"\"Compute the metrics from a standard expe recipe and an given classification\"\"\"\n",
+    "    \n",
+    "    ### Extensible: meta-classes\n",
+    "    gt = triskele.read(expe['ground_truth'])\n",
+    "    return compute_metrics(gt, classification)\n",
+    "\n",
+    "expe_results = run_metrics(expe, classification)\n",
+    "expe_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kronos.time('metrics')\n",
+    "end_time = time.time()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_report(kronos):\n",
+    "    expe_report = OrderedDict()\n",
+    "\n",
+    "    expe_report['supervisor'] = os.uname()[1]\n",
+    "\n",
+    "    for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):\n",
+    "        expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n",
+    "\n",
+    "    ressources = kronos.get_times()\n",
+    "    ressources['ram'] = None\n",
+    "\n",
+    "    expe_report['ressources'] = ressources\n",
+    "    return expe_report\n",
+    "\n",
+    "expe_report = create_report(kronos)\n",
+    "expe_report"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**TODO**\n",
+    "\n",
+    "améliorer kronos pour le start et le end time (build and get_times)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Name and write prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n",
+    "oname_tif = oname + '.tif'\n",
+    "oname_yml = oname + '.yml'\n",
+    "\n",
+    "triskele.write(oname_tif, classification)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Write report and results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(oname_yml, 'w') as of:\n",
+    "    yaml.dump(OrderedDict({'expe': expe, \n",
+    "               'expe_hashes': expe_hashes, \n",
+    "               'expe_report': expe_report,\n",
+    "               'expe_classification': oname_tif,\n",
+    "               'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "att.dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import watchdog"
   ]
  },
  {
--- a/supervisor.py
+++ b/supervisor.py
@ -0,0 +1,156 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# \file supervisor.py
+# \brief Run a classification experiment described by a YAML recipe
+# \author Florent Guiotte <florent.guiotte@gmail.com>
+# \version 0.1
+# \date 25 juil. 2018
+#
+# TODO details
+
+import yaml
+import numpy as np
+import importlib
+import sys
+import hashlib
+from collections import OrderedDict
+import time
+import os
+import datetime
+from sklearn import metrics
+from pathlib import Path
+
+from sklearn.ensemble import RandomForestClassifier
+
+#sys.path.append('.')
+import Descriptors
+from CrossValidationGenerator import APsCVG
+
+sys.path.append('./triskele/python')
+import triskele
+
+
+### Keep yaml ordered
+def setup_yaml():
+    """Make PyYAML dump OrderedDict instances as plain YAML mappings.
+
+    The dictionary is handed to the dumper through ``items()``, following
+    the recipe at https://stackoverflow.com/a/8661021, whose purpose is to
+    keep the YAML output ordered.
+    """
+    def _ordered_mapping(dumper, data):
+        # Same tag as a regular dict, so the output shows no OrderedDict marker.
+        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())
+
+    yaml.add_representer(OrderedDict, _ordered_mapping)
+
+
+# Register the representer once, when the module is loaded.
+setup_yaml()
+
+
+def run(expe_file):
+    """Run the experiment described by a YAML recipe and return its outcome.
+
+    The recipe is read from the 'expe' entry of ``expe_file``. Descriptors,
+    classification and metrics are computed in that order, and the process
+    time spent in each step is recorded.
+
+    :param expe_file: path of the YAML recipe file
+    :return: OrderedDict with the keys 'expe' (the recipe), 'expe_hashes',
+        'expe_report', 'expe_results' and 'classification' (the predicted
+        array)
+    """
+    with open(expe_file) as f:
+        expe = OrderedDict(yaml.safe_load(f)['expe'])
+
+    ### Compute hashes
+    expe_hashes = compute_hashes(expe)
+
+    ### Keep track of time
+    kronos = Kronos()
+    start_time = time.time()
+
+    ### Compute descriptors
+    descriptors = compute_descriptors(expe)
+    kronos.time('description')
+
+    ### Compute classification
+    classification = compute_classification(expe, descriptors)
+    kronos.time('classification')
+
+    ### Metrics
+    # Not named `metrics`: that would shadow the imported sklearn module
+    # inside this function.
+    expe_results = run_metrics(expe, classification)
+    kronos.time('metrics')
+    end_time = time.time()
+
+    ### Create report
+    # This part used to sit at module level, where start_time, end_time and
+    # kronos do not exist, so importing the module failed with a NameError.
+    expe_report = OrderedDict()
+
+    # Second field of os.uname() is the node (host) name.
+    expe_report['supervisor'] = os.uname()[1]
+
+    for timev, datek in zip((start_time, end_time), ('start_date', 'end_date')):
+        expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')
+
+    ressources = kronos.get_times()
+    # Placeholder: memory usage is not measured yet.
+    ressources['ram'] = None
+
+    expe_report['ressources'] = ressources
+
+    return OrderedDict([
+        ('expe', expe),
+        ('expe_hashes', expe_hashes),
+        ('expe_report', expe_report),
+        ('expe_results', expe_results),
+        ('classification', classification),
+    ])
+
+
+def compute_hashes(expe):
+    """Return SHA-1 hex digests identifying the sections of a recipe.
+
+    The 'ground_truth', 'descriptors_script', 'cross_validation' and
+    'classifier' entries are each hashed from the UTF-8 bytes of their
+    ``str()`` form. The 'global' entry is the digest of those same byte
+    strings fed one after the other, in that order.
+
+    :param expe: mapping holding the four sections listed above
+    :return: OrderedDict of section name (plus 'global') to hex digest
+    :raises KeyError: if one of the sections is missing from ``expe``
+    """
+    hashed_sections = ('ground_truth', 'descriptors_script',
+                       'cross_validation', 'classifier')
+
+    digests = OrderedDict()
+    overall = hashlib.sha1()
+
+    for section in hashed_sections:
+        payload = str(expe[section]).encode('utf-8')
+        overall.update(payload)
+        digests[section] = hashlib.sha1(payload).hexdigest()
+
+    digests['global'] = overall.hexdigest()
+    return digests
+
+
+def compute_descriptors(expe):
+    """Compute descriptors from a standard expe recipe.
+
+    The module named by ``expe['descriptors_script']['name']`` is imported
+    and its ``run`` function is called with the entries of
+    ``expe['descriptors_script']['parameters']`` as keyword arguments.
+
+    :param expe: recipe mapping with a 'descriptors_script' section
+    :return: whatever the script's ``run`` function returns
+    """
+    script = expe['descriptors_script']
+    # import_module expects the anchor package as a name (string), not as a
+    # module object; the anchor is only used when the script name is relative.
+    desc = importlib.import_module(script['name'], package=Descriptors.__name__)
+    return desc.run(**script['parameters'])
+
+
+def compute_classification(expe, descriptors):
+    """Classify the descriptors of a recipe with cross-validated random forests.
+
+    A fresh RandomForestClassifier is trained for every split produced by
+    APsCVG, and its predictions are written into the output at the indices
+    given by that split.
+    NOTE(review): the five values yielded per split are presumably train
+    features, test features, train labels, test labels and test indices;
+    confirm against APsCVG.
+
+    :param expe: recipe mapping with 'ground_truth', 'cross_validation' and
+        'classifier' sections
+    :param descriptors: descriptors handed to the cross-validation generator
+    :return: array with the shape and dtype of the ground truth; positions
+        that no split writes to stay at 0
+    """
+    ground_truth = triskele.read(expe['ground_truth'])
+
+    cross_validation = expe['cross_validation']
+    classifier_spec = expe['classifier']
+
+    # Starts all-zero, then gets filled split by split.
+    prediction = np.zeros_like(ground_truth)
+
+    splits = APsCVG(ground_truth, descriptors, **cross_validation['parameters'])
+    for train_x, test_x, train_y, _, test_index in splits:
+        forest = RandomForestClassifier(**classifier_spec['parameters'])
+        forest.fit(train_x, train_y)
+        prediction[test_index] = forest.predict(test_x)
+
+    return prediction
+
+
+def compute_metrics(ground_truth, classication):
+    """Return a dict of metrics comparing a prediction with the ground truth.
+
+    Only the positions where the prediction is non-zero are scored.
+    NOTE(review): presumably 0 marks positions that were never predicted;
+    confirm against compute_classification.
+
+    :param ground_truth: array of reference labels
+    :param classication: array of predicted labels, indexable like
+        ``ground_truth`` (the spelling is kept so the signature does not
+        change)
+    :return: OrderedDict with 'overall_accuracy' and 'cohen_kappa' as floats
+    """
+    # Read the argument itself. The body used to read `classification`,
+    # which is not the parameter name, so the argument was ignored.
+    prediction = classication
+
+    f = np.nonzero(prediction)
+    pred = prediction[f].ravel()
+    gt = ground_truth[f].ravel()
+
+    results = OrderedDict()
+    # float() turns the scores into plain Python floats.
+    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))
+    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))
+
+    return results
+
+
+def run_metrics(expe, classification):
+    """Compute the metrics for a given classification of a standard expe recipe.
+
+    The ground truth is read from the path stored in
+    ``expe['ground_truth']`` and compared with ``classification``.
+
+    :param expe: recipe mapping with a 'ground_truth' entry
+    :param classification: predicted labels to evaluate
+    :return: the metrics returned by compute_metrics
+    """
+    ### Extensible: meta-classes
+    reference = triskele.read(expe['ground_truth'])
+    scores = compute_metrics(reference, classification)
+    return scores
+
+
+
+class Kronos:
+    """Stopwatch recording the process time spent between checkpoints."""
+
+    def __init__(self):
+        """Start the clock and create the empty table of timings."""
+        # Process time at the last checkpoint (or at creation).
+        self._pt = time.process_time()
+        # Timings in the order they were recorded.
+        self._times = OrderedDict()
+
+    def time(self, name):
+        """Record the process time elapsed since the previous checkpoint.
+
+        The value is stored under ``name + '_process_time'`` and the clock
+        is then restarted.
+        """
+        elapsed = time.process_time() - self._pt
+        self._times[name + '_process_time'] = elapsed
+        self._pt = time.process_time()
+
+    def get_times(self):
+        """Return the internal OrderedDict of timings (not a copy)."""
+        return self._times
--- a/test.yml
+++ b/test.yml
@ -10,21 +10,18 @@ expe:
            rasters:
                - '../Data/phase1_rasters/DEM+B_C123/UH17_GEM051_TR.tif'
                - '../Data/phase1_rasters/DEM_C123_3msr/UH17_GEG051_TR.tif'
-            areas:
-                - 10
-                - 100
-                - 1e4
-            moi: [.5, .7, .9]
+            areas: [100, 1000]
+            moi: [.5, .9]
    cross_validation:
        name: CrossValidationGenerator.APsCVG
        parameters:
-            n_test: 5
+            n_test: 2
    classifier:
        name: sklearn.ensemble.RandomForestClassifier
        parameters:
            n_jobs: -1
            random_state: 0
-            n_estimators: 100
+            n_estimators: 50
            min_samples_leaf: 10
 expe_hashes:
    ground_truth: XXX