ld2daps/Notebooks/YAML Serialization.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Serialize Attribute Profiles Classification\n",
    "\n",
    "- [X] Read a YAML recipe\n",
    "- [X] Brew recipe\n",
    "- [X] Compute hashes\n",
    "- [X] Write hashes\n",
    "- [X] Time metrics\n",
    "- [X] Result metrics\n",
    "- [X] Write metrics\n",
    "- [X] Write/move results\n",
    "- [X] Watch folder\n",
    "- [X] Main loop\n",
    "- [ ] Logs\n",
    "- [ ] Catch errors\n",
    "- [ ] Custom CVG\n",
    "\n",
    "\n",
    "## Init"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "import numpy as np\n",
    "import importlib\n",
    "import sys\n",
    "import hashlib\n",
    "from collections import OrderedDict\n",
    "import time\n",
    "import os\n",
    "import datetime\n",
    "from sklearn import metrics\n",
    "from pathlib import Path\n",
    "\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "sys.path.append('..')\n",
    "import Descriptors\n",
    "from CrossValidationGenerator import APsCVG\n",
    "\n",
    "sys.path.append('../triskele/python')\n",
    "import triskele"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Keep yaml ordered\n",
    "\n",
    "def setup_yaml():\n",
    "    \"\"\"Teach PyYAML to dump an OrderedDict as a plain mapping.\n",
    "\n",
    "    Recipe taken from https://stackoverflow.com/a/8661021\n",
    "    \"\"\"\n",
    "    def represent_ordered_dict(dumper, data):\n",
    "        # The recipe hands over the items view rather than the dict itself;\n",
    "        # this is what keeps the keys in their insertion order in the output.\n",
    "        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())\n",
    "\n",
    "    yaml.add_representer(OrderedDict, represent_ordered_dict)\n",
    "\n",
    "setup_yaml()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Serial Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "expe_in = '../test.yml'\n",
    "expe_out = '../test_out.yml'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "with open(expe_in) as f:\n",
    "    expe = OrderedDict(yaml.safe_load(f)['expe'])\n",
    "display(expe)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute hashes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_hashes(expe):\n",
    "    \"\"\"Fingerprint the recipe sections that are hashed for an experiment.\n",
    "\n",
    "    The sections 'ground_truth', 'descriptors_script', 'cross_validation' and\n",
    "    'classifier' are each converted with str(), UTF-8 encoded and hashed with\n",
    "    SHA-1. The 'global' digest is computed over the same encoded sections,\n",
    "    concatenated in that order.\n",
    "\n",
    "    Returns an OrderedDict: section name -> hex digest, followed by 'global'.\n",
    "    Raises KeyError when a section is missing from expe.\n",
    "    \"\"\"\n",
    "    hashed_sections = ('ground_truth', 'descriptors_script', 'cross_validation', 'classifier')\n",
    "    encoded = [(section, str(expe[section]).encode('utf-8')) for section in hashed_sections]\n",
    "\n",
    "    digests = OrderedDict((section, hashlib.sha1(raw).hexdigest()) for section, raw in encoded)\n",
    "    # Hashing the joined bytes equals feeding them one by one to a single SHA-1 object\n",
    "    digests['global'] = hashlib.sha1(b''.join(raw for _, raw in encoded)).hexdigest()\n",
    "    return digests\n",
    "\n",
    "expe_hashes = compute_hashes(expe)\n",
    "expe_hashes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write hashes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(expe_out, 'w') as of:\n",
    "    yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Keep track of time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Kronos(object):\n",
    "    \"\"\"Stopwatch recording the CPU time consumed between successive laps.\n",
    "\n",
    "    Laps are measured with time.process_time(). The wall-clock time of\n",
    "    creation is stored separately with time.time().\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self._pt = time.process_time()  # process time at the start of the current lap\n",
    "        self._times = OrderedDict()     # '<name>_process_time' -> seconds, in call order\n",
    "        self._stime = time.time()       # wall-clock timestamp of creation\n",
    "\n",
    "    def time(self, name):\n",
    "        \"\"\"Store the current lap under '<name>_process_time' and start the next lap.\"\"\"\n",
    "        # Read the clock once so the next lap starts exactly where this one\n",
    "        # ends; two separate reads would leave a small gap counted in no lap.\n",
    "        now = time.process_time()\n",
    "        self._times[name + '_process_time'] = now - self._pt\n",
    "        self._pt = now\n",
    "\n",
    "    def get_times(self):\n",
    "        \"\"\"Return the OrderedDict of recorded laps (the internal mapping, not a copy).\"\"\"\n",
    "        return self._times\n",
    "\n",
    "    def get_start_date(self):\n",
    "        \"\"\"Return the wall-clock timestamp taken when the instance was created.\"\"\"\n",
    "        return self._stime\n",
    "\n",
    "    def get_end_date(self):\n",
    "        \"\"\"Return the wall-clock timestamp at the time of the call (not stored).\"\"\"\n",
    "        return time.time()\n",
    "\n",
    "kronos = Kronos()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute descriptors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_descriptors(expe):\n",
    "    \"\"\"Run the descriptors script named in a standard expe recipe.\n",
    "\n",
    "    The module expe['descriptors_script']['name'] is imported dynamically and\n",
    "    its run() function is called with expe['descriptors_script']['parameters']\n",
    "    expanded as keyword arguments. Whatever run() returns is returned as is.\n",
    "    \"\"\"\n",
    "    recipe = expe['descriptors_script']\n",
    "    descriptor_module = importlib.import_module(recipe['name'])\n",
    "    return descriptor_module.run(**recipe['parameters'])\n",
    "\n",
    "att = compute_descriptors(expe)\n",
    "kronos.time('description')\n",
    "att.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_classification(expe, att):\n",
    "    \"\"\"Fit and predict fold by fold, as described by a standard expe recipe.\n",
    "\n",
    "    The ground truth is read from expe['ground_truth']. The fold generator\n",
    "    (expe['cross_validation']) and the classifier (expe['classifier']) are\n",
    "    looked up by 'name' inside the module 'package' and called with their\n",
    "    'parameters'. Each fold yields five values: a fresh classifier is fit on\n",
    "    the first and third, and its prediction for the second is stored at the\n",
    "    index given by the fifth.\n",
    "\n",
    "    Returns an array shaped like the ground truth, left at zero wherever no\n",
    "    fold stored a prediction.\n",
    "    \"\"\"\n",
    "    ground_truth = triskele.read(expe['ground_truth'])\n",
    "\n",
    "    cv_recipe = expe['cross_validation']\n",
    "    clf_recipe = expe['classifier']\n",
    "\n",
    "    def load_named(recipe):\n",
    "        # Resolve recipe['name'] inside the module recipe['package']\n",
    "        return getattr(importlib.import_module(recipe['package']), recipe['name'])\n",
    "\n",
    "    make_folds = load_named(cv_recipe)\n",
    "    make_classifier = load_named(clf_recipe)\n",
    "\n",
    "    prediction = np.zeros_like(ground_truth)\n",
    "\n",
    "    folds = make_folds(ground_truth, att, **cv_recipe['parameters'])\n",
    "    for x_fit, x_eval, y_fit, _y_eval, where in folds:\n",
    "        model = make_classifier(**clf_recipe['parameters'])\n",
    "        model.fit(x_fit, y_fit)\n",
    "        prediction[where] = model.predict(x_eval)\n",
    "\n",
    "    return prediction\n",
    "\n",
    "classification = compute_classification(expe, att)\n",
    "kronos.time('classification')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Counter-example: RandomForestClassifier is a class inside sklearn.ensemble,\n",
    "# not a submodule, so a dotted import of it fails. The error is caught and\n",
    "# shown so that Restart & Run All can carry on to the next cells.\n",
    "try:\n",
    "    import sklearn.ensemble.RandomForestClassifier\n",
    "except ImportError as err:\n",
    "    print(repr(err))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Counter-example: `package` is only the anchor for relative names (leading\n",
    "# dot), so this looks for a top-level module called 'RandomForestClassifier'\n",
    "# and fails. The error is caught and shown so that Restart & Run All can\n",
    "# carry on to the next cells.\n",
    "try:\n",
    "    importlib.import_module('RandomForestClassifier', package='sklearn.ensemble')\n",
    "except ImportError as err:\n",
    "    print(repr(err))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "getattr(importlib.import_module('sklearn.ensemble'), 'RandomForestClassifier')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_metrics(ground_truth, classification):\n",
    "    \"\"\"Return a dict of metrics for a classification against its ground truth.\n",
    "\n",
    "    Only the positions where `classification` is non-zero are compared.\n",
    "\n",
    "    Returns an OrderedDict with the keys 'overall_accuracy' and 'cohen_kappa',\n",
    "    both converted to plain floats.\n",
    "    \"\"\"\n",
    "    # Index with the argument, never the notebook-level variable of the same name\n",
    "    labelled = np.nonzero(classification)\n",
    "    pred = classification[labelled].ravel()\n",
    "    gt = ground_truth[labelled].ravel()\n",
    "\n",
    "    results = OrderedDict()\n",
    "    results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n",
    "    results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n",
    "\n",
    "    return results\n",
    "\n",
    "def run_metrics(expe, classification):\n",
    "    \"\"\"Compute the metrics from a standard expe recipe and a given classification.\n",
    "\n",
    "    The ground truth is read from expe['ground_truth'] and passed, together\n",
    "    with the classification, to compute_metrics(); its result is returned.\n",
    "    \"\"\"\n",
    "\n",
    "    ### Extensible: meta-classes\n",
    "    gt = triskele.read(expe['ground_truth'])\n",
    "    return compute_metrics(gt, classification)\n",
    "\n",
    "expe_results = run_metrics(expe, classification)\n",
    "expe_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "kronos.time('metrics')\n",
    "end_time = time.time()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_report(kronos):\n",
    "    \"\"\"Build the report section: host name, run dates and resource usage.\n",
    "\n",
    "    kronos must provide get_start_date() and get_end_date() (timestamps\n",
    "    accepted by datetime.fromtimestamp) and get_times() (mapping of timings).\n",
    "\n",
    "    Returns an OrderedDict with the keys 'supervisor', 'start_date',\n",
    "    'end_date' and 'ressources'.\n",
    "    \"\"\"\n",
    "    expe_report = OrderedDict()\n",
    "\n",
    "    expe_report['supervisor'] = os.uname()[1]\n",
    "\n",
    "    for timev, datek in zip((kronos.get_start_date(), kronos.get_end_date()), ('start_date', 'end_date')):\n",
    "        expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n",
    "\n",
    "    # Work on a copy: adding 'ram' to the mapping returned by get_times()\n",
    "    # would otherwise write the placeholder into the timer's own records.\n",
    "    ressources = OrderedDict(kronos.get_times())\n",
    "    ressources['ram'] = None  # placeholder, no memory figure is collected here\n",
    "\n",
    "    expe_report['ressources'] = ressources\n",
    "    return expe_report\n",
    "\n",
    "expe_report = create_report(kronos)\n",
    "expe_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Name and write prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n",
    "oname_tif = oname + '.tif'\n",
    "oname_yml = oname + '.yml'\n",
    "\n",
    "triskele.write(oname_tif, classification)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Write report and results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(oname_yml, 'w') as of:\n",
    "    yaml.dump(OrderedDict({'expe': expe, \n",
    "               'expe_hashes': expe_hashes, \n",
    "               'expe_report': expe_report,\n",
    "               'expe_classification': oname_tif,\n",
    "               'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "att.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import watchdog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# `prediction` only exists inside compute_classification(); at notebook level\n",
    "# the array it returns is bound to `classification`.\n",
    "plt.figure(figsize=(16, 9))\n",
    "plt.imshow(classification)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import a module and a class by name (from strings), then instantiate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "module = importlib.import_module(module_name)\n",
    "class_ = getattr(module, class_name)\n",
    "instance = class_()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):\n",
    "    \"\"\"Normalise descriptor parameters read from a recipe and return them.\n",
    "\n",
    "    treshold is converted with float(). areas is converted to a float array\n",
    "    and then truncated to an integer array; sd and moi are converted to float\n",
    "    arrays. Any of areas, sd, moi left as None stays None. rasters is not used.\n",
    "\n",
    "    Returns the tuple (treshold, areas, sd, moi).\n",
    "    \"\"\"\n",
    "    # The builtins float/int are what the removed np.float/np.int aliases pointed to\n",
    "    treshold = float(treshold)\n",
    "    areas = None if areas is None else np.array(areas).astype(float).astype(int)\n",
    "    sd = None if sd is None else np.array(sd).astype(float)\n",
    "    moi = None if moi is None else np.array(moi).astype(float)\n",
    "    return treshold, areas, sd, moi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run(**expe['descriptors_param'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "desc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sha1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import hashlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hashlib.md5()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted(expe.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "expe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "expe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.array(expe['descriptors_param']['areas']).astype(np.float).astype(np.int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.array(None).astype(np.float).astype(np.int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "desc = importlib.import_module(expe['descriptors_script']['path'])\n",
    "desc.run(**expe['descriptors_param'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "A = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "A.pop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "A.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "continue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "E = Path('./Enrichment')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(E / 'Test' / 'aefaef.tif').stem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "E / ('qwer' + '.tif')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "P = Path('../Enrichment/Tests/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len([f for f in P.iterdir()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "list(P.glob('*_checkpointwes'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not []:\n",
    "    print('yay')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "l = list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "E = Exception('Nonte')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "str(E.with_traceback())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(I)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "I[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yaml.dump({'test': 'I dont care\\\\n lel'}, open('../bdq.yml', 'w'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}