742 lines
16 KiB
Plaintext
742 lines
16 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Serialize Attribute Profiles Classification\n",
|
|
"\n",
|
|
"- [X] Read a YAML recipe\n",
|
|
"- [X] Brew recipe\n",
|
|
"- [X] Compute hashes\n",
|
|
"- [X] Write hashes\n",
|
|
"- [X] Time metrics\n",
|
|
"- [X] Result metrics\n",
|
|
"- [X] Write metrics\n",
|
|
"- [X] Write/move results\n",
|
|
"- [X] Watch folder\n",
|
|
"- [X] Main loop\n",
|
|
"- [ ] Logs\n",
|
|
"- [ ] Catch errors\n",
|
|
"- [ ] Custom cross-validation generator (CVG)\n",
|
|
"\n",
|
|
"\n",
|
|
"## Init"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import yaml\n",
|
|
"import numpy as np\n",
|
|
"import importlib\n",
|
|
"import sys\n",
|
|
"import hashlib\n",
|
|
"from collections import OrderedDict\n",
|
|
"import time\n",
|
|
"import os\n",
|
|
"import datetime\n",
|
|
"from sklearn import metrics\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"\n",
|
|
"sys.path.append('..')\n",
|
|
"import Descriptors\n",
|
|
"from CrossValidationGenerator import APsCVG\n",
|
|
"\n",
|
|
"sys.path.append('../triskele/python')\n",
|
|
"import triskele"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"### Keep YAML mappings in insertion order when dumping\n",
|
|
"\n",
|
|
"def setup_yaml():\n",
|
|
" \"\"\"Register a representer so OrderedDict is dumped as a plain YAML mapping, keeping key order (https://stackoverflow.com/a/8661021).\"\"\"\n",
|
|
" represent_dict_order = lambda self, data: self.represent_mapping('tag:yaml.org,2002:map', data.items())\n",
|
|
" yaml.add_representer(OrderedDict, represent_dict_order) \n",
|
|
"setup_yaml()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Serial Classifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"expe_in = '../test.yml'\n",
|
|
"expe_out = '../test_out.yml'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%load_ext autoreload\n",
|
|
"%autoreload 2\n",
|
|
"with open(expe_in) as f:\n",
|
|
" expe = OrderedDict(yaml.safe_load(f)['expe'])\n",
|
|
"display(expe)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Compute hashes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_hashes(expe):\n",
|
|
" glob = hashlib.sha1()\n",
|
|
"\n",
|
|
" expe_hashes = OrderedDict()\n",
|
|
"\n",
|
|
" for k in ['ground_truth', 'descriptors_script', 'cross_validation', 'classifier']:\n",
|
|
" v = str(expe[k]).encode('utf-8')\n",
|
|
" expe_hashes[k] = hashlib.sha1(v).hexdigest()\n",
|
|
" glob.update(v)\n",
|
|
" expe_hashes['global'] = glob.hexdigest()\n",
|
|
" return expe_hashes\n",
|
|
"\n",
|
|
"expe_hashes = compute_hashes(expe)\n",
|
|
"expe_hashes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Write hashes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(expe_out, 'w') as of:\n",
|
|
" yaml.dump({'expe': expe, 'expe_hashes': expe_hashes}, of, default_flow_style=False, encoding=None, allow_unicode=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Keep track of time"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class Kronos(object):\n",
|
|
" def __init__(self):\n",
|
|
" self._pt = time.process_time()\n",
|
|
" self._times = OrderedDict()\n",
|
|
" self._stime = time.time()\n",
|
|
" \n",
|
|
" def time(self, name):\n",
|
|
" self._times[name + '_process_time'] = time.process_time() - self._pt\n",
|
|
" self._pt = time.process_time()\n",
|
|
" \n",
|
|
" def get_times(self):\n",
|
|
" return self._times\n",
|
|
" \n",
|
|
" def get_start_date(self):\n",
|
|
" return self._stime\n",
|
|
" \n",
|
|
" def get_end_date(self):\n",
|
|
" return time.time()\n",
|
|
" \n",
|
|
"kronos = Kronos()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Compute descriptors"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_descriptors(expe):\n",
|
|
" \"\"\"Compute descriptors from a standard expe recipe\"\"\"\n",
|
|
" script = expe['descriptors_script']\n",
|
|
" desc = importlib.import_module(script['name'])\n",
|
|
" #importlib.reload(Descriptors)\n",
|
|
" att = desc.run(**script['parameters'])\n",
|
|
" \n",
|
|
" return att\n",
|
|
"\n",
|
|
"att = compute_descriptors(expe)\n",
|
|
"kronos.time('description')\n",
|
|
"att.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Compute classification"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_classification(expe, att):\n",
|
|
" \"\"\"Read a standard expe recipe and attributes, return the resulting classification\"\"\"\n",
|
|
" # Ground truth\n",
|
|
" gt = triskele.read(expe['ground_truth'])\n",
|
|
"\n",
|
|
"\n",
|
|
" # CrossVal and ML\n",
|
|
" cv = expe['cross_validation']\n",
|
|
" cl = expe['classifier']\n",
|
|
"\n",
|
|
" cross_val = getattr(importlib.import_module(cv['package']), cv['name'])\n",
|
|
" classifier = getattr(importlib.import_module(cl['package']), cl['name'])\n",
|
|
" \n",
|
|
" prediction = np.zeros_like(gt)\n",
|
|
"\n",
|
|
" for xt, xv, yt, yv, ti in cross_val(gt, att, **cv['parameters']):\n",
|
|
" rfc = classifier(**cl['parameters'])\n",
|
|
" rfc.fit(xt, yt)\n",
|
|
"\n",
|
|
" ypred = rfc.predict(xv)\n",
|
|
"\n",
|
|
" prediction[ti] = ypred\n",
|
|
" \n",
|
|
" return prediction\n",
|
|
"\n",
|
|
"classification = compute_classification(expe, att)\n",
|
|
"kronos.time('classification')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sklearn.ensemble.RandomForestClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"importlib.import_module('RandomForestClassifier', package='sklearn.ensemble')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"getattr(importlib.import_module('sklearn.ensemble'), 'RandomForestClassifier')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Metrics"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_metrics(ground_truth, classification):\n",
|
|
" \"\"\"Return a dict of metrics (overall accuracy, Cohen's kappa) for the ground truth and the predicted classification\"\"\"\n",
|
|
" f = np.nonzero(classification)\n",
|
|
" pred = classification[f].ravel()\n",
|
|
" gt = ground_truth[f].ravel()\n",
|
|
" \n",
|
|
" results = OrderedDict() \n",
|
|
" results['overall_accuracy'] = float(metrics.accuracy_score(gt, pred))\n",
|
|
" results['cohen_kappa'] = float(metrics.cohen_kappa_score(gt, pred))\n",
|
|
" \n",
|
|
" return results\n",
|
|
"\n",
|
|
"def run_metrics(expe, classification):\n",
|
|
" \"\"\"Compute the metrics from a standard expe recipe and a given classification\"\"\"\n",
|
|
" \n",
|
|
" ### Extensible: meta-classes\n",
|
|
" gt = triskele.read(expe['ground_truth'])\n",
|
|
" return compute_metrics(gt, classification)\n",
|
|
"\n",
|
|
"expe_results = run_metrics(expe, classification)\n",
|
|
"expe_results"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kronos.time('metrics')\n",
|
|
"end_time = time.time()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Report"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def create_report(kronos):\n",
|
|
" expe_report = OrderedDict()\n",
|
|
"\n",
|
|
" expe_report['supervisor'] = os.uname()[1]\n",
|
|
"\n",
|
|
" for timev, datek in zip((kronos.get_start_date(), kronos.get_end_date()), ('start_date', 'end_date')):\n",
|
|
" expe_report[datek] = datetime.datetime.fromtimestamp(timev).strftime('Le %d/%m/%Y à %H:%M:%S')\n",
|
|
"\n",
|
|
" ressources = kronos.get_times()\n",
|
|
" ressources['ram'] = None\n",
|
|
"\n",
|
|
" expe_report['ressources'] = ressources\n",
|
|
" return expe_report\n",
|
|
"\n",
|
|
"expe_report = create_report(kronos)\n",
|
|
"expe_report"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Name and write prediction"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"oname = '{}_{}'.format(Path(expe_in).stem, expe_hashes['global'][:6])\n",
|
|
"oname_tif = oname + '.tif'\n",
|
|
"oname_yml = oname + '.yml'\n",
|
|
"\n",
|
|
"triskele.write(oname_tif, classification)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Write report and results"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(oname_yml, 'w') as of:\n",
|
|
" yaml.dump(OrderedDict({'expe': expe, \n",
|
|
" 'expe_hashes': expe_hashes, \n",
|
|
" 'expe_report': expe_report,\n",
|
|
" 'expe_classification': oname_tif,\n",
|
|
" 'expe_results': expe_results}), of, default_flow_style=False, encoding=None, allow_unicode=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"att.dtype"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import watchdog"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"plt.figure(figsize=(16, 9))\n",
|
|
"plt.imshow(prediction)\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Import a module and a class from their string names, then instantiate"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import importlib\n",
|
|
"module = importlib.import_module(module_name)\n",
|
|
"class_ = getattr(module, class_name)\n",
|
|
"instance = class_()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def run(rasters, treshold=1e4, areas=None, sd=None, moi=None):\n",
|
|
" treshold = float(treshold)\n",
|
|
" areas = None if areas is None else np.array(areas).astype(np.float).astype(np.int)\n",
|
|
" sd = None if sd is None else np.array(sd).astype(np.float)\n",
|
|
" moi = None if moi is None else np.array(moi).astype(np.float)\n",
|
|
" return treshold, areas, sd, moi\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run(**expe['descriptors_param'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"desc."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sha1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import hashlib"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hashlib.md5()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sorted(expe.items())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"expe"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"expe"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"np.array(expe['descriptors_param']['areas']).astype(np.float).astype(np.int)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"np.array(None).astype(np.float).astype(np.int)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"desc = importlib.import_module(expe['descriptors_script']['path'])\n",
|
|
"desc.run(**expe['descriptors_param'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"A = []"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"A.pop()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"A.count()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"continue"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"E = Path('./Enrichment')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"(E / 'Test' / 'aefaef.tif').stem"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"E / ('qwer' + '.tif')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"P = Path('../Enrichment/Tests/')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len([f for f in P.iterdir()])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"list(P.glob('*_checkpointwes'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"if not []:\n",
|
|
" print('yay')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"l = list()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"E = Exception('Nonte')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"str(E.with_traceback())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len(I)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"I[0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"yaml.dump({'test': 'I dont care\\\\n lel'}, open('../bdq.yml', 'w'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|