Compare commits


47 Commits

SHA1 Message Date
1f2c9d2df1 Read cache dir from environment 2021-09-17 19:48:50 +02:00
4a3e1bf442 Add Jurse3 protocol with cache 2021-09-17 19:01:16 +02:00
2081f87293 deps 2021-04-15 18:31:15 +02:00
2af2656837 Quick fix on cvgenerator 2020-07-08 17:46:22 +02:00
a2bca010f5 Add GroupStratifiedShuffleSplit 2020-07-08 17:23:50 +02:00
6f92c575f6 Add diff SDAPs and ext diff SDAPs 2020-07-07 17:16:43 +02:00
8156db750a add EDAPs 2020-07-07 15:12:36 +02:00
f72740f0b9 Add daps 2020-07-07 11:07:13 +02:00
7c0c52d6f4 Fix name of features for single AP 2020-06-16 22:26:23 +02:00
a70c782931 Pool for descriptors 2020-06-11 11:45:41 +02:00
4cffb2fc73 Attributes new in sdaps 2020-05-31 20:49:46 +02:00
8eed99109b Allow different thresholds per features 2020-05-31 17:57:58 +02:00
06357dc873 Quickfix 2020-05-31 15:55:57 +02:00
0521490ab3 Fix FPs 2020-05-31 15:30:52 +02:00
480c5b1c5f Patch FPs for origin in desc 2020-05-31 15:07:52 +02:00
78b01a5751 patch fps 2020-05-31 12:55:56 +02:00
eec4684e67 Add feature profiles 2020-05-31 12:38:31 +02:00
769c07373c Remove check 2020-05-31 11:35:55 +02:00
732060d160 Add message on exception 2020-05-31 10:33:42 +02:00
d12afdeb91 Try f32 2020-05-31 02:22:59 +02:00
496ad79f05 Fix feature names 2020-05-31 01:30:26 +02:00
b2874a1d85 Add TOS and MAXT 2020-05-31 00:49:24 +02:00
a4a4a5a188 Fix joblib 2020-05-30 22:53:51 +02:00
eb2a4b022f Add aps 2020-05-30 22:45:30 +02:00
1bcdef0728 Add CLI 2020-05-30 18:39:46 +02:00
72e8355031 Dict keys to list 2020-05-30 17:53:06 +02:00
819001fd84 Fix 2020-05-30 17:05:30 +02:00
a8814e5345 Quick fix 2020-05-30 16:29:38 +02:00
d12197661b Add feature name and importance 2020-05-30 16:23:35 +02:00
7bff8e20d2 Add coords in pipeline 2020-05-30 11:06:20 +02:00
06649497ae Fix coord_tiles 2020-05-29 18:32:32 +02:00
dd76f031fb Update logging 2020-05-29 16:31:47 +02:00
2f35f831a4 new tile loader 2020-05-29 10:03:45 +02:00
1566ea788d Minor 2020-05-27 14:49:33 +02:00
348fcd2531 add process time 2020-05-27 13:52:53 +02:00
cef8fb2f67 fix 2020-05-27 12:36:20 +02:00
d176caac6f sysD 2020-05-27 10:00:22 +02:00
93b815b41f remove serial 2020-05-27 09:56:30 +02:00
8c23ab2541 Merge branch 'develop' 2020-05-27 09:55:06 +02:00
0f75ebe1ea Merge branch 'develop' 2020-05-27 09:39:09 +02:00
5ae55c5411 Merge branch 'develop' 2018-10-13 19:01:44 +02:00
851617c7f6 Merge branch 'develop' 2018-10-12 10:55:53 +02:00
0c6d6b2eb1 Merge branch 'develop' 2018-10-10 17:38:50 +02:00
cdcfa2d284 Merge branch 'develop' 2018-09-21 16:07:29 +02:00
98043dae71 Merge branch 'develop' 2018-09-20 10:24:12 +02:00
347dec5e51 Merge branch 'develop' 2018-09-14 16:46:28 +02:00
15f6960794 Merge branch 'develop' into 'master'
Supervisor and Jurse ready for production

See merge request Florent/minigrida!1
2018-09-11 15:47:08 +00:00
20 changed files with 683 additions and 17 deletions

View File

@@ -1,3 +1,3 @@
 {
-    "process_count": 2
+    "process_count": 1
 }

View File

@@ -0,0 +1 @@
from .generic import *

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python
# file generic.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 08 juil. 2020
"""Abstract
doc.
"""
from sklearn.model_selection import GroupKFold, StratifiedShuffleSplit


class GroupStratifiedShuffleSplit(GroupKFold):
    def __init__(self, n_splits=5, train_size=None, random_state=None):
        super().__init__(n_splits)
        self.train_size = train_size
        self.random_state = random_state

    def split(self, X, y=None, groups=None):
        # Group-wise folds first, then a stratified subsample of each
        # training fold.
        splits = list(super().split(X, y, groups))
        for train, test in splits:
            sss = StratifiedShuffleSplit(1, train_size=self.train_size,
                                         random_state=self.random_state)
            sss_train, _ = next(sss.split(X[train], y[train], groups[train]))
            yield train[sss_train], test
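
Note: a minimal usage sketch of this splitter; the toy data and parameter values are illustrative, not from the repository:

import numpy as np

X = np.arange(24).reshape(12, 2)                 # 12 samples, 2 features
y = np.array([0, 1] * 6)                         # 2 balanced classes
groups = np.repeat(['t0', 't1', 't2', 't3'], 3)  # 4 tile groups

cv = GroupStratifiedShuffleSplit(n_splits=2, train_size=0.5, random_state=0)
for train, test in cv.split(X, y, groups):
    # Each test fold holds out whole groups; each train fold is a
    # stratified subsample of the remaining groups.
    print(train, test)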

View File

@@ -16,7 +16,7 @@ import logging
 from .design import Session, Experiment, Project, db
 
-log = logging.getLogger()
+log = logging.getLogger(__name__)
 
 def compute_expe_hash(experiment):
     return hashlib.md5(json.dumps(experiment, sort_keys=True).encode('utf-8')).hexdigest()
@@ -34,7 +34,7 @@ def create_experiment(session_name, protocol, expe, urgency=1):
         e = q.first()
         e.sessions.add(session)
     else:
-        Experiment(sessions=session, protocol=protocol, expe=expe, expe_hash=expe_hash)
+        Experiment(sessions=session, protocol=protocol, expe=expe, expe_hash=expe_hash, urgency=urgency)
 
 @orm.db_session
 def create_project(name):
@@ -58,7 +58,7 @@ def pending_experiments():
     return Experiment.select(lambda x: x.status == 'pending').exists()
 
-@orm.db_session(serializable=True, optimistic=False)
+@orm.db_session(optimistic=False)
 def next_experiment():
     # TODO: take session urgency into account
     expe = orm.select(e for e in Experiment
@@ -81,7 +81,7 @@ def update_experiment(expe, **params):
     _update_experiment(expe, **params)
 
-@orm.db_session
+@orm.db_session(optimistic=False, retry=3)
 def _update_experiment(expe, **params):
     e = Experiment.get_for_update(id=expe.id)
     for k, v in params.items():
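
Note: moving the work into _update_experiment under @orm.db_session(optimistic=False, retry=3) lets Pony re-run the transaction up to three times when a concurrent commit invalidates it, presumably to tolerate several supervisor processes polling the same database.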

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
# file aps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 30 mai 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing.
    return sap.attribute_profiles(*args)


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[str(v) for v in p.values()]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn
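
Note: the 'remove unwanted labels' block recurs in every descriptor script below; a self-contained sketch of the same masking with toy arrays (values illustrative):

import numpy as np

y_raw = np.array([1, 2, 0, 3, 0, 2])   # toy ground-truth labels
X_raw = np.arange(12).reshape(6, 2)    # toy feature vectors
remove = [0]                           # e.g. drop an 'unclassified' label

lbl = np.ones_like(y_raw, dtype=bool)  # keep-mask over all samples
for l in remove if remove else []:
    lbl &= y_raw != l                  # mask out each unwanted label

X, y = X_raw[lbl], y_raw[lbl]          # 4 samples remain
groups = np.repeat('tile_0_0', lbl.sum())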

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python
# file daps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 07 juil. 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _diff_attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing.
    return sap.attribute_profiles(*args).diff()


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_diff_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[sap.profiles._title(p)]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
# file dsdaps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 07 juil. 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _diff_sd_attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing.
    return sap.self_dual_attribute_profiles(*args).diff()


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_diff_sd_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[sap.profiles._title(p)]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python
# file edaps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 07 juil. 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _diff_attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing;
    # the original image is appended as an extra 'altitude' copy profile.
    image = args[0]
    name = args[3]
    return sap.attribute_profiles(*args).diff() \
        + sap.Profiles([image[None]],
                       [{'tree': {'image_name': name},
                         'attribute': 'altitude',
                         'profiles': [{'operation': 'copy'}]}])


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_diff_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[sap.profiles._title(p)]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python
# file edsdaps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 07 juil. 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _diff_sd_attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing;
    # the original image is appended as an extra 'altitude' copy profile.
    image = args[0]
    name = args[3]
    return sap.self_dual_attribute_profiles(*args).diff() \
        + sap.Profiles([image[None]],
                       [{'tree': {'image_name': name},
                         'attribute': 'altitude',
                         'profiles': [{'operation': 'copy'}]}])


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_diff_sd_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[sap.profiles._title(p)]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python
# file fps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 31 mai 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from joblib import Memory

memory = Memory(location='cache/', verbose=0)


@memory.cache
def _feature_profiles(**kwargs):
    return sap.feature_profiles(**kwargs)


def _origin_profile(image, name):
    return sap.Profiles([image[None, :]], [{
        'attribute': 'Origin',
        'filtering rule': None,
        'name': 'copy',
        'out feature': 'altitude',
        'profiles': [{'operation': 'copy feature altitude'}],
        'tree': {'adjacency': None, 'image_hash': None, 'image_name': name}}])


def run(gt, rasters, coords, remove, attributes, adjacency=4,
        feature='same', filtering='direct'):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        eap = []
        for name, raster in rastersi.items():
            eap += [_origin_profile(raster, name)]
            eap += [_feature_profiles(image=raster,
                                      attribute=attributes,
                                      adjacency=adjacency,
                                      image_name=name,
                                      out_feature=feature,
                                      filtering_rule=filtering)]
        eap = sap.concatenate(eap)
        Xn = [' '.join((a['tree']['image_name'],
                        a['attribute'],
                        'feature',
                        a['out feature'],
                        *[str(v) for v in p.values()]))
              for a in eap.description for p in a['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn
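
Note: unlike the Pool-based scripts above, this one memoizes _feature_profiles on disk with joblib, so identical calls are read back from cache/ across runs. A minimal, self-contained sketch of the same joblib pattern (slow_square is illustrative):

from joblib import Memory

memory = Memory(location='cache/', verbose=0)

@memory.cache
def slow_square(x):
    print('computing...')  # printed only on a cache miss
    return x * x

slow_square(4)  # computes and stores the result on disk
slow_square(4)  # served from the cache, no recomputation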

View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python
# file max_aps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 31 mai 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from joblib import Memory

memory = Memory(location='cache/', verbose=0)


@memory.cache
def _max_attribute_profiles(image, attribute, adjacency=4, image_name=None,
                            filtering_rule='direct'):
    maxt = sap.MaxTree(image, adjacency, image_name)
    return sap.create_profiles(maxt, attribute, 'altitude',
                               filtering_rule, 'max attribute profiles')


def run(gt, rasters, coords, remove, attributes, adjacency='4',
        filtering='direct'):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        eap = []
        for name, raster in rastersi.items():
            eap += [_max_attribute_profiles(raster, attributes, adjacency, name, filtering)]
        eap = sap.concatenate(eap)
        Xn = [' '.join((a['tree']['image_name'],
                        a['attribute'],
                        *[str(v) for v in p.values()]))
              for a in eap.description for p in a['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -11,12 +11,12 @@ doc.
 import numpy as np
 
-def run(gt, rasters, remove=None):
+def run(gt, rasters, coords, remove=None):
     X = []
     y = []
     groups = []
-    for i, (gti, rastersi) in enumerate(zip(gt, rasters)):
+    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
         # Create vectors
         X_raw = np.moveaxis(np.array(list(rastersi.values())), 0, -1)
         y_raw = gti
@@ -28,10 +28,11 @@ def run(gt, rasters, remove=None):
         X += [X_raw[lbl]]
         y += [y_raw[lbl]]
-        groups += [np.repeat(i, lbl.sum())]
+        groups += [np.repeat(coordsi, lbl.sum())]
     X = np.concatenate(X)
     y = np.concatenate(y)
     groups = np.concatenate(groups)
-    return X, y, groups
+    Xn = rasters[0].keys()
+    return X, y, groups, Xn
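
Note: Xn = rasters[0].keys() exposes the raster names as feature names, aligning this loader with the (X, y, groups, Xn) 4-tuple that the other descriptor scripts return and that Jurse2 now unpacks.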

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
# file sdaps.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 31 mai 2020
"""Abstract
doc.
"""
import numpy as np
import sap
from multiprocessing import Pool


def _self_dual_attribute_profiles(*args):
    # Module-level wrapper so the call can be pickled by multiprocessing.
    return sap.self_dual_attribute_profiles(*args)


def run(gt, rasters, coords, remove,
        attributes, adjacency='4', filtering='direct', dtype=np.float32):
    X = []
    y = []
    groups = []
    Xn = None
    for i, (gti, rastersi, coordsi) in enumerate(zip(gt, rasters, coords)):
        # Compute EAP
        attributes = [attributes] * len(rastersi) if isinstance(attributes, dict) else attributes
        pool = Pool()
        eap = pool.starmap(_self_dual_attribute_profiles, [
            (raster, attribute, adjacency, name, filtering)
            for (name, raster), attribute
            in zip(rastersi.items(), attributes)])
        pool.close()
        pool.join()
        eap = sap.concatenate(eap)
        Xn = [' '.join((a.description['tree']['image_name'],
                        a.description['attribute'],
                        *[str(v) for v in p.values()]))
              for a in eap for p in a.description['profiles']] if not Xn else Xn
        # Create vectors
        X_raw = np.moveaxis(np.array(list(eap.vectorize())), 0, -1).astype(dtype)
        y_raw = gti
        # Remove unwanted labels from X, y
        lbl = np.ones_like(y_raw, dtype=bool)
        for l in remove if remove else []:
            lbl &= y_raw != l
        X += [X_raw[lbl]]
        y += [y_raw[lbl]]
        groups += [np.repeat(coordsi, lbl.sum())]
    X = np.concatenate(X)
    y = np.concatenate(y)
    groups = np.concatenate(groups)
    return X, y, groups, Xn

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python
# file coord_tiles.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 29 mai 2020
"""Abstract
doc.
"""
from pathlib import Path
import logging
import rasterio as rio

log = logging.getLogger(__name__)


def run(rasters_path, rasters_name, gt_suffix='gt.tif'):
    gt_names = list(Path(rasters_path).glob('*' + gt_suffix))
    log.info('Found {} ground truth tiles.'.format(len(gt_names)))
    gt = []
    rasters = []
    coords = []
    for gtn in gt_names:
        gt += [load_tif(gtn)]
        rasters += [{Path(n).stem:
                     load_tif(gtn.as_posix().replace(gt_suffix, '') + n)
                     for n in rasters_name}]
        coords += ['_'.join(gtn.stem.split('_')[:2])]
    return gt, rasters, coords


def load_tif(path):
    # Read the first band; a context manager closes the dataset afterwards.
    with rio.open(str(path)) as src:
        return src.read()[0]
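
Note: tiles are paired purely by filename convention. With gt_suffix='gt.tif' and a hypothetical tile '0_1_gt.tif':

from pathlib import Path

gtn = Path('tiles/0_1_gt.tif')                    # hypothetical tile name
gtn.as_posix().replace('gt.tif', '') + 'dsm.tif'  # -> 'tiles/0_1_dsm.tif'
'_'.join(gtn.stem.split('_')[:2])                 # -> coords '0_1'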

View File

@@ -10,3 +10,4 @@
 #from .jurse import Jurse
 from .jurse2 import Jurse2
+from .jurse3 import Jurse3

View File

@@ -21,6 +21,7 @@ class Jurse2(Protocol):
     def __init__(self, expe):
         super().__init__(expe, self.__class__.__name__)
+        self._results = {}
 
     def _run(self):
         self._log.info('Load data')
@@ -44,9 +45,7 @@ class Jurse2(Protocol):
         self._log.info('Run metrics')
         metrics = self._run_metrics(classification, descriptors)
 
-        results = {}
-        results['metrics'] = metrics
-        self._results = results
+        self._results['metrics'] = metrics
 
     def _load_data(self):
         data_loader = self._expe['data_loader']
@@ -65,7 +64,7 @@ class Jurse2(Protocol):
         return att
 
     def _compute_classification(self, descriptors):
-        X, y, groups = descriptors
+        X, y, groups, Xn = descriptors
 
         # CrossVal and ML
         cv = self._expe['cross_validation']
@@ -76,6 +75,7 @@ class Jurse2(Protocol):
         y_pred = np.zeros_like(y)
+        cl_feature_importances = []
 
         cvi = cross_val(**cv['parameters'])
         for train_index, test_index in cvi.split(X, y, groups):
             cli = classifier(**cl['parameters'])
@@ -86,13 +86,21 @@ class Jurse2(Protocol):
             self._log.info(' - predict')
             y_pred[test_index] = cli.predict(X[test_index])
+            cl_feature_importances += [cli.feature_importances_.copy()]
+
+        cl_feature_importances = np.array(cl_feature_importances)
+        self._results['features'] = {
+            'name': list(Xn),
+            'importance': cl_feature_importances.tolist()
+        }
 
         return y_pred
 
     def _get_results(self):
         return self._results
 
     def _run_metrics(self, classification, descriptors):
-        X, y_true, groups = descriptors
+        X, y_true, groups, Xn = descriptors
         y_pred = classification
 
         self._log.info(' - Scores')
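
Note: collecting cli.feature_importances_ assumes the configured classifier exposes that attribute (true of scikit-learn's tree ensembles such as RandomForestClassifier); the per-fold importances are stored next to the feature names Xn built by the descriptor scripts.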

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python
# file jurse3.py
# author Florent Guiotte <florent.guiotte@irisa.fr>
# version 0.0
# date 17 sept. 2021
import os
import importlib
from joblib import Memory
from pathlib import Path
from . import Jurse2

ENV_KEY = 'MINIGRIDA_CACHE'
DEFAULT_CACHE = './cache'
CACHE = os.environ.get(ENV_KEY, DEFAULT_CACHE)


class Jurse3(Jurse2):
    """Jurse2 protocol with cache.

    Same as Jurse2, but caches descriptor results to speed up
    hyperparameter tuning.
    """
    def __init__(self, expe):
        super().__init__(expe)
        # location=None disables caching when the directory is missing.
        self.memory = Memory(CACHE if Path(CACHE).exists() else None,
                             verbose=0)

    def _compute_descriptors(self, data):
        script = self._expe['descriptors_script']
        desc = importlib.import_module(script['name'])
        run = self.memory.cache(desc.run)
        att = run(*data, **script['parameters'])
        return att
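
Note: the cache location is read from the MINIGRIDA_CACHE environment variable, falling back to ./cache; since joblib's Memory(location=None) disables caching, a missing cache directory silently makes Jurse3 behave like plain Jurse2.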

View File

@@ -19,13 +19,29 @@ from protocols.protocol import TestError
 import database
 from multiprocessing import Process
 import json
+import argparse
 
 host = os.uname()[1]
 log = logging.getLogger('Supervisor [{}]'.format(host))
 
+parser = argparse.ArgumentParser(description='Run minigrida supervisor')
+parser.add_argument('--conf',
+                    metavar='config',
+                    default='config.json',
+                    type=str,
+                    help='the path to the supervisor\'s config file.')
+parser.add_argument('--cred',
+                    metavar='credentials',
+                    default='credentials.json',
+                    type=str,
+                    help='the path to the DB credentials file.')
+args = parser.parse_args()
+
 def run(expe, hostpid=host):
-    database.update_experiment(expe, worker=hostpid, start_date=datetime.now())
+    database.update_experiment(expe,
+                               worker=hostpid,
+                               status='running',
+                               start_date=datetime.now())
 
     # Load protocol
     log.info('Load protocol {}'.format(expe.protocol))
@@ -53,6 +69,9 @@ def run(expe, hostpid=host):
         aa=test.aa,
         k=test.k,
         report=test.get_results(),
+        ressources={
+            'process_time': test.get_process_time()
+        },
         status='complete')
 
     # End of test
@@ -64,7 +83,7 @@ def main(pid=None):
     log.name = 'Supervisor [{}]'.format(hostpid)
 
     log.info('Connecting to database')
-    database.connect('credentials.json')
+    database.connect(args.cred)
 
     while(True):
         if not database.pending_experiments():
@@ -88,7 +107,7 @@ if __name__ == '__main__':
     logger.setup_logging()
     log.info('Starting supervisor')
     try:
-        with open('config.json') as f:
+        with open(args.conf) as f:
             config = json.load(f)
         process_count = config['process_count']
     except Exception as e:
@@ -97,3 +116,4 @@
     for i in range(process_count):
         Process(target=main, args=(i,)).start()
+        time.sleep(1)
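
Note: with the new argparse flags the supervisor can be pointed at alternative files, e.g. python supervisor.py --conf staging.json --cred staging_credentials.json (file names illustrative); both flags default to the previously hard-coded config.json and credentials.json.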

View File

@@ -2,3 +2,4 @@ sklearn
 sap>=0.2.2
 pony
 psycopg2
+pyyaml

View File

@@ -17,4 +17,8 @@ setup(name='minigrida',
       author_email='florent.guiotte@uhb.fr',
       url='https://git.guiotte.fr/Florent/minigrida',
       packages=['minigrida'],#'cvgenerators', 'descriptors', 'protocols', 'database'],
+      install_requires=[
+          'pony',
+          'psycopg2-binary',
+      ]
 )