Binning is passing tests

This commit is contained in:
Florent Guiotte 2019-03-22 17:16:39 +01:00
parent dae16f62c7
commit 32f4bb28ff
3 changed files with 188 additions and 31 deletions

View File

@ -15,28 +15,35 @@ import ipdb
log = logging.getLogger(__name__)
def _ui_step(step):
def _ui_step(step, spatial):
'''User input management for step (number or array)
'''
try:
iter(step)
if len(step) != 3:
msg = 'Wrong steps input, 3 steps expected in step = \'{}\''.format(step)
if len(step) != spatial.shape[-1]:
msg = 'Missmatch between steps count and spatial dimensions, {} step(s) expected while step = \'{}\''.format(spatial.shape[-1], step)
log.error(msg)
raise IOError(msg)
raise ValueError(msg)
out_step = step
except TypeError:
step = [step] * 3
return step
out_step = [step] * spatial.shape[-1]
for s in out_step:
if s <= 0:
msg = 'Step should be greater than 0, steps = \'{}\''.format(step)
log.error(msg)
raise ValueError(msg)
return out_step
def get_grid(spatial, step):
'''Return grid bins.
Compute the grid bins of a spatial point cloud or corresponding bounding
box according to given step (or steps for anisotropic grid).
Compute the grid bins of a point cloud or the corresponding bounding box
according to given step (or steps for anisotropic grid).
Parameters
----------
spatial : array (n, 3)
spatial : array (m, n)
The spatial point cloud or the corresponding bounding box to grid.
step : number or array or tuple
The step of the grid, can be a number to get an isotropic grid, or an
@ -44,18 +51,109 @@ def get_grid(spatial, step):
Returns
-------
grid : array of array (3,)
grid : array of array (n,)
Grid of spatial given step. Return three arrays (not necessarily of the
same size) defining the bins of axis `x`, `y` and `z`.
'''
spatial = np.array(spatial)
bb = bbox(spatial)
step = _ui_step(step)
step = _ui_step(step, spatial)
#ipdb.set_trace()
grid = []
for a_min, a_max, a_s in zip(bb[0], bb[1], step):
# Beware of float underflow
bins = np.trunc((a_max - a_min) / a_s).astype(int) + 1
grid += [np.linspace(a_min, a_min + bins * a_s, bins + 1)]
return grid
def bin(grid, spatial, feature=None, method='density'):
    '''Bin spatial data in a grid.

    Build a voxel grid from the point positions given in `spatial`,
    optionally valued with the per-point `feature`.

    Parameters
    ----------
    grid : array of array (n,)
        Bin edges along each axis.
    spatial : array (m, n)
        Position of the points in R^n.
    feature : array (m)
        Per-point value to summarise in each cell. Required by every method
        except 'density'. Default is None.
    method : str
        How the points of a cell are summarised:

        - 'density': number of points in the cell (`feature` is ignored).
        - 'mean': mean of the feature values in the cell.
        - 'mode': most common feature value in the cell, delegated to
          `_bin_mode`.

        The default is 'density'.

    Returns
    -------
    binned_pc : masked array
        The result of the selected `_bin_*` helper.

    Raises
    ------
    ValueError
        If `feature` is None while `method` is not 'density'.
    NotImplementedError
        If `method` is not one of the methods listed above.
    '''
    log.info('Bining point cloud in grid...')

    # Density never looks at the feature, so it is served before validation.
    if method == 'density':
        return _bin_density(grid, spatial)

    # Every remaining method summarises a feature: it must be provided.
    if feature is None:
        missing = 'Missing required argument : \'feature\''
        log.error(missing)
        raise ValueError(missing)

    if method == 'mean':
        return _bin_mean(grid, spatial, feature)

    if method == 'mode':
        return _bin_mode(grid, spatial, feature)

    unknown = 'Method \'{}\' does not exist.'.format(method)
    log.error(unknown)
    raise NotImplementedError(unknown)
def _bin_density(grid, spatial):
'''Bin spatial in a grid, density method.
'''
density, edge = np.histogramdd(spatial, grid)
vxl = np.ma.masked_array(density, density == 0)
return vxl
def _bin_mean(grid, spatial, feature):
'''Bin spatial in a grid, mean method.
'''
density, edge = np.histogramdd(spatial, grid)
weightd, edge = np.histogramdd(spatial, grid, weights=feature)
mask = density == 0
return np.ma.masked_array(np.divide(weightd, density, where=~mask), mask)
def _bin_mode(grid, spatial, feature):
    '''Bin spatial in a grid, mode method.

    For each cell, return the most common value of `feature` among the
    points of that cell. The work is vectorised per value, so run time grows
    linearly with the number of unique values in `feature`.

    Parameters
    ----------
    grid : array of array (n,)
        Bin edges along each axis.
    spatial : array (m, n)
        Position of the points.
    feature : array (m)
        Per-point value (typically a label).

    Returns
    -------
    mode : masked array
        Modal feature value of each cell, with the dtype of `feature`. Cells
        holding no point are masked. On a tie the smallest value wins.
    '''
    log.info('Mode binning...')

    # Accept plain sequences as well as ndarrays (dtype and boolean indexing
    # are needed below).
    spatial = np.asarray(spatial)
    feature = np.asarray(feature)

    values = np.unique(feature)
    if values.size > 10:
        log.warning('Mode called on data with {} unique values, processing may be long.'.format(values.size))

    # Best count seen so far per cell, and the value that achieved it.
    best_count = np.zeros([len(edges) - 1 for edges in grid])
    best_value = np.zeros_like(best_count, dtype=feature.dtype)

    # `values` is sorted and the comparison is strict, so on a tie the
    # smallest value (seen first) is kept.
    for rank, value in enumerate(values, 1):
        log.info('Processing value {}/{}'.format(rank, values.size))
        count, _ = np.histogramdd(spatial[feature == value], grid)
        winner = count > best_count
        best_count[winner] = count[winner]
        best_value[winner] = value

    return np.ma.masked_array(best_value, best_count == 0)

View File

@ -22,14 +22,31 @@ def data_pc(datadir, set_id):
data = np.loadtxt(path)
return Pcloud(data[:,:3], data[:,3])
#def data_vxl(datadir, set_id, step, method):
# pass
def data_vxl(datadir, set_id, grid_id, method):
    """Load a ground-truth voxel grid from the test data directory.

    The file `pc<set_id>_vxl_s<grid_id>.txt` is expected to hold one row per
    filled voxel with the columns `x y z density mean mode`.

    Parameters
    ----------
    datadir : object with a `join(name)` method
        Directory holding the test data files.
    set_id : str
        Identifier of the point cloud data set.
    grid_id : str
        Identifier of the grid used for the binning.
    method : str
        Column to load: 'density', 'mean' or 'mode'.

    Returns
    -------
    vxl : masked array
        Dense voxel grid; voxels not listed in the file are masked.
    """
    def _unpack_vxl(spatial, feature):
        # One index array per axis, usable as a fancy index.
        coords = tuple(spatial.T)

        vxld = np.zeros(spatial.max(axis=0) + 1)
        vxld[coords] = feature

        # Everything is masked except the voxels listed in the file.
        vxlm = np.ones_like(vxld, dtype=bool)
        vxlm[coords] = False

        return np.ma.masked_array(vxld, vxlm)

    def _load_vxl(fname, feature_name):
        fields = ('x', 'y', 'z', 'density', 'mean', 'mode')
        i = fields.index(feature_name)

        # Load the requested file rather than a hard-coded fixture path.
        data = np.loadtxt(str(fname))
        spatial = data[:, :3].astype(np.intp)
        feature = data[:, i]

        return _unpack_vxl(spatial, feature)

    path = datadir.join('pc{}_vxl_s{}.txt'.format(set_id, grid_id))
    return _load_vxl(path, method)
def data_grid(datadir, set_id, step_id):
def _read(fname):
@ -41,10 +58,15 @@ def data_grid(datadir, set_id, step_id):
path = datadir.join('pc{}_grid_s{}.txt'.format(set_id, step_id))
return _read(path)
@pytest.mark.parametrize('set_id, step, grid_id', [
('0', 1., '1'),
('0', 2., '2'),
('0', .1, '0_1'),
('0', .6, '0_6'),
('0', .7, '0_7'),
('0', .15, '0_15'),
('0', [1.,1.,2.] , '1-1-2'),
])
def test_get_grid(datadir, set_id, step, grid_id):
spatial = data_pc(datadir, set_id).spatial
@ -62,14 +84,51 @@ def test_get_grid(datadir, set_id, step, grid_id):
for axis_test, axis_truth in zip(test, res):
assert axis_test.size == axis_truth.size, 'Wrong size for axis'
assert (axis_test == axis_truth).all(), 'Axis inequality between truth and test'
def test_grid():
"""
- dtype
- method
- mask
- data
"""
pass
assert np.allclose(axis_test, axis_truth), 'Axis inequality between truth and test'
#assert (axis_test - axis_truth == 0).all(), 'Float overflow in tested grid'
def test_get_grid_ui():
    """get_grid must reject invalid `step` input with a ValueError."""
    np.random.seed(0)
    spatial_2D = np.random.random((100, 2))
    spatial_3D = np.random.random((100, 3))

    # Negative isotropic step.
    with pytest.raises(ValueError):
        vxl.get_grid(spatial_3D, -1)

    # Negative step on a single axis.
    with pytest.raises(ValueError):
        vxl.get_grid(spatial_3D, [1., -1., 1.])

    # Fewer steps than spatial dimensions.
    with pytest.raises(ValueError):
        vxl.get_grid(spatial_3D, [1., 1.])

    # More steps than spatial dimensions.
    with pytest.raises(ValueError):
        vxl.get_grid(spatial_2D, [1., 1., 1.])
def test_bin_ui():
    """bin must reject a missing feature and an unknown method."""
    points = np.random.random((10, 3))
    values = np.random.random((10))
    edges = [np.arange(0, 1, .1)] * 3

    # A feature-based method called without any feature.
    with pytest.raises(ValueError):
        vxl.bin(edges, points, method='mean')

    # A method name that does not exist.
    with pytest.raises(NotImplementedError):
        vxl.bin(edges, points, values, method='🍆')
@pytest.mark.parametrize('set_id, grid_id, method', [
    ('0', '1', 'density'),
    ('0', '1', 'mean'),
    ('0', '1', 'mode'),
])
def test_bin(datadir, grid_id, set_id, method):
    """Compare vxl.bin against the stored ground-truth voxel grid."""
    cloud = data_pc(datadir, set_id)
    edges = data_grid(datadir, set_id, grid_id)
    expected = data_vxl(datadir, set_id, grid_id, method)

    result = vxl.bin(edges, cloud.spatial, cloud.feature, method)

    assert result is not None, 'Tested function did not return anything :('
    assert hasattr(result, 'mask'), 'The array is not masked!'

    # One cell per pair of consecutive bin edges along each axis.
    expected_shape = tuple([axis.size - 1 for axis in edges])
    assert result.shape == expected_shape, 'Voxel grid shape and test grid missmatch'

    same_mask = (result.mask == expected.mask).all()
    assert same_mask, 'The returned mask is different from test truth'

    same_values = np.allclose(result.compressed(), expected.compressed())
    assert same_values, 'The returned values are different from test truth'

View File

@ -1,4 +1,4 @@
# x y z density mean mode
# x y z density mean mode
0 0 0 1 2 2
0 2 1 4 10 5
9 9 9 1 1 1