Add insight function

This commit is contained in:
Florent Guiotte 2019-03-26 17:31:09 +01:00
parent 32f4bb28ff
commit 4f629c9be1
2 changed files with 113 additions and 1 deletions

View File

@ -10,6 +10,7 @@ General functions to transform point clouds to voxels compatible with numpy.
import logging import logging
import numpy as np import numpy as np
import humanize
from .utils import bbox from .utils import bbox
import ipdb import ipdb
@ -153,7 +154,93 @@ def _bin_mode(grid, spatial, feature):
winner = score > max_score winner = score > max_score
max_score[winner] = score[winner] max_score[winner] = score[winner]
max_value[winner] = value max_value[winner] = value
del score, winner del score, winner, mask
return np.ma.masked_array(max_value, max_score == 0) return np.ma.masked_array(max_value, max_score == 0)
def _bin_insight(grid):
    '''Return the predicted number of cells contained in grid.

    Every entry of `grid` is only read through its ``size`` attribute; an
    entry holding ``size`` values contributes ``size - 1`` cells along its
    axis (presumably the entries are the bin edges of each axis).

    The product is accumulated with Python integers instead of `np.prod`,
    which works on fixed width integers and silently wraps around when the
    number of cells gets very large.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.

    Return
    ------
    cells : int
        The number of cells of the grid. An empty `grid` gives 1.
    '''
    cells = 1
    for axis in grid:
        # An axis with less than two values does not hold any cell, never
        # let it contribute a negative count.
        cells *= max(int(axis.size) - 1, 0)
    return cells
def _bin_density_insight(grid, dtype=float):
    '''Return the predicted memory usage of the density binning, in bytes.

    The per cell budget sums one `dtype` item for the density array, one
    `dtype` item for the result data and one boolean for the result mask.

    The builtin `float` and `bool` are used in place of the `np.float` and
    `np.bool` aliases: they designate the same types, and the aliases do
    not exist anymore in recent NumPy releases.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    dtype : data-type
        Type of the density values. The default is float.

    Return
    ------
    mem_usage : number
        Predicted number of bytes.
    '''
    density = np.dtype(dtype).itemsize
    res_data = density
    res_mask = np.dtype(bool).itemsize
    return _bin_insight(grid) * (density + res_data + res_mask)
def _bin_mean_insight(grid, feature=None):
    '''Return the predicted memory usage of the mean binning, in bytes.

    The per cell budget sums the density, weight and result data arrays
    (one float each) with the working mask and the result mask (one boolean
    each).

    The builtin `float` and `bool` are used in place of the `np.float` and
    `np.bool` aliases: they designate the same types, and the aliases do
    not exist anymore in recent NumPy releases.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Currently unused, float values are always assumed.

    Return
    ------
    mem_usage : number
        Predicted number of bytes.
    '''
    density = np.dtype(float).itemsize
    weight = np.dtype(float).itemsize
    mask = np.dtype(bool).itemsize
    res_data = np.dtype(float).itemsize
    # The result mask is a boolean array, as in the other estimators; it
    # must not be accounted for as a float array.
    res_mask = np.dtype(bool).itemsize
    return (density + weight + mask + res_data + res_mask) * _bin_insight(grid)
def _bin_mode_insight(grid, feature=None):
    '''Return the predicted memory usage of the mode binning, in bytes.

    The per cell budget sums the `max_score` and `max_value` arrays with
    the larger of two groups: the `score` and `winner` temporaries, or the
    result data and mask. Only the larger group is counted, presumably
    because the temporaries are released before the result is built.

    The builtin `float` and `bool` are used in place of the `np.float` and
    `np.bool` aliases: they designate the same types, and the aliases do
    not exist anymore in recent NumPy releases.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Currently unused, float values are always assumed.

    Return
    ------
    mem_usage : number
        Predicted number of bytes.
    '''
    max_score = np.dtype(float).itemsize
    max_value = np.dtype(float).itemsize
    score = np.dtype(float).itemsize
    winner = np.dtype(bool).itemsize
    res_data = np.dtype(float).itemsize
    res_mask = np.dtype(bool).itemsize
    return _bin_insight(grid) * (max_score + max_value + max(score + winner, res_data + res_mask))
def insight(grid, feature=None, method='density', mem_limit=None):
    '''Display memory usage of binning process.

    Display in the logs (INFO level) the predicted memory usage needed by the
    binning process. If `mem_limit` is set, then the method will throw an
    exception (MemoryError) if the prediction exceeds the limit.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Point feature to represent in the bins. If None, default float values
        are assumed.
    method : str
        Method to synthesize the point features in the grid. One of
        'density', 'mean' or 'mode'.
    mem_limit : number, str
        The limit allowed to further process the grid. If the insight
        prediction exceeds this limit a MemoryError is raised. If the
        parameter is a string, it can be set with human readable memory size
        (e.g. '3GB'). The default is bytes. None or 0 means no limit.

    Return
    ------
    mem_usage : number
        The future RAM usage required to further process the data binning.

    Raises
    ------
    IOError
        If `method` is not a known method.
    MemoryError
        If `mem_limit` is set and the predicted usage is above it.
    '''
    # Only strings need a conversion, numbers are already a count of bytes.
    if isinstance(mem_limit, str):
        mem_limit = _human_to_bytes(mem_limit)

    if method == 'density':
        mem_usage = _bin_density_insight(grid)
    elif method == 'mean':
        mem_usage = _bin_mean_insight(grid, feature)
    elif method == 'mode':
        mem_usage = _bin_mode_insight(grid, feature)
    else:
        raise IOError('Wrong method: \'{}\''.format(method))

    log.info('--- GRID INSIGHT ---')
    log.info('Grid size: \t{}'.format([x.size - 1 for x in grid]))
    log.info('Number of cells:\t{}'.format(humanize.intword(_bin_insight(grid))))
    log.info('Predicted RAM usage:\t{}'.format(humanize.naturalsize(mem_usage, binary=True)))
    log.info('Allowed max RAM usage:\t{}'.format(humanize.naturalsize(mem_limit, binary=True) if mem_limit else 'Not set'))
    log.info('--------------------')

    if mem_limit and mem_usage > mem_limit:
        msg = 'The memory requirement is higher than allowed memory'
        log.error(msg)
        raise MemoryError(msg)

    # The prediction is handed back to the caller, as documented.
    return mem_usage
def _human_to_bytes(human_size):
    '''Convert a human readable memory size to a number of bytes.

    The size is a number optionally followed by a unit, e.g. '3GB',
    '300 MB' or '1.5GiB'. Units are read case insensitively and are all
    multiples of 1024 ('KB' and 'KiB' are both 1024 bytes). Without unit
    the number is a count of bytes.

    Parameters
    ----------
    human_size : str
        The memory size to convert.

    Return
    ------
    size : float
        The number of bytes.

    Raises
    ------
    IOError
        If the size has no known unit and is not a plain number.
    ValueError
        If the unit is known but the part before it is not a number.
    '''
    exponents = (('K', 1), ('M', 2), ('G', 3), ('T', 4))
    text = human_size.strip()
    upper = text.upper()

    for prefix, exponent in exponents:
        for unit in (prefix + 'IB', prefix + 'B'):
            if upper.endswith(unit):
                # Slice the unit off: `str.strip` would remove a set of
                # characters on both ends rather than this suffix.
                return float(text[:-len(unit)]) * 1024 ** exponent

    # No unit, the default is bytes.
    try:
        size = float(text)
    except ValueError:
        size = None
    if size is None:
        raise IOError('Did not understand size: \'{}\''.format(human_size))
    return size

View File

@ -132,3 +132,28 @@ def test_bin(datadir, grid_id, set_id, method):
assert test.shape == tuple([x.size - 1 for x in grid]), 'Voxel grid shape and test grid missmatch' assert test.shape == tuple([x.size - 1 for x in grid]), 'Voxel grid shape and test grid missmatch'
assert (test.mask == truth.mask).all(), 'The returned mask is different from test truth' assert (test.mask == truth.mask).all(), 'The returned mask is different from test truth'
assert np.allclose(test.compressed(), truth.compressed()), 'The returned values are different from test truth' assert np.allclose(test.compressed(), truth.compressed()), 'The returned values are different from test truth'
@pytest.mark.parametrize('set_id, grid_id, cells', [
    ('0', '1', 1000),
    ('0', '2', 125),
    ('0', '0_1', 753571),
    ('0', '0_15', 226981),
])
def test__bin_insight(datadir, set_id, grid_id, cells):
    '''Check the number of cells predicted for the reference grids.'''
    predicted = vxl._bin_insight(data_grid(datadir, set_id, grid_id))

    assert predicted is not None, 'Tested function did not return anything :('
    assert predicted == cells, 'Private insight function did not return the correct number of cells for grid'
@pytest.mark.parametrize('method', ['density', 'mean', 'mode'])
def test_insight(method):
    '''Check that every memory limit below the prediction is enforced.'''
    # Create a huge grid
    axis = np.arange(1, 10, .0001)
    grid = [axis, axis, axis]

    # Limits given as human readable strings and as a raw number of bytes.
    for limit in ('3GB', '300 MB', '3KB', 3000):
        with pytest.raises(MemoryError):
            vxl.insight(grid, method=method, mem_limit=limit)