Add insight function

This commit is contained in:
Florent Guiotte 2019-03-26 17:31:09 +01:00
parent 32f4bb28ff
commit 4f629c9be1
2 changed files with 113 additions and 1 deletions

View File

@ -10,6 +10,7 @@ General functions to transform point clouds to voxels compatible with numpy.
import logging
import numpy as np
import humanize
from .utils import bbox
import ipdb
@ -153,7 +154,93 @@ def _bin_mode(grid, spatial, feature):
winner = score > max_score
max_score[winner] = score[winner]
max_value[winner] = value
del score, winner
del score, winner, mask
return np.ma.masked_array(max_value, max_score == 0)
def _bin_insight(grid):
'''Return the predicted number of cells contained in grid.
'''
return np.prod([x.size - 1 for x in grid])
def _bin_density_insight(grid, dtype=float):
    '''Return the predicted memory usage (bytes) of the density binning.

    The per-cell cost counts one `dtype` value for the density buffer, one
    `dtype` value for the result data and one boolean for the result mask.

    The default `dtype` is the builtin `float`: the former `np.float`
    alias was the very same object and has been removed from recent NumPy
    releases (as was `np.bool`), which made this module fail at import.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    dtype : data-type
        Type of the values stored in the density grid.

    Returns
    -------
    mem_usage : number
        Predicted number of bytes needed.
    '''
    density = np.dtype(dtype).itemsize
    res_data = density
    res_mask = np.dtype(bool).itemsize
    return _bin_insight(grid) * (density + res_data + res_mask)
def _bin_mean_insight(grid, feature=None):
    '''Return the predicted memory usage (bytes) of the mean binning.

    The per-cell cost counts float density, weight and result buffers plus
    two boolean masks (the working mask and the result mask).

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Currently unused; float values are always assumed.

    Returns
    -------
    mem_usage : number
        Predicted number of bytes needed.
    '''
    # Builtin float/bool replace the np.float/np.bool aliases, which were
    # the same objects and have been removed from recent NumPy releases.
    float_size = np.dtype(float).itemsize
    bool_size = np.dtype(bool).itemsize

    density = float_size
    weight = float_size
    mask = bool_size
    res_data = float_size
    # The result mask is boolean, like the other masks sized in this module
    # (it used to be counted as a float, inflating the prediction).
    res_mask = bool_size
    return (density + weight + mask + res_data + res_mask) * _bin_insight(grid)
def _bin_mode_insight(grid, feature=None):
    '''Return the predicted memory usage (bytes) of the mode binning.

    The per-cell cost counts the float `max_score` and `max_value`
    buffers, plus the larger of the two remaining groups: the working
    buffers (`score`, `winner`) or the result (`res_data`, `res_mask`).
    Presumably both groups are never alive at the same time, hence the
    `max` instead of a sum.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Currently unused; float values are always assumed.

    Returns
    -------
    mem_usage : number
        Predicted number of bytes needed.
    '''
    # Builtin float/bool replace the np.float/np.bool aliases, which were
    # the same objects and have been removed from recent NumPy releases.
    float_size = np.dtype(float).itemsize
    bool_size = np.dtype(bool).itemsize

    max_score = float_size
    max_value = float_size
    score = float_size
    winner = bool_size
    res_data = float_size
    res_mask = bool_size
    peak_extra = max(score + winner, res_data + res_mask)
    return _bin_insight(grid) * (max_score + max_value + peak_extra)
def insight(grid, feature=None, method='density', mem_limit=None):
    '''Display memory usage of binning process.

    Display in the logs (INFO level) the predicted memory usage needed by the
    binning process. If `mem_limit` is set, then the method will throw an
    exception (MemoryError) if the prediction exceeds the limit.

    Parameters
    ----------
    grid : array of array (n,)
        Grid to bin spatial data.
    feature : array (m)
        Point feature to represent in the bins. If None, default float values
        are assumed.
    method : str
        Method to synthesize the point features in the grid. One of
        'density', 'mean' or 'mode'.
    mem_limit : number, str
        The limit allowed to further process the grid. If the insight
        prediction exceeds this limit a MemoryError is raised. If the
        parameter is a string, it can be set with human readable memory size
        (e.g. '3GB'). The default is bytes. A falsy limit (None or 0)
        disables the check.

    Returns
    -------
    mem_usage : number
        The future RAM usage (in bytes) required to further process the data
        binning.

    Raises
    ------
    IOError
        If `method` is unknown or if a string `mem_limit` is not understood.
    MemoryError
        If the predicted usage is higher than `mem_limit`.
    '''
    # Parse the limit first so a malformed limit is reported before any
    # prediction is attempted.
    if isinstance(mem_limit, str):
        mem_limit = _human_to_bytes(mem_limit)

    if method == 'density':
        mem_usage = _bin_density_insight(grid)
    elif method == 'mean':
        mem_usage = _bin_mean_insight(grid, feature)
    elif method == 'mode':
        mem_usage = _bin_mode_insight(grid, feature)
    else:
        raise IOError('Wrong method: \'{}\''.format(method))

    if mem_limit:
        allowed = humanize.naturalsize(mem_limit, binary=True)
    else:
        allowed = 'Not set'

    log.info('--- GRID INSIGHT ---')
    log.info('Grid size: \t%s', [x.size - 1 for x in grid])
    log.info('Number of cells:\t%s', humanize.intword(_bin_insight(grid)))
    log.info('Predicted RAM usage:\t%s', humanize.naturalsize(mem_usage, binary=True))
    log.info('Allowed max RAM usage:\t%s', allowed)
    log.info('--------------------')

    if mem_limit and mem_usage > mem_limit:
        msg = 'The memory requirement is higher than allowed memory'
        log.error(msg)
        raise MemoryError(msg)

    # The docstring has always promised this value; it was never returned.
    return mem_usage
def _human_to_bytes(human_size):
bytes_count = {'KB': 1, 'MB': 2, 'GB': 3}
for k, v in bytes_count.items():
if human_size.endswith(k):
return float(human_size.strip(k)) * 1024 ** v
raise IOError('Did not understand size: \'{}\''.format(human_size))

View File

@ -132,3 +132,28 @@ def test_bin(datadir, grid_id, set_id, method):
assert test.shape == tuple([x.size - 1 for x in grid]), 'Voxel grid shape and test grid missmatch'
assert (test.mask == truth.mask).all(), 'The returned mask is different from test truth'
assert np.allclose(test.compressed(), truth.compressed()), 'The returned values are different from test truth'
@pytest.mark.parametrize(
    'set_id, grid_id, cells',
    [
        ('0', '1', 1000),
        ('0', '2', 125),
        ('0', '0_1', 753571),
        ('0', '0_15', 226981),
    ],
)
def test__bin_insight(datadir, set_id, grid_id, cells):
    '''Check the predicted number of cells against known grids.'''
    predicted = vxl._bin_insight(data_grid(datadir, set_id, grid_id))

    assert predicted is not None, 'Tested function did not return anything :('
    assert predicted == cells, 'Private insight function did not return the correct number of cells for grid'
@pytest.mark.parametrize('method', ['density', 'mean', 'mode'])
def test_insight(method):
    '''Check that insight refuses a grid exceeding the allowed memory.

    The limit is given both as human readable strings and as a raw number.
    '''
    # Three axes of about 90 000 edges each: a huge grid.
    huge_grid = [np.arange(1, 10, .0001)] * 3

    for limit in ('3GB', '300 MB', '3KB', 3000):
        with pytest.raises(MemoryError):
            vxl.insight(huge_grid, method=method, mem_limit=limit)