Add dtype to load_txt

2019-04-01 17:21:48 +02:00 · 2019-04-01 17:21:48 +02:00 · c1def4414f
commit c1def4414f
parent 8fa3208054
2 changed files with 20 additions and 8 deletions
--- a/idefix/io.py
+++ b/idefix/io.py
@ -77,7 +77,7 @@ def load_las(fname):
    return pcloud


-def load_txt(fname, header, delimiter=' '):
+def load_txt(fname, header, delimiter=' ', dtype=None):
    '''Load a text file into idefix point cloud format.

    Read point cloud from text files (CSV like).
@ -95,6 +95,9 @@ def load_txt(fname, header, delimiter=' '):
        Names of the columns contained in the text point cloud file.
    delimiter : str, optional
        String used to separate values. The default is whitespace.
+    dtype : sequence of data-type, optional
+        Data type of each column, given in the same order and number as
+        `header`. If None (the default), every column is read as float.

    Returns
    -------
@ -109,6 +112,9 @@ def load_txt(fname, header, delimiter=' '):
        log.error(msg)
        raise IOError(msg)

+    if dtype is not None:
+        assert len(dtype) == len(header), 'dtype and header must be the same size'
+
    log.info('Loading TXT file \'{}\'...'.format(fname))
    try:
        log.debug('Loading the first lines of \'{}\'...'.format(fname))
@ -124,8 +130,9 @@ def load_txt(fname, header, delimiter=' '):
        log.error(msg)
        raise IOError(msg)

-    dtype = [(x, np.float) for x in header]
-    raw_txt = np.loadtxt(fname, delimiter=delimiter, dtype=dtype)
+    dtype = (np.float,) * len(header) if not dtype else dtype
+    processed_dtype = [(x, y) for x, y in zip(header, dtype)]
+    raw_txt = np.loadtxt(fname, delimiter=delimiter, dtype=processed_dtype)

    log.debug('Extract spatial data')
    spatial = np.core.records.fromarrays([np.array([raw_txt[x] for x in ('x', 'y', 'z')]).T],
--- a/test/test_io.py
+++ b/test/test_io.py
@ -41,14 +41,15 @@ def test_load_las(datadir, fname, exp_point_count, exp_field_count):

    assert result.spatial.dtype == np.float, "Dtype of spatial is np.float"

-@pytest.mark.parametrize('fname, head, separator, exp_point_count, exp_field_count', [
+@pytest.mark.parametrize('fname, head, separator, exp_point_count, exp_field_count, dtype', [
    # TODO: test different LAS version
    # TODO: test LAS without field
-    ('test.txt', ['x', 'y', 'z', 'class', 'intensity'], ',', 58629, 2, ),
-    ('test_b.txt', ['x', 'y', 'z', 'class', 'intensity'], ' ', 58629, 2, ),
+    ('test.txt', ['x', 'y', 'z', 'class', 'intensity'], ',', 58629, 2, None),
+    ('test_b.txt', ['x', 'y', 'z', 'class', 'intensity'], ' ', 58629, 2, None),
+    ('test.txt', ['x', 'y', 'z', 'class', 'intensity'], ',', 58629, 2, [np.float, np.float, np.float, np.uint8, np.uint8]),
    #('test.laz', 58629, 3, ),
 ])
-def test_load_txt(datadir, fname, head, separator, exp_point_count, exp_field_count):
+def test_load_txt(datadir, fname, head, separator, exp_point_count, exp_field_count, dtype):
    fname = datadir.join(fname)
    
    # Raise "No such file"
@ -67,7 +68,7 @@ def test_load_txt(datadir, fname, head, separator, exp_point_count, exp_field_co
        pytest.fail('Opening legit file without exception')

    try:
-        result = io.load_txt(fname, tuple(head), separator)
+        result = io.load_txt(fname, tuple(head), separator, dtype)
    except Exception:
        pytest.fail('Opening legit file with legit header')

@ -80,3 +81,7 @@ def test_load_txt(datadir, fname, head, separator, exp_point_count, exp_field_co
    assert len(result['feature'].dtype) == exp_field_count, "Return ndarray with attribute fields"

    assert result.spatial.dtype == np.float, "Dtype of spatial is np.float"
+
+    if dtype is not None:
+        for feature_name, feature_dtype in zip(head[3:], dtype[3:]):
+            assert result.feature[feature_name].dtype == feature_dtype, "Missmatch between specified dtype and returned feature dtype"