Commit 7c2e1fb6 authored by Martino Bertoni 🌋
Browse files


parent 276a598b
Pipeline #2581 passed with stages
in 19 minutes and 48 seconds
......@@ -137,9 +137,8 @@ class Sanitizer(object):
"present in reference: %s" % str(add_features))
# we assume that features are in the same order
mask = np.isin(list(features), list(ref_features))
with h5py.File(data, "a") as hf:
self.filter_h5_dataset(data, mask, axis=1, name="V")
self.rewrite_features_h5(data, mask)
data.filter_h5_dataset('V', keep, axis=1)
data.filter_h5_dataset('features', keep, axis=1)
# check features frequencies
if self.check_features:
......@@ -266,11 +265,11 @@ class Sanitizer(object):
# return if input was raw data
if not was_data:
with h5py.File(data, "r") as hf:
with h5py.File(data.data_path, "r") as hf:
V = hf["V"][:]
keys = hf["keys"][:].astype(str)
keys_raw = hf["keys_raw"][:].astype(str)
features = hf["features"][:].astype(str)
return V, keys, keys_raw, features
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment