Added an option to get a signature as a pandas DataFrame

......@@ -444,6 +444,7 @@ class ChemicalChecker():
params(dict): Optional. The set of parameters to initialize and
compute the signature. If the signature is already initialized
this argument will be ignored.
as_dataframe(bool): If True, return the signature as a pandas DataFrame.
data(Signature): A `Signature` object, the specific type depends
on the cctype passed.
......@@ -454,8 +455,7 @@ class ChemicalChecker():
data = DataFactory.make_data(
cctype, signature_path, dataset_code, *args, **kwargs)
if as_dataframe:
df = pd.DataFrame(data[:],columns=data.features, index=data.keys)
return df
return data.as_dataframe()
return data
def get_data_signature(self, cctype, dataset_code):
......@@ -9,6 +9,7 @@ import os
import sys
import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm
from bisect import bisect_left
from scipy.spatial.distance import euclidean, cosine
......@@ -484,6 +485,10 @@ class DataSignature(object):
data = hf[h5_dataset_name][mask, :]
return data
def as_dataframe(self):
    """Return the content of this signature as a pandas DataFrame.

    The values are taken from `self[:]`, the column labels from
    `self.features` and the row labels from `self.keys`.

    Returns:
        pandas.DataFrame: The frame built from those three pieces.
    """
    values = self[:]
    column_labels = self.features
    row_labels = self.keys
    return pd.DataFrame(values, index=row_labels, columns=column_labels)
def get_vectors(self, keys, include_nan=False, dataset_name='V',
"""Get vectors for a list of keys, sorted by default.
