Commit ab6b69a9 authored by nsoler's avatar nsoler
Browse files
parents b04a5791 9811d63c
Pipeline #2361 failed with stages
in 21 minutes and 17 seconds
......@@ -1573,6 +1573,9 @@ class sign3(BaseSignature, DataSignature):
shared_keys = sorted(list(sign0.unique_keys & self.unique_keys))
_, sign0_V = sign0.get_vectors(shared_keys)
_, sign3_V = self.get_vectors(shared_keys)
# sign0 A1 is not exactly like MFP! we need to reorder features as MFP
order = np.argsort(sign0.get_h5_dataset('features').astype(int))
sign0_V = sign0_V[:, order]
smpred = Smilespred(
model_dir=model_path, sign0=sign0_V, sign3=sign3_V,
evaluate=evaluate)
......@@ -1616,6 +1619,9 @@ class sign3(BaseSignature, DataSignature):
_, sign3_app_V = self.get_vectors(shared_keys,
dataset_name='confidence')
sign3_app_V = sign3_app_V.ravel()
# sign0 A1 is not exactly like MFP! we need to reorder features as MFP
order = np.argsort(sign0.get_h5_dataset('features').astype(int))
sign0_V = sign0_V[:, order]
# initialize model and start learning
apppred = ApplicabilityPredictor(
model_dir=model_path, sign0=sign0_V,
......@@ -1715,7 +1721,8 @@ class sign3(BaseSignature, DataSignature):
def predict_from_string(self, molecules, dest_file, keytype='SMILES',
chunk_size=1000, predict_fn=None,
keys=None, components=128, applicability=True):
keys=None, components=128, applicability=True,
y_order=None):
"""Given molecuel string, generate MFP and predict sign3.
Args:
......@@ -1734,6 +1741,9 @@ class sign3(BaseSignature, DataSignature):
# input must be a list, otherwise we make it so
if isinstance(molecules, str):
molecules = [molecules]
# reorder as sign0 A1 or leave it as is
if y_order is None:
y_order = np.arange(2048)
# convert input molecules to InChI
inchies = list()
if keytype.upper() == 'SMILES':
......@@ -1805,7 +1815,7 @@ class sign3(BaseSignature, DataSignature):
finally:
sign0s.append(calc_s0)
# stack input signatures and generate predictions
sign0s = np.vstack(sign0s)
sign0s = np.vstack(sign0s)[:, y_order]
preds = predict_fn(sign0s)
# add NaN when SMILES conversion failed
if failed:
......@@ -1876,14 +1886,11 @@ class sign3(BaseSignature, DataSignature):
self.update_status("Getting data")
cc = self.get_cc()
print("KURAC0",sign2_list)
if sign2_list is None:
sign2_list = list()
print("KURAC1",cc.datasets_exemplary())
for ds in cc.datasets_exemplary():
sign2_list.append(cc.get_signature('sign2', 'full', ds))
self.sign2_list = sign2_list
print("KURAC sign2_list",sign2_list)
self.src_datasets = [sign.dataset for sign in sign2_list]
if sign2_self is None:
sign2_self = self.get_sign('sign2')
......@@ -1995,8 +2002,6 @@ class sign3(BaseSignature, DataSignature):
'applicability_range')
_, trim_mask = self.realistic_subsampling_fn()
# get sorted universe inchikeys
self.universe_inchikeys = self.get_universe_inchikeys()
tot_inks = len(self.universe_inchikeys)
......
......@@ -37,6 +37,14 @@ def pipeline_parser():
'-r', '--reference_cc', type=str, required=False,
help='Root dir of the CC instance to use as reference '
'(i.e. triplet sampling in sign0).')
parser.add_argument(
'-t', '--only_tasks', type=str, nargs="+", default=[],
required=False,
help='Names of tasks that will `exclusively` run by the pipeline.')
parser.add_argument(
'-s', '--exclude_tasks', type=str, nargs="+", default=[],
required=False,
help='Names of tasks that will be skipped.')
parser.add_argument(
'-c', '--config', type=str, required=False,
default=os.environ["CC_CONFIG"],
......@@ -48,11 +56,14 @@ def pipeline_parser():
return parser
@logged(logging.getLogger("[ PIPELINE %s ]" % os.path.basename(__file__)))
def main(args):
# initialize Pipeline
cfg = Config(args.config)
pp = Pipeline(pipeline_path=args.pipeline_dir, keep_jobs=True,
config=Config(args.config))
config=cfg, only_tasks=args.only_tasks,
exclude_tasks=args.exclude_tasks)
# print arguments
for arg in vars(args):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment