Commit 7265cb47 authored by nsoler's avatar nsoler
Browse files

progress on predicting sign1 for A spaces

parent 75f8e5f4
Pipeline #2374 failed with stages
in 2 minutes and 5 seconds
......@@ -477,11 +477,7 @@ class ChemicalChecker():
version (str) : ex: 2020_01
"""
signObj=sign
if sign.molset != 'reference':
signObj=sign.get_molset("reference")
import_models(signObj, version=version)
import_models(sign, version=version)
def import_h5(self):
......
......@@ -120,8 +120,6 @@ class sign1(BaseSignature, DataSignature):
def load_model(self, name):
fn = os.path.join(self.get_molset(
"reference").model_path, "%s.pkl" % name)
#debug
print("\nSHERLOCK fn",fn)
with open(fn, "rb") as f:
mod = pickle.load(f)
......@@ -277,6 +275,9 @@ class sign1(BaseSignature, DataSignature):
else:
mod = None
if mod is not None:
mod.model_path = self.model_path # avoid taking the info from pickle
print(" \n SHERLOCK s1.model_path",s1.model_path)
print(" \n SHERLOCK mod.model_path",mod.model_path)
mod.predict(s1)
self.__log.debug("Prediction done!")
if destination is None:
......
......@@ -6,27 +6,49 @@ import os, shutil
def import_models(sign_object, version='2020_01'):
    """Import the stored models needed to predict `sign_object`.

    The model files found next to this module, under
    ``<version>/<dataset>/<cctype>/``, are copied into the model_path of the
    reference signature. A symbolic link to each copied file is then created
    in the model_path of the full signature.

    Files that were already imported (copy or link already present) are left
    untouched, so the function can safely be called several times.

    Arguments:
        sign_object: signature object of any molset; its reference and full
            counterparts are obtained through ``get_molset``.
        version (str): name of the models version directory, ex: 2020_01.

    Returns:
        None, both on success and when there is no model to import.
    """
    fileDir = os.path.abspath(os.path.dirname(__file__))

    # Always work from the reference/full pair, whatever molset was given.
    signRef = sign_object.get_molset("reference")
    signFull = sign_object.get_molset("full")
    destination = signRef.model_path
    destinationLink = signFull.model_path

    data = os.path.join(fileDir, version, signRef.dataset, signRef.cctype)
    if not os.path.exists(data):
        print("Sorry, no model to import for this signature")
        return None

    # copyfile/symlink do not create missing parent directories.
    os.makedirs(destination, exist_ok=True)
    os.makedirs(destinationLink, exist_ok=True)

    for fichero in os.listdir(data):
        # NOTE(review): presumably a fit-status marker rather than a model
        # file -- it is deliberately not imported.
        if fichero == "fit.ready":
            continue

        source = os.path.join(data, fichero)
        target = os.path.join(destination, fichero)
        symlink = os.path.join(destinationLink, fichero)

        if not os.path.exists(target):
            print("Importing", source, "to", target)
            shutil.copyfile(source, target)

        # lexists (not islink): anything already sitting at the link path,
        # including a regular file or a broken link, would make os.symlink
        # raise FileExistsError.
        if not os.path.lexists(symlink):
            print("Creating symlink", symlink, "from", target)
            os.symlink(target, symlink)

    return None
......@@ -149,7 +149,7 @@ class Aspaces_prop_calculator(object):
return None
def calculate_mol_properties(self, outputfiles=None):
    """Compute the raw properties of the A spaces that still need them.

    Calls calculate_data_fn once per space to process.

    Arguments:
        - outputfiles (dict): mapping space : outputfile path. A space whose
          output file already exists is skipped and is therefore absent from
          the returned dictionary. When omitted (None), every space listed
          in self.Aspaces is computed unconditionally.

    Returns:
        dict mapping each processed space to whatever calculate_data_fn
        returned for it, e.g.
        {'A1': [{'inchikey': 'ASXBYYWOLISCLQ-UHFFFAOYSA-N', 'raw': ..raw_string}, {}...]}
    """
    # No output files given: nothing can be skipped, process every space.
    if outputfiles is None:
        return {space: self.calculate_data_fn(space) for space in self.Aspaces}

    result = dict()
    for space, path in outputfiles.items():
        if os.path.exists(path):
            print("File", path, "already present, nothing to do")
        else:
            result[space] = self.calculate_data_fn(space)

    return result
......@@ -188,8 +191,8 @@ class Aspaces_prop_calculator(object):
# Compute the raw properties
all_properties= self.calculate_mol_properties()
all_features= fetch_features_A()
all_properties= self.calculate_mol_properties(outputfiles)
all_features= fetch_features_A() # features from the fit() method
print('all_properties',all_properties)
print('all_features',all_features)
......@@ -263,8 +266,10 @@ class Aspaces_prop_calculator(object):
for space, fp in dict_of_Aspaces_h5.items():
print("\nCalculating sign0 for space", space)
sign0 = self.cc.get_signature('sign0', 'full',space+'.001')
sign0.clear()
sign0.fit(data_file=fp,do_triplets=False, overwrite=True,sanitize=sanitize)
if not sign0.is_fit():
sign0.fit(data_file=fp,do_triplets=False, overwrite=True,sanitize=sanitize)
else:
print("Sign0 for space", space+'.001', "already fit, nothing to do")
# Then we can use this cc instance to predict sign1
return self.cc
......@@ -284,7 +289,7 @@ class Aspaces_prop_calculator(object):
for space in self.Aspaces:
assert space in dictSpaces.keys(), print("Sign0 for space",space, "not fit!!")
assert space+'.001' in dictSpaces.keys(), print("Sign0 for space",space, "not fit!!")
sign0= self.cc.get_signature('sign0', 'full',space+'.001') # already fitted
sign1 = self.cc.get_signature('sign1', 'full',space+'.001') # will get converted to reference by the next fct
sign1.clear()
......
......@@ -197,6 +197,7 @@ class Lsi(BaseTransform):
for x in mask[0]])
f.write("%s %s\n" % (ks[i], val))
# load dictionary
print("\n SHERLOCK self.model_path",self.model_path)
dictionary = corpora.Dictionary.load(
os.path.join(self.model_path, self.name + ".dict.pkl"))
# corpus
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment