Commit 919fa540 authored by nsoler
Browse files

trying to predict sign1

parent d22def42
Pipeline #2373 failed with stages
in 26 minutes and 40 seconds
......@@ -479,8 +479,7 @@ class ChemicalChecker():
"""
signObj=sign
if sign.molset != 'reference':
signObj= self.get_signature(sign.cctype, 'reference', sign.dataset)
signObj=sign.get_molset("reference")
import_models(signObj, version=version)
......
......@@ -120,6 +120,9 @@ class sign1(BaseSignature, DataSignature):
def load_model(self, name):
    """Load a pickled model from the reference molset's model directory.

    Args:
        name (str): Model name without extension. The file read is
            ``<model_path>/<name>.pkl``, where ``model_path`` is taken
            from the object returned by ``self.get_molset("reference")``.

    Returns:
        The object unpickled from that file.

    Raises:
        OSError: If the model file cannot be opened.
    """
    model_dir = self.get_molset("reference").model_path
    model_file = os.path.join(model_dir, "%s.pkl" % name)
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; this assumes the model directory only holds trusted files.
    with open(model_file, "rb") as fh:
        return pickle.load(fh)
......@@ -236,56 +239,59 @@ class sign1(BaseSignature, DataSignature):
def predict(self, sign0, destination=None):
"""Predict sign1 from sign0"""
if type(sign0) is not DataSignature:
raise Exception(
"Predict requires a DataSignature as input parameter")
# #if type(sign0) is not DataSignature:
# if type(sign0) != type(DataSignature):
# print("type(sign0)", type(sign0))
# print("type(DataSignature)",type(DataSignature))
# raise Exception(
# "Predict requires a DataSignature as input parameter")
if not os.path.isfile(sign0.data_path):
raise Exception("The file " + sign0.data_path + " does not exist")
tag = str(uuid.uuid4())
tmp_path = os.path.join(self.model_path, tag)
try:
cc = ChemicalChecker(tmp_path)
s1 = cc.signature(self.dataset, "sign1")
self.copy_sign0_to_sign1(sign0, s1, just_data=True)
self.__log.debug("Reading pipeline")
fn = self.pipeline_file()
with open(fn, "rb") as f:
pipeline = pickle.load(f)
self.__log.debug("Starting pipeline")
self.__log.debug("Scaling if necessary")
if not pipeline["sparse"] and pipeline["scale"]:
mod = self.load_model("scale")
mod.predict(s1)
self.__log.debug("Transformation")
if pipeline["metric_learning"]:
if pipeline["semisupervised"]:
mod = self.load_model("semiml")
else:
mod = self.load_model("unsupml")
#try:
cc = ChemicalChecker(tmp_path)
s1 = cc.signature(self.dataset, "sign1")
self.copy_sign0_to_sign1(sign0, s1, just_data=True)
self.__log.debug("Reading pipeline")
fn = self.pipeline_file()
with open(fn, "rb") as f:
pipeline = pickle.load(f)
self.__log.debug("Starting pipeline")
self.__log.debug("Scaling if necessary")
if not pipeline["sparse"] and pipeline["scale"]:
mod = self.load_model("scale")
mod.predict(s1)
self.__log.debug("Transformation")
if pipeline["metric_learning"]:
if pipeline["semisupervised"]:
mod = self.load_model("semiml")
else:
if pipeline["latent"]:
if pipeline["sparse"]:
mod = self.load_model("lsi")
else:
mod = self.load_model("pca")
mod = self.load_model("unsupml")
else:
if pipeline["latent"]:
if pipeline["sparse"]:
mod = self.load_model("lsi")
else:
mod = None
if mod is not None:
mod.predict(s1)
self.__log.debug("Prediction done!")
if destination is None:
self.__log.debug("Returning a V, keys dictionary")
results = {
"V": s1[:],
"keys": s1.keys
}
mod = self.load_model("pca")
else:
self.__log.debug("Saving H5 file in %s" % destination)
shutil.copyfile(s1.data_path, destination)
results = None
except Exception as e:
shutil.rmtree(tmp_path)
raise Exception(e)
mod = None
if mod is not None:
mod.predict(s1)
self.__log.debug("Prediction done!")
if destination is None:
self.__log.debug("Returning a V, keys dictionary")
results = {
"V": s1[:],
"keys": s1.keys
}
else:
self.__log.debug("Saving H5 file in %s" % destination)
shutil.copyfile(s1.data_path, destination)
results = None
# except Exception as e:
# shutil.rmtree(tmp_path)
# raise Exception(e)
self.__log.debug("Deleting tmp folder")
shutil.rmtree(tmp_path)
......
......@@ -262,7 +262,8 @@ class Aspaces_prop_calculator(object):
# Now creating sign0 for each of the input raw files
for space, fp in dict_of_Aspaces_h5.items():
print("\nCalculating sign0 for space", space)
sign0 = self.cc.get_signature('sign0', 'full',space)
sign0 = self.cc.get_signature('sign0', 'full',space+'.001')
sign0.clear()
sign0.fit(data_file=fp,do_triplets=False, overwrite=True,sanitize=sanitize)
# Then we can use this cc instance to predict sign1
......@@ -276,15 +277,20 @@ class Aspaces_prop_calculator(object):
dictSpaces= self.cc.report_available()
if "reference" in dictSpaces:
dictSpaces=dictSpaces['reference']
self.cc.import_models_for_prediction(sign1) # Import models A1 to A5
else:
print("No sign0 available in your cc repo")
return
for space in dictSpaces.keys():
if space in ('A1', 'A2', 'A3', 'A4', 'A5'):
sign1 = self.cc.get_signature('sign1', 'full',space) # will get converted to reference by the next fct
sign0= self.cc.get_signature('sign0', 'full',space)
for space in self.Aspaces:
assert space in dictSpaces.keys(), print("Sign0 for space",space, "not fit!!")
sign0= self.cc.get_signature('sign0', 'full',space+'.001') # already fitted
sign1 = self.cc.get_signature('sign1', 'full',space+'.001') # will get converted to reference by the next fct
sign1.clear()
self.cc.import_models_for_prediction(sign1) # Import model for this space
print("\nPredicting sign1 for space",space)
sign1.predict(sign0)
return self.cc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment