Commit efa263c1 authored by Martino Bertoni's avatar Martino Bertoni 🌋
Browse files

fixed adanet

parent c5651714
Pipeline #2413 passed with stages
in 4 minutes and 59 seconds
...@@ -4,16 +4,15 @@ import shutil ...@@ -4,16 +4,15 @@ import shutil
import pickle import pickle
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm
from time import time from time import time
from scipy.stats import pearsonr from scipy.stats import pearsonr
from sklearn.metrics import r2_score, mean_squared_error from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import explained_variance_score from sklearn.metrics import explained_variance_score
from sklearn.linear_model import LinearRegression
try: try:
import tensorflow as tf import tensorflow.compat.v1 as tf
import tensorflow.contrib.slim as slim import tensorflow as tf2
from tensorflow.contrib import predictor #import tensorflow.contrib.slim as slim
#from tensorflow.contrib import predictor
except ImportError: except ImportError:
raise ImportError("requires tensorflow " + raise ImportError("requires tensorflow " +
"https://www.tensorflow.org/") "https://www.tensorflow.org/")
...@@ -93,23 +92,18 @@ class AdaNetWrapper(object): ...@@ -93,23 +92,18 @@ class AdaNetWrapper(object):
# check the prediction task at hand # check the prediction task at hand
self.prediction_task = kwargs.get("prediction_task", "regression") self.prediction_task = kwargs.get("prediction_task", "regression")
if self.prediction_task == "regression": if self.prediction_task == "regression":
self._estimator_head = tf.contrib.estimator.regression_head( self._estimator_head = tf.estimator.RegressionHead(
label_dimension=self.label_dimension) label_dimension=self.label_dimension)
elif self.prediction_task == "classification": elif self.prediction_task == "classification":
self._estimator_head = \ self._estimator_head = \
tf.contrib.estimator.binary_classification_head() tf.estimator.BinaryClassHead()
if self.n_classes > 2: if self.n_classes > 2:
self._estimator_head = tf.contrib.estimator.multi_class_head( self._estimator_head = tf.estimator.MultiClassHead(
n_classes=self.n_classes) n_classes=self.n_classes)
else: else:
raise Exception("Prediction task '%s' not recognized.", raise Exception("Prediction task '%s' not recognized.",
self.prediction_task) self.prediction_task)
# tensorflow session_config
self.session_config = tf.ConfigProto(
intra_op_parallelism_threads=self.cpu,
inter_op_parallelism_threads=self.cpu,
allow_soft_placement=True,
device_count={'CPU': self.cpu})
# log parameters # log parameters
self.__log.info("**** AdaNet Parameters: ***") self.__log.info("**** AdaNet Parameters: ***")
...@@ -211,8 +205,7 @@ class AdaNetWrapper(object): ...@@ -211,8 +205,7 @@ class AdaNetWrapper(object):
save_checkpoints_secs=18000, # save checkpoints every 5 hours save_checkpoints_secs=18000, # save checkpoints every 5 hours
save_summary_steps=50000, save_summary_steps=50000,
tf_random_seed=self.random_seed, tf_random_seed=self.random_seed,
model_dir=self.model_dir, model_dir=self.model_dir),
session_config=self.session_config),
model_dir=self.model_dir model_dir=self.model_dir
) )
# Train and evaluate using using the tf.estimator tooling. # Train and evaluate using using the tf.estimator tooling.
...@@ -294,7 +287,7 @@ class AdaNetWrapper(object): ...@@ -294,7 +287,7 @@ class AdaNetWrapper(object):
tf.py_function(augmentation, [x, y], tf.py_function(augmentation, [x, y],
[x.dtype, y.dtype])), [x.dtype, y.dtype])),
num_parallel_calls=self.cpu) num_parallel_calls=self.cpu)
iterator = dataset.make_one_shot_iterator() iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
features, labels = iterator.get_next() features, labels = iterator.get_next()
return {'x': features}, labels return {'x': features}, labels
...@@ -314,20 +307,22 @@ class AdaNetWrapper(object): ...@@ -314,20 +307,22 @@ class AdaNetWrapper(object):
(regression only). (regression only).
""" """
if predict_fn is None: if predict_fn is None:
predict_fn = predictor.from_saved_model( imported = tf2.saved_model.load(model_dir)
model_dir, signature_def_key='predict') predict_fn = imported.signatures["predict"]
#predict_fn = predictor.from_saved_model(
# model_dir, signature_def_key='predict')
if mask_fn is None: if mask_fn is None:
# TODO if no subsampling is provided we can apply some noise # TODO if no subsampling is provided we can apply some noise
def mask_fn(data): def mask_fn(data):
return data return data
pred = predict_fn({'x': features[:]}) pred = predict_fn(tf2.convert_to_tensor(features[:]))
if 'predictions' in pred: if 'predictions' in pred:
if consensus: if consensus:
pred_shape = pred['predictions'].shape pred_shape = pred['predictions'].shape
# axis are 0=molecules, 1=samples, 2=components # axis are 0=molecules, 1=samples, 2=components
repeat = features[:].repeat(samples, axis=0) repeat = features[:].repeat(samples, axis=0)
sampling = predict_fn({'x': mask_fn(repeat)})['predictions'] sampling = predict_fn(tf2.convert_to_tensor(mask_fn(repeat)))['predictions']
sampling = sampling.reshape( sampling = sampling.reshape(
pred_shape[0], samples, pred_shape[1]) pred_shape[0], samples, pred_shape[1])
return pred['predictions'], sampling return pred['predictions'], sampling
...@@ -346,8 +341,10 @@ class AdaNetWrapper(object): ...@@ -346,8 +341,10 @@ class AdaNetWrapper(object):
Args: Args:
model_dir(str): path where to save the model. model_dir(str): path where to save the model.
""" """
predict_fn = predictor.from_saved_model( imported = tf2.saved_model.load(model_dir)
model_dir, signature_def_key='predict') predict_fn = imported.signatures["predict"]
#predict_fn = predictor.from_saved_model(
# model_dir, signature_def_key='predict')
return predict_fn return predict_fn
@staticmethod @staticmethod
...@@ -367,8 +364,10 @@ class AdaNetWrapper(object): ...@@ -367,8 +364,10 @@ class AdaNetWrapper(object):
probs(bool): if this is a classifier return the probabilities. probs(bool): if this is a classifier return the probabilities.
""" """
if predict_fn is None: if predict_fn is None:
predict_fn = predictor.from_saved_model( imported = tf2.saved_model.load(model_dir)
model_dir, signature_def_key='predict') predict_fn = imported.signatures["predict"]
#predict_fn = predictor.from_saved_model(
# model_dir, signature_def_key='predict')
shapes, dtypes, fn = Traintest.generator_fn( shapes, dtypes, fn = Traintest.generator_fn(
h5_file, split, batch_size, only_x=False, return_on_epoch=True) h5_file, split, batch_size, only_x=False, return_on_epoch=True)
x_shape, y_shape = shapes x_shape, y_shape = shapes
...@@ -427,7 +426,7 @@ class AdaNetWrapper(object): ...@@ -427,7 +426,7 @@ class AdaNetWrapper(object):
with tf.Session(graph=tf.Graph()) as sess: with tf.Session(graph=tf.Graph()) as sess:
tf.saved_model.loader.load(sess, ["serve"], model_dir) tf.saved_model.loader.load(sess, ["serve"], model_dir)
model_vars = tf.trainable_variables() model_vars = tf.trainable_variables()
slim.model_analyzer.analyze_vars(model_vars, print_info=True) #slim.model_analyzer.analyze_vars(model_vars, print_info=True)
@staticmethod @staticmethod
def get_trainable_variables(model_dir): def get_trainable_variables(model_dir):
......
import functools import functools
import adanet import adanet
import tensorflow as tf import tensorflow.compat.v1 as tf
class SimpleCNNBuilder(adanet.subnetwork.Builder): class SimpleCNNBuilder(adanet.subnetwork.Builder):
......
import functools import functools
import adanet import adanet
import numpy as np import numpy as np
import tensorflow as tf import tensorflow.compat.v1 as tf
from tensorflow import layers from tensorflow.keras import layers
from tensorflow.layers import Dense, Dropout from tensorflow.keras.layers import Dense, Dropout
from chemicalchecker.util import logged from chemicalchecker.util import logged
...@@ -14,7 +14,7 @@ class NanMaskingLayer(layers.Layer): ...@@ -14,7 +14,7 @@ class NanMaskingLayer(layers.Layer):
self.mask_value = mask_value self.mask_value = mask_value
def call(self, input): def call(self, input):
nan_idxs = tf.is_nan(input) nan_idxs = tf.math.is_nan(input)
replace = tf.ones_like(input) * self.mask_value replace = tf.ones_like(input) * self.mask_value
return tf.where(nan_idxs, replace, input) return tf.where(nan_idxs, replace, input)
...@@ -92,11 +92,7 @@ class ExtendDNNBuilder(adanet.subnetwork.Builder): ...@@ -92,11 +92,7 @@ class ExtendDNNBuilder(adanet.subnetwork.Builder):
def _measure_complexity(self): def _measure_complexity(self):
"""Approximates Rademacher complexity as square-root of the depth.""" """Approximates Rademacher complexity as square-root of the depth."""
# depth_cmpl = np.sqrt(float(self._num_layers)) return tf.sqrt(tf.cast(tf.math.reduce_sum(self._layer_sizes), dtype=tf.float32))
# max_width_cmpl = np.sqrt(float(max(self._layer_sizes)))
total_blocks_cmpl = np.sqrt(float(sum(self._layer_sizes)))
# self.__log.debug("\n\n***** COMPLEXITY\ndepth_cmpl: %s\max_width_cmpl %s\total_blocks_cmpl %s\n\n", depth_cmpl, max_width_cmpl, total_blocks_cmpl)
return total_blocks_cmpl
def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels, def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
iteration_step, summary, previous_ensemble): iteration_step, summary, previous_ensemble):
...@@ -192,9 +188,9 @@ class ExtendDNNGenerator(adanet.subnetwork.Generator): ...@@ -192,9 +188,9 @@ class ExtendDNNGenerator(adanet.subnetwork.Generator):
last_subnetwork = previous_ensemble.weighted_subnetworks[ last_subnetwork = previous_ensemble.weighted_subnetworks[
-1].subnetwork -1].subnetwork
shared_tensors = last_subnetwork.shared shared_tensors = last_subnetwork.shared
num_layers = tf.contrib.util.constant_value( num_layers = tf.get_static_value(
shared_tensors["num_layers"]) shared_tensors["num_layers"])
layer_sizes = list(tf.contrib.util.constant_value( layer_sizes = list(tf.get_static_value(
shared_tensors["layer_sizes"])) shared_tensors["layer_sizes"]))
# at each iteration try exdending any of the existing layers (width) # at each iteration try exdending any of the existing layers (width)
candidates = list() candidates = list()
......
import functools import functools
import adanet import adanet
import tensorflow as tf import tensorflow.compat.v1 as tf
from tensorflow.keras import layers
from tensorflow import layers from tensorflow.keras.layers import Dense, Dropout
from tensorflow.layers import Dense, Dropout
from chemicalchecker.util import logged from chemicalchecker.util import logged
...@@ -15,7 +13,7 @@ class NanMaskingLayer(layers.Layer): ...@@ -15,7 +13,7 @@ class NanMaskingLayer(layers.Layer):
self.mask_value = mask_value self.mask_value = mask_value
def call(self, input): def call(self, input):
nan_idxs = tf.is_nan(input) nan_idxs = tf.math.is_nan(input)
replace = tf.ones_like(input) * self.mask_value replace = tf.ones_like(input) * self.mask_value
return tf.where(nan_idxs, replace, input) return tf.where(nan_idxs, replace, input)
...@@ -91,7 +89,7 @@ class StackDNNBuilder(adanet.subnetwork.Builder): ...@@ -91,7 +89,7 @@ class StackDNNBuilder(adanet.subnetwork.Builder):
def _measure_complexity(self): def _measure_complexity(self):
"""Approximates Rademacher complexity as square-root of the depth.""" """Approximates Rademacher complexity as square-root of the depth."""
return tf.sqrt(tf.to_float(self._num_layers)) return tf.sqrt(tf.cast(self._num_layers, dtype=tf.float32))
def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels, def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
iteration_step, summary, previous_ensemble): iteration_step, summary, previous_ensemble):
...@@ -175,7 +173,7 @@ class StackDNNGenerator(adanet.subnetwork.Generator): ...@@ -175,7 +173,7 @@ class StackDNNGenerator(adanet.subnetwork.Generator):
num_layers = 0 num_layers = 0
seed = self._seed seed = self._seed
if previous_ensemble: if previous_ensemble:
num_layers = tf.contrib.util.constant_value( num_layers = tf.get_static_value(
previous_ensemble.weighted_subnetworks[ previous_ensemble.weighted_subnetworks[
-1].subnetwork.persisted_tensors["num_layers"]) -1].subnetwork.persisted_tensors["num_layers"])
if seed is not None: if seed is not None:
......
...@@ -27,6 +27,7 @@ class TestAdanet(unittest.TestCase): ...@@ -27,6 +27,7 @@ class TestAdanet(unittest.TestCase):
shutil.rmtree(self.adanet_path, ignore_errors=True) shutil.rmtree(self.adanet_path, ignore_errors=True)
def tearDown(self): def tearDown(self):
return
if os.path.exists(self.adanet_path): if os.path.exists(self.adanet_path):
shutil.rmtree(self.adanet_path, ignore_errors=True) shutil.rmtree(self.adanet_path, ignore_errors=True)
...@@ -54,7 +55,7 @@ class TestAdanet(unittest.TestCase): ...@@ -54,7 +55,7 @@ class TestAdanet(unittest.TestCase):
self.assertAlmostEqual(res['accuracy'], 0.981, 2) self.assertAlmostEqual(res['accuracy'], 0.981, 2)
self.assertAlmostEqual(res['auc'], 0.994, 3) self.assertAlmostEqual(res['auc'], 0.994, 3)
self.assertAlmostEqual(res['precision'], 0.981, 2) self.assertAlmostEqual(res['precision'], 0.981, 2)
self.assertAlmostEqual(res['recall'], 0.9822178, 2) self.assertAlmostEqual(res['recall'], 0.98761237, 2)
# check persistency and predict # check persistency and predict
predict_fn = AdaNet.predict_fn(ada.save_dir) predict_fn = AdaNet.predict_fn(ada.save_dir)
y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn) y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn)
...@@ -88,7 +89,7 @@ class TestAdanet(unittest.TestCase): ...@@ -88,7 +89,7 @@ class TestAdanet(unittest.TestCase):
# check results # check results
_, (res, _) = ada.train_and_evaluate() _, (res, _) = ada.train_and_evaluate()
self.assertAlmostEqual(res['accuracy'], 0.9668, 2) self.assertAlmostEqual(res['accuracy'], 0.9668, 2)
self.assertAlmostEqual(res['loss'], 0.12861905, 2) self.assertAlmostEqual(res['loss'], 0.13678956, 2)
# check persistency # check persistency
predict_fn = AdaNet.predict_fn(ada.save_dir) predict_fn = AdaNet.predict_fn(ada.save_dir)
y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn) y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn)
...@@ -120,7 +121,7 @@ class TestAdanet(unittest.TestCase): ...@@ -120,7 +121,7 @@ class TestAdanet(unittest.TestCase):
self.assertEqual(ada.model_dir, self.adanet_path) self.assertEqual(ada.model_dir, self.adanet_path)
# check results # check results
_, (res, _) = ada.train_and_evaluate() _, (res, _) = ada.train_and_evaluate()
self.assertAlmostEqual(res['loss'], 7.290054, 2) self.assertAlmostEqual(res['loss'], 3.0210078, 2)
# check persistency and predict # check persistency and predict
predict_fn = AdaNet.predict_fn(ada.save_dir) predict_fn = AdaNet.predict_fn(ada.save_dir)
y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn) y_pred, y_true = AdaNet.predict_online(file_path, 'test', predict_fn)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment