import functools
import adanet
import tensorflow.compat.v1 as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout
from chemicalchecker.util import logged


class NanMaskingLayer(layers.Layer):
    """Keras layer replacing NaN entries of the input with a constant value."""

    def __init__(self, mask_value=0.0):
        super(NanMaskingLayer, self).__init__()
        self.mask_value = mask_value

    def call(self, inputs):
        # Locate NaN entries and substitute the configured mask value.
        nan_idxs = tf.math.is_nan(inputs)
        replace = tf.ones_like(inputs) * self.mask_value
        return tf.where(nan_idxs, replace, inputs)

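# Usage sketch (illustrative, not part of this module's API): finite entries
# pass through unchanged while NaNs are replaced by `mask_value`, e.g.
#
#   NanMaskingLayer(mask_value=0.0)(tf.constant([[1.0, float("nan")]]))
#   # -> [[1.0, 0.0]]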

@logged
class StackDNNBuilder(adanet.subnetwork.Builder):
    """Builds a DNN subnetwork for AdaNet."""

    def __init__(self, optimizer, layer_size, num_layers,
                 learn_mixture_weights, dropout, seed, activation,
                 input_shape, nan_mask_value=0.0):
        """Initializes a `_DNNBuilder`.

        Args:
          optimizer: An `Optimizer` instance for training both the subnetwork
            and the mixture weights.
          layer_size: The number of nodes to output at each hidden layer.
          num_layers: The number of hidden layers.
          learn_mixture_weights: Whether to solve a learning problem to find
            best mixture weights, or use their default value according to the
            mixture weight type. When `False`, the subnetworks will return a
            no_op for the mixture weight train op.
          dropout: The dropout rate, between 0 and 1. E.g. "rate=0.1" would
            drop out 10% of input units.
          activation: The activation function to be used.
          seed: A random seed.
          input_shape: Dimensionality of each input feature vector, used to
            restore the static shape lost by `tf.py_func` in the dataset.
          nan_mask_value: Value substituted for NaN entries of the input;
            `None` disables NaN masking.

        Returns:
          An instance of `StackDNNBuilder`.
        """

        self._optimizer = optimizer
        self._layer_size = layer_size
        self._num_layers = num_layers
        self._learn_mixture_weights = learn_mixture_weights
        self._seed = seed
        self._dropout = dropout
        self._input_shape = input_shape
        self._activation = activation
        self._nan_mask_value = nan_mask_value

    def build_subnetwork(self,
                         features,
                         logits_dimension,
                         training,
                         iteration_step,
                         summary,
                         previous_ensemble=None):
        """See `adanet.subnetwork.Builder`."""

        input_layer = tf.cast(features['x'], tf.float32)
        # force the input shape, as the dataset uses tf.py_func (losing shape)
        input_layer = tf.reshape(input_layer, [-1, self._input_shape])
        last_layer = input_layer
        if self._nan_mask_value is not None:
            last_layer = NanMaskingLayer(self._nan_mask_value)(last_layer)
        for _ in range(self._num_layers):
            last_layer = Dense(
                self._layer_size,
                activation=self._activation)(last_layer)
            last_layer = Dropout(
                rate=self._dropout,
                seed=self._seed)(last_layer, training=training)

        logits = Dense(units=logits_dimension)(last_layer)

        # Persist the depth so the generator can read it back at the next
        # AdaNet iteration and propose candidates that grow on top of it.
        persisted_tensors = {"num_layers": tf.constant(self._num_layers)}
        return adanet.Subnetwork(
            last_layer=last_layer,
            logits=logits,
            complexity=self._measure_complexity(),
            persisted_tensors=persisted_tensors)

    def _measure_complexity(self):
        """Approximates Rademacher complexity as square-root of the depth."""
        return tf.sqrt(tf.cast(self._num_layers, dtype=tf.float32))
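
    # For reference, the AdaNet objective (Cortes et al. 2017) penalizes each
    # subnetwork h_j by its complexity r(h_j):
    #
    #   F(w) = (1/m) * sum_i loss(f(x_i), y_i)
    #          + sum_j (lambda * r(h_j) + beta) * |w_j|
    #
    # so returning sqrt(depth) makes deeper candidates pay a larger penalty.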

    def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                                  iteration_step, summary, previous_ensemble):
        """See `adanet.subnetwork.Builder`."""
        return self._optimizer.minimize(loss=loss, var_list=var_list)

    def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
                                       iteration_step, summary):
        """See `adanet.subnetwork.Builder`."""

        if not self._learn_mixture_weights:
            return tf.no_op()
        return self._optimizer.minimize(loss=loss, var_list=var_list)

    @property
    def name(self):
        """See `adanet.subnetwork.Builder`."""

        if self._num_layers == 0:
            # A DNN with no hidden layers is a linear model.
            return "linear"
        return "{}_layer_dnn".format(self._num_layers)


class StackDNNGenerator(adanet.subnetwork.Generator):
    """Generates a two DNN subnetworks at each iteration.

    The first DNN has an identical shape to the most recently added subnetwork
    in `previous_ensemble`. The second has the same shape plus one more dense
    layer on top. This is similar to the adaptive network presented in Figure
    2 of [Cortes et al. ICML 2017](https://arxiv.org/abs/1607.01097), without
    the connections to hidden layers of networks from previous iterations.
    """

    def __init__(self,
                 optimizer,
                 input_shape,
                 nan_mask_value=0.0,
                 layer_size=32,
                 learn_mixture_weights=False,
                 dropout=0.0,
                 activation=tf.nn.relu,
                 seed=None,
                 **kwargs):
        """Initializes a DNN `Generator`.

        Args:
          optimizer: An `Optimizer` instance for training both the subnetwork
            and the mixture weights.
          layer_size: Number of nodes in each hidden layer of the subnetwork
            candidates. Note that this parameter is ignored in a DNN with no
            hidden layers.
          learn_mixture_weights: Whether to solve a learning problem to find
            best mixture weights, or use their default value according to the
            mixture weight type. When `False`, the subnetworks will return a
            no_op for the mixture weight train op.
          dropout: The dropout rate, between 0 and 1. E.g. "rate=0.1" would
            drop out 10% of input units.
          activation: The activation function to be used.
          seed: A random seed.
          input_shape: Dimensionality of each input feature vector, forwarded
            to every `StackDNNBuilder` candidate.
          nan_mask_value: Value substituted for NaN entries of the input;
            `None` disables NaN masking.

        Returns:
          An instance of `Generator`.
        """

        self._seed = seed
        self._dnn_builder_fn = functools.partial(
            StackDNNBuilder,
            optimizer=optimizer,
            layer_size=layer_size,
            dropout=dropout,
            input_shape=input_shape,
            nan_mask_value=nan_mask_value,
            activation=activation,
            learn_mixture_weights=learn_mixture_weights)

    def generate_candidates(self, previous_ensemble, iteration_number,
                            previous_ensemble_reports, all_reports):
        """See `adanet.subnetwork.Generator`."""

        # Default to a linear model; if an ensemble already exists, match the
        # depth of its most recently added subnetwork.
        num_layers = 0
        seed = self._seed
        if previous_ensemble:
            num_layers = tf.get_static_value(
                previous_ensemble.weighted_subnetworks[
                    -1].subnetwork.persisted_tensors["num_layers"])
        # Vary the seed across iterations so successive candidates do not
        # start from identical initializations.
        if seed is not None:
            seed += iteration_number
        return [
            self._dnn_builder_fn(num_layers=num_layers, seed=seed),
            self._dnn_builder_fn(num_layers=num_layers + 1, seed=seed),
        ]
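
# Wiring sketch (hypothetical hyperparameter values; `tf.train.AdamOptimizer`,
# `tf.estimator.RegressionHead` and `adanet.Estimator` are standard TF/AdaNet
# entry points, but this module does not prescribe them):
#
#   generator = StackDNNGenerator(
#       optimizer=tf.train.AdamOptimizer(learning_rate=1e-3),
#       input_shape=128,
#       layer_size=32,
#       dropout=0.2,
#       seed=42)
#   estimator = adanet.Estimator(
#       head=tf.estimator.RegressionHead(label_dimension=1),
#       subnetwork_generator=generator,
#       max_iteration_steps=1000)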