From 28795f14ba734d0d97cf292e601d7ab00039f401 Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 24 Sep 2017 23:26:52 +0200 Subject: [PATCH 01/13] Added support for passing external constants to RNN, which will pass them on to the cell --- keras/layers/recurrent.py | 164 +++++++++++++++++++++------ tests/keras/layers/recurrent_test.py | 72 ++++++++++++ 2 files changed, 203 insertions(+), 33 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 4fd4edc50f80..bfcaa0c27114 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -185,7 +185,9 @@ class RNN(Layer): # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state @@ -276,6 +278,14 @@ class RNN(Layer): `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. + # Note on passing external constants to RNNs + You can pass "external" constants to the cell using the `constants` + keyword argument of RNN.__call__ (as well as RNN.call) method. This + requires that the `cell.call` method accepts the same keyword argument + `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time) + (a.k.a. an attention mechanism). + # Examples ```python @@ -354,6 +364,8 @@ def __init__(self, cell, self.state_spec = InputSpec(shape=(None, self.cell.state_size)) self._states = None + self.external_constants_spec = None + @property def states(self): if self._states is None: @@ -399,6 +411,14 @@ def compute_mask(self, inputs, mask): return output_mask def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. + if self.external_constants_spec is not None: + # input_shape must be list + constants_shape = input_shape[-len(self.external_constants_spec):] + else: + constants_shape = None + if isinstance(input_shape, list): input_shape = input_shape[0] @@ -411,7 +431,10 @@ def build(self, input_shape): if isinstance(self.cell, Layer): step_input_shape = (input_shape[0],) + input_shape[2:] - self.cell.build(step_input_shape) + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) @@ -424,43 +447,58 @@ def get_initial_state(self, inputs): else: return [K.tile(initial_state, [1, self.cell.state_size])] - def __call__(self, inputs, initial_state=None, **kwargs): - # If there are multiple inputs, then - # they should be the main input and `initial_state` - # e.g. when loading model from file - if isinstance(inputs, (list, tuple)) and len(inputs) > 1 and initial_state is None: - initial_state = inputs[1:] - inputs = inputs[0] - - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. 
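To make the contract described in the docstring addition above concrete: `RNN.__call__` accepts a `constants` keyword and forwards those tensors to `cell.call`, which must accept the same keyword; the constants are not sliced over time. A minimal usage sketch against the patched API (the cell class `MyCellWithConstants` is illustrative only and not part of this patch):

```python
from keras.layers import Input, recurrent
from keras.models import Model

# A cell that supports constants implements
#     def call(self, inputs, states, constants): ...
# and, when constants are passed, its build() receives
# [step_input_shape] + constants_shapes.
cell = MyCellWithConstants(units=8)       # hypothetical cell defined elsewhere

x = Input((None, 5))                      # (time, features) sequence input
c = Input((3,))                           # static input: note, no time dimension
y = recurrent.RNN(cell)(x, constants=c)   # forwarded to cell.call(..., constants=[c])

# The constant behaves like any other model input; it is simply not unrolled over time.
model = Model([x, c], y)
```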
- if initial_state is None: + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + # If there are multiple inputs, then they should be the main input, + # `initial_state` and (optionally) `constants` e.g. when loading model + # from file # TODO ask for clarification + inputs, initial_state, constants = self._normalize_args( + inputs, initial_state, constants) + + # we need to know length of constants in build + if constants: + self.external_constants_spec = [ + InputSpec(shape=K.int_shape(constant)) + for constant in constants + ] + + if initial_state is None and constants is None: return super(RNN, self).__call__(inputs, **kwargs) - if not isinstance(initial_state, (list, tuple)): - initial_state = [initial_state] - - is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. + + check_list = [] + if initial_state: + check_list += initial_state + if constants: + check_list += constants + # at this point check_list cannot be empty + is_keras_tensor = hasattr(check_list[0], '_keras_history') + for tensor in check_list: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') + raise ValueError('The initial state and constants of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state + # Compute the full input spec, including state and constants input_spec = self.input_spec state_spec = self.state_spec if not isinstance(input_spec, list): input_spec = [input_spec] if not isinstance(state_spec, list): state_spec = [state_spec] - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) + self.input_spec = input_spec + inputs = [inputs] + if initial_state: + self.input_spec += state_spec + inputs += initial_state + kwargs['initial_state'] = initial_state + if constants: + self.input_spec += self.external_constants_spec + inputs += constants + kwargs['constants'] = constants # Perform the call output = super(RNN, self).__call__(inputs, **kwargs) @@ -470,16 +508,22 @@ def __call__(self, inputs, initial_state=None, **kwargs): return output else: kwargs['initial_state'] = initial_state + if constants is not None: + kwargs['constants'] = constants return super(RNN, self).__call__(inputs, **kwargs) - def call(self, inputs, mask=None, training=None, initial_state=None): + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + constants=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. 
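The rewritten `__call__` above also normalizes the case where everything arrives packed into a single list (as happens when a model is reloaded from file). A sketch of the two equivalent calling conventions, assuming `layer` wraps a single-state cell and `x`, `h0` and `c` are already-defined Keras tensors:

```python
# Explicit keyword form:
y1 = layer(x, initial_state=h0, constants=c)

# Packed-list form: the first entry is the sequence input, the next
# len(state_spec) entries are initial states, and any remaining tensors
# are treated as constants.
y2 = layer([x, h0, c])
```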
if isinstance(inputs, list): - initial_state = inputs[1:] inputs = inputs[0] - elif initial_state is not None: + if initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -508,9 +552,17 @@ def call(self, inputs, mask=None, training=None, initial_state=None): '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') - + cell_kwargs = {} if has_arg(self.cell.call, 'training'): - step = functools.partial(self.cell.call, training=training) + cell_kwargs['training'] = training + + if constants is not None: + if not has_arg(self.cell.call, 'constants'): + raise TypeError('cell does not take keyword argument constants') + cell_kwargs['constants'] = constants + + if cell_kwargs: + step = functools.partial(self.cell.call, **cell_kwargs) else: step = self.cell.call last_output, outputs, states = K.rnn(step, @@ -544,6 +596,52 @@ def call(self, inputs, mask=None, training=None, initial_state=None): else: return output + def _normalize_args(self, inputs, initial_state=None, constants=None): + """The inputs `initial_state` and `constants` can be passed to + RNN.__call__ either by separate arguments or as part of `inputs`. In + this case `inputs` is a list of tensors of which the first one is the + actual (sequence) input followed by initial states, followed by + constants. + + This method separates and noramlizes the different groups of inputs. + + # Arguments + inputs: tensor of list/tuple of tensors + initial_state: tensor or list of tensors or None + constants: tensor or list of tensors or None + + # Returns + inputs: tensor + initial_state: list of tensors or None + constants: list of tensors or None + """ + if isinstance(inputs, (list, tuple)): + remaining_inputs = inputs[1:] + inputs = inputs[0] + if remaining_inputs and initial_state is None: + if isinstance(self.state_spec, list): + n_states = len(self.state_spec) + else: + n_states = 1 + initial_state = remaining_inputs[:n_states] + remaining_inputs = remaining_inputs[n_states:] + if remaining_inputs and constants is None: + constants = remaining_inputs + if len(remaining_inputs) > 0: + raise ValueError('too many inputs were passed') + + def to_list_or_none(x): # TODO break out? 
+ if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] + + initial_state = to_list_or_none(initial_state) + constants = to_list_or_none(constants) + + return inputs, initial_state, constants + def reset_states(self, states=None): if not self.stateful: raise AttributeError('Layer must be stateful.') diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index fc328caf57d0..24122e1dce99 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -564,5 +564,77 @@ def test_batch_size_equal_one(layer_class): model.train_on_batch(x, y) +def test_rnn_cell_with_constants_layer(): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32) + layer = recurrent.RNN(cell) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + # Test basic case serialization. 
+ x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope( + {'RNNCellWithConstants': RNNCellWithConstants}): + layer = recurrent.RNN.from_config(config) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + assert_allclose(y_np, y_np_2, atol=1e-4) + + if __name__ == '__main__': pytest.main([__file__]) From 03b8fdad429e5451a05064a839dec179450d1dec Mon Sep 17 00:00:00 2001 From: andhus Date: Mon, 25 Sep 2017 01:49:10 +0200 Subject: [PATCH 02/13] Added class for allowing functional composition of RNN Cells, supporting constants --- keras/layers/recurrent.py | 132 ++- tests/keras/layers/recurrent_test.py | 1262 ++++++++++++++------------ 2 files changed, 781 insertions(+), 613 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index bfcaa0c27114..54e59b7c2ead 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -4,6 +4,8 @@ import functools import warnings +from keras.engine import Model +from keras.layers.wrappers import Wrapper from .. import backend as K from .. import activations from .. import initializers @@ -179,6 +181,117 @@ def get_losses_for(self, inputs=None): return losses +class FunctionalRNNCell(Wrapper): + """Wrapper for allowing composition of RNN Cells using functional API. + + # Arguments: + inputs: input tensor at a single time step + outputs: output tensor at a single timestep + input_states: state tensor(s) from previous time step + output_states: state tensor(s) after cell transformation + constants: tensor(s) or None, represents inputs that should be static + (the same) for each time step. + + # Examples + + ```python + # Use functional API to define RNN Cell transformation (in this case + # simple vanilla RNN) for a single time step: + + units = 32 + input_size = 5 + x = Input((input_size,)) + h_tm1 = Input((units,)) + h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_tm1)]) + h = Activation('tanh')(h_) + + # Create the cell: + + cell = FunctionalRNNCell( + inputs=x, outputs=h, input_states=h_tm1, output_states=h) + + x_sequence = Input((None, input_size)) + rnn = RNN(cell) + y = rnn(x_sequence) + + # We can also define cells that make use of "external" constants, to + # implement attention mechanisms: + + constant_shape = (10,) + c = Input(constant_shape) + density = Dense(constant_shape[0], activation='softmax')( + concatenate([x, h_tm1])) + attention = multiply([density, c]) + h2_ = add([h_, Dense(units)(attention)]) + h2 = Activation('tanh')(h2_) + + attention_cell = FunctionalRNNCell( + inputs=x, outputs=h2, input_states=h_tm1, output_states=h2, + constants=c) + + attention_rnn = RNN(attention_cell) + y2 = attention_rnn(x_sequence, constants=c) + + # Remember to pass the constant to the RNN layer (which will pass it on to + # the cell). 
Also note that shape of c is same as in cell (no time + # dimension added) + + attention_model = Model([x_sequence, c], y2) + ``` + """ + def __init__( + self, + inputs, + outputs, + input_states, + output_states, + constants=None, + **kwargs + ): + input_states = _to_list_or_none(input_states) + output_states = _to_list_or_none(output_states) + constants = _to_list_or_none(constants) + model = Model( + inputs=self._get_model_inputs(inputs, input_states, constants), + outputs=[outputs] + output_states + ) + super(FunctionalRNNCell, self).__init__(layer=model, **kwargs) + + in_states_shape = [K.int_shape(state) for state in input_states] + out_states_shape = [K.int_shape(state) for state in output_states] + if not in_states_shape == out_states_shape: + raise ValueError( + 'shape of input_states: {} are not same as shape of ' + 'output_states: {}'.format(in_states_shape, out_states_shape)) + self._state_size = [state_shape[-1] for state_shape in in_states_shape] + + @property + def state_size(self): + return self._state_size + + def call(self, inputs, states, constants=None): + """Defines the cell transformation for a single time step. + + # Arguments + inputs: Tensor representing input at current time step. + states: Tensor or list/tuple of tensors representing states from + previous time step. + constants: Tensor or list of tensors or None representing inputs + that should be the same at each time step. + """ + outputs = self.layer(self._get_model_inputs(inputs, states, constants)) + output, states = outputs[0], outputs[1:] + + return output, states + + def _get_model_inputs(self, inputs, input_states, constants): + inputs = [inputs] + list(input_states) + if constants is not None: + inputs += constants + + return inputs + + class RNN(Layer): """Base class for recurrent layers. @@ -630,15 +743,8 @@ def _normalize_args(self, inputs, initial_state=None, constants=None): if len(remaining_inputs) > 0: raise ValueError('too many inputs were passed') - def to_list_or_none(x): # TODO break out? - if x is None or isinstance(x, list): - return x - if isinstance(x, tuple): - return list(x) - return [x] - - initial_state = to_list_or_none(initial_state) - constants = to_list_or_none(constants) + initial_state = _to_list_or_none(initial_state) + constants = _to_list_or_none(constants) return inputs, initial_state, constants @@ -2001,3 +2107,11 @@ def from_config(cls, config): if 'implementation' in config and config['implementation'] == 0: config['implementation'] = 1 return cls(**config) + + +def _to_list_or_none(x): # TODO move? Very similar to topology._to_list + if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index 24122e1dce99..47992eca5af9 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -16,625 +16,679 @@ num_samples, timesteps, embedding_dim, units = 2, 5, 4, 3 embedding_num = 12 - - -@keras_test -def rnn_test(f): - """ - All the recurrent layers share the same interface, - so we can run through them with a single function. 
- """ - f = keras_test(f) - return pytest.mark.parametrize('layer_class', [ - recurrent.SimpleRNN, - recurrent.GRU, - recurrent.LSTM - ])(f) - - -@rnn_test -def test_return_sequences(layer_class): - layer_test(layer_class, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - -@rnn_test -def test_dynamic_behavior(layer_class): - layer = layer_class(units, input_shape=(None, embedding_dim)) - model = Sequential() - model.add(layer) - model.compile('sgd', 'mse') - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - -@rnn_test -def test_stateful_invalid_use(layer_class): - layer = layer_class(units, - stateful=True, - batch_input_shape=(num_samples, - timesteps, - embedding_dim)) - model = Sequential() - model.add(layer) - model.compile('sgd', 'mse') - x = np.random.random((num_samples * 2, timesteps, embedding_dim)) - y = np.random.random((num_samples * 2, units)) - with pytest.raises(ValueError): - model.fit(x, y) - with pytest.raises(ValueError): - model.predict(x, batch_size=num_samples + 1) - - -@rnn_test -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='Not yet supported.') -def test_dropout(layer_class): - for unroll in [True, False]: - layer_test(layer_class, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1, - 'unroll': unroll}, - input_shape=(num_samples, timesteps, embedding_dim)) - - # Test that dropout is applied during training - x = K.ones((num_samples, timesteps, embedding_dim)) - layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, - input_shape=(timesteps, embedding_dim)) - y = layer(x) - assert y._uses_learning_phase - - y = layer(x, training=True) - assert not getattr(y, '_uses_learning_phase') - - # Test that dropout is not applied during testing - x = np.random.random((num_samples, timesteps, embedding_dim)) - layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, - unroll=unroll, - input_shape=(timesteps, embedding_dim)) - model = Sequential([layer]) - assert model.uses_learning_phase - y1 = model.predict(x) - y2 = model.predict(x) - assert_allclose(y1, y2) - - -@rnn_test -def test_statefulness(layer_class): - model = Sequential() - model.add(embeddings.Embedding(embedding_num, embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class(units, return_sequences=False, - stateful=True, - weights=None) - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - out1 = model.predict(np.ones((num_samples, timesteps))) - assert(out1.shape == (num_samples, units)) - - # train once so that the states change - model.train_on_batch(np.ones((num_samples, timesteps)), - np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - assert(out1.max() != out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - assert(out2.max() != out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - assert(out4.max() != out5.max()) - - -@rnn_test -def 
test_masking_correctness(layer_class): - # Check masking: output with left padding and right padding - # should be the same. - model = Sequential() - model.add(embeddings.Embedding(embedding_num, embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class(units, return_sequences=False) - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - assert_allclose(out7, out6, atol=1e-5) - - -@rnn_test -def test_implementation_mode(layer_class): - for mode in [1, 2]: - # Without dropout - layer_test(layer_class, - kwargs={'units': units, - 'implementation': mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - # With dropout - layer_test(layer_class, - kwargs={'units': units, - 'implementation': mode, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - - -@rnn_test -def test_regularizer(layer_class): - layer = layer_class(units, return_sequences=False, weights=None, - input_shape=(timesteps, embedding_dim), - kernel_regularizer=regularizers.l1(0.01), - recurrent_regularizer=regularizers.l1(0.01), - bias_regularizer='l2') - layer.build((None, None, embedding_dim)) - assert len(layer.losses) == 3 - assert len(layer.cell.losses) == 3 - - layer = layer_class(units, return_sequences=False, weights=None, - input_shape=(timesteps, embedding_dim), - activity_regularizer='l2') - assert layer.activity_regularizer - x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) - layer(x) - assert len(layer.cell.get_losses_for(x)) == 0 - assert len(layer.get_losses_for(x)) == 1 - - -@keras_test -def test_masking_layer(): - ''' This test based on a previously failing issue here: - https://github.com/fchollet/keras/issues/1567 - ''' - inputs = np.random.random((6, 3, 4)) - targets = np.abs(np.random.random((6, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - - model = Sequential() - model.add(Masking(input_shape=(3, 4))) - model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=False)) - model.compile(loss='categorical_crossentropy', optimizer='adam') - model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) - - model = Sequential() - model.add(Masking(input_shape=(3, 4))) - model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) - model.compile(loss='categorical_crossentropy', optimizer='adam') - model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) - - -@rnn_test -def test_from_config(layer_class): - stateful_flags = (False, True) - for stateful in stateful_flags: - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - -@rnn_test -def test_specify_initial_state_keras_tensor(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with Keras tensor - inputs = Input((timesteps, embedding_dim)) - initial_state = [Input((units,)) for _ in range(num_states)] - layer = layer_class(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - assert initial_state[0] in 
layer.inbound_nodes[0].input_tensors - - model = Model([inputs] + initial_state, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) - - -@rnn_test -def test_specify_initial_state_non_keras_tensor(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with non-Keras tensor - inputs = Input((timesteps, embedding_dim)) - initial_state = [K.random_normal_variable((num_samples, units), 0, 1) - for _ in range(num_states)] - layer = layer_class(units) - output = layer(inputs, initial_state=initial_state) - - model = Model(inputs, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - targets = np.random.random((num_samples, units)) - model.fit(inputs, targets) - - -@rnn_test -def test_reset_states_with_values(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - layer = layer_class(units, stateful=True) - layer.build((num_samples, timesteps, embedding_dim)) - layer.reset_states() - assert len(layer.states) == num_states - assert layer.states[0] is not None - np.testing.assert_allclose(K.eval(layer.states[0]), - np.zeros(K.int_shape(layer.states[0])), - atol=1e-4) - state_shapes = [K.int_shape(state) for state in layer.states] - values = [np.ones(shape) for shape in state_shapes] - if len(values) == 1: - values = values[0] - layer.reset_states(values) - np.testing.assert_allclose(K.eval(layer.states[0]), - np.ones(K.int_shape(layer.states[0])), - atol=1e-4) - - # Test fit with invalid data - with pytest.raises(ValueError): - layer.reset_states([1] * (len(layer.states) + 1)) - - -@rnn_test -def test_initial_states_as_other_inputs(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with Keras tensor - main_inputs = Input((timesteps, embedding_dim)) - initial_state = [Input((units,)) for _ in range(num_states)] - inputs = [main_inputs] + initial_state - - layer = layer_class(units) - output = layer(inputs) - assert initial_state[0] in layer.inbound_nodes[0].input_tensors - - model = Model(inputs, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.train_on_batch([main_inputs] + initial_state, targets) - - -@rnn_test -def test_specify_state_with_masking(layer_class): - ''' This test based on a previously failing issue here: - https://github.com/fchollet/keras/issues/1567 - ''' - num_states = 2 if layer_class is recurrent.LSTM else 1 - - inputs = Input((timesteps, embedding_dim)) - _ = Masking()(inputs) - initial_state = [Input((units,)) for _ in range(num_states)] - output = layer_class(units)(inputs, initial_state=initial_state) - - model = Model([inputs] + initial_state, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) 
- - -@rnn_test -def test_return_state(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, return_state=True, stateful=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - assert len(state) == num_states - model = Model(inputs, state[0]) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - state = model.predict(inputs) - np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) - - -@rnn_test -def test_state_reuse(layer_class): - inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, return_state=True, return_sequences=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = layer_class(units)(output, initial_state=state) - model = Model(inputs, output) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - outputs = model.predict(inputs) - - -def test_minimal_rnn_cell_non_layer(): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = units - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output = states[0] - output = keras.backend.dot(inputs, self.kernel) + prev_output - return output, [output] - - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(32, 8), - MinimalRNNCell(32, 32)] - layer = recurrent.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - -def test_minimal_rnn_cell_non_layer_multiple_states(): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = (units, units) - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output_1 = states[0] - prev_output_2 = states[1] - output = keras.backend.dot(inputs, self.kernel) - output += prev_output_1 - output -= prev_output_2 - return output, [output * 2, output * 3] - - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. 
- cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(16, 8), - MinimalRNNCell(32, 16)] - layer = recurrent.RNN(cells) - assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - -def test_minimal_rnn_cell_layer(): - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(MinimalRNNCell, self).get_config() - return dict(list(base_config.items()) + list(config.items())) +# +# +# @keras_test +# def rnn_test(f): +# """ +# All the recurrent layers share the same interface, +# so we can run through them with a single function. +# """ +# f = keras_test(f) +# return pytest.mark.parametrize('layer_class', [ +# recurrent.SimpleRNN, +# recurrent.GRU, +# recurrent.LSTM +# ])(f) +# +# +# @rnn_test +# def test_return_sequences(layer_class): +# layer_test(layer_class, +# kwargs={'units': units, +# 'return_sequences': True}, +# input_shape=(num_samples, timesteps, embedding_dim)) +# +# +# @rnn_test +# def test_dynamic_behavior(layer_class): +# layer = layer_class(units, input_shape=(None, embedding_dim)) +# model = Sequential() +# model.add(layer) +# model.compile('sgd', 'mse') +# x = np.random.random((num_samples, timesteps, embedding_dim)) +# y = np.random.random((num_samples, units)) +# model.train_on_batch(x, y) +# +# +# @rnn_test +# def test_stateful_invalid_use(layer_class): +# layer = layer_class(units, +# stateful=True, +# batch_input_shape=(num_samples, +# timesteps, +# embedding_dim)) +# model = Sequential() +# model.add(layer) +# model.compile('sgd', 'mse') +# x = np.random.random((num_samples * 2, timesteps, embedding_dim)) +# y = np.random.random((num_samples * 2, units)) +# with pytest.raises(ValueError): +# model.fit(x, y) +# with pytest.raises(ValueError): +# model.predict(x, batch_size=num_samples + 1) +# +# +# @rnn_test +# @pytest.mark.skipif((K.backend() == 'cntk'), +# reason='Not yet supported.') +# def test_dropout(layer_class): +# for unroll in [True, False]: +# layer_test(layer_class, +# kwargs={'units': units, +# 'dropout': 0.1, +# 'recurrent_dropout': 0.1, +# 'unroll': unroll}, +# input_shape=(num_samples, timesteps, embedding_dim)) +# +# # Test that dropout is applied during training +# x = K.ones((num_samples, timesteps, embedding_dim)) +# layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, +# input_shape=(timesteps, embedding_dim)) +# y = layer(x) +# assert y._uses_learning_phase +# +# y = layer(x, training=True) +# assert not getattr(y, '_uses_learning_phase') +# +# # Test that dropout is not applied during testing +# x = np.random.random((num_samples, timesteps, embedding_dim)) +# layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, +# unroll=unroll, +# input_shape=(timesteps, embedding_dim)) +# model = 
Sequential([layer]) +# assert model.uses_learning_phase +# y1 = model.predict(x) +# y2 = model.predict(x) +# assert_allclose(y1, y2) +# +# +# @rnn_test +# def test_statefulness(layer_class): +# model = Sequential() +# model.add(embeddings.Embedding(embedding_num, embedding_dim, +# mask_zero=True, +# input_length=timesteps, +# batch_input_shape=(num_samples, timesteps))) +# layer = layer_class(units, return_sequences=False, +# stateful=True, +# weights=None) +# model.add(layer) +# model.compile(optimizer='sgd', loss='mse') +# out1 = model.predict(np.ones((num_samples, timesteps))) +# assert(out1.shape == (num_samples, units)) +# +# # train once so that the states change +# model.train_on_batch(np.ones((num_samples, timesteps)), +# np.ones((num_samples, units))) +# out2 = model.predict(np.ones((num_samples, timesteps))) +# +# # if the state is not reset, output should be different +# assert(out1.max() != out2.max()) +# +# # check that output changes after states are reset +# # (even though the model itself didn't change) +# layer.reset_states() +# out3 = model.predict(np.ones((num_samples, timesteps))) +# assert(out2.max() != out3.max()) +# +# # check that container-level reset_states() works +# model.reset_states() +# out4 = model.predict(np.ones((num_samples, timesteps))) +# assert_allclose(out3, out4, atol=1e-5) +# +# # check that the call to `predict` updated the states +# out5 = model.predict(np.ones((num_samples, timesteps))) +# assert(out4.max() != out5.max()) +# +# +# @rnn_test +# def test_masking_correctness(layer_class): +# # Check masking: output with left padding and right padding +# # should be the same. +# model = Sequential() +# model.add(embeddings.Embedding(embedding_num, embedding_dim, +# mask_zero=True, +# input_length=timesteps, +# batch_input_shape=(num_samples, timesteps))) +# layer = layer_class(units, return_sequences=False) +# model.add(layer) +# model.compile(optimizer='sgd', loss='mse') +# +# left_padded_input = np.ones((num_samples, timesteps)) +# left_padded_input[0, :1] = 0 +# left_padded_input[1, :2] = 0 +# out6 = model.predict(left_padded_input) +# +# right_padded_input = np.ones((num_samples, timesteps)) +# right_padded_input[0, -1:] = 0 +# right_padded_input[1, -2:] = 0 +# out7 = model.predict(right_padded_input) +# +# assert_allclose(out7, out6, atol=1e-5) +# +# +# @rnn_test +# def test_implementation_mode(layer_class): +# for mode in [1, 2]: +# # Without dropout +# layer_test(layer_class, +# kwargs={'units': units, +# 'implementation': mode}, +# input_shape=(num_samples, timesteps, embedding_dim)) +# # With dropout +# layer_test(layer_class, +# kwargs={'units': units, +# 'implementation': mode, +# 'dropout': 0.1, +# 'recurrent_dropout': 0.1}, +# input_shape=(num_samples, timesteps, embedding_dim)) +# +# +# @rnn_test +# def test_regularizer(layer_class): +# layer = layer_class(units, return_sequences=False, weights=None, +# input_shape=(timesteps, embedding_dim), +# kernel_regularizer=regularizers.l1(0.01), +# recurrent_regularizer=regularizers.l1(0.01), +# bias_regularizer='l2') +# layer.build((None, None, embedding_dim)) +# assert len(layer.losses) == 3 +# assert len(layer.cell.losses) == 3 +# +# layer = layer_class(units, return_sequences=False, weights=None, +# input_shape=(timesteps, embedding_dim), +# activity_regularizer='l2') +# assert layer.activity_regularizer +# x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) +# layer(x) +# assert len(layer.cell.get_losses_for(x)) == 0 +# assert len(layer.get_losses_for(x)) == 1 +# +# +# 
@keras_test +# def test_masking_layer(): +# ''' This test based on a previously failing issue here: +# https://github.com/fchollet/keras/issues/1567 +# ''' +# inputs = np.random.random((6, 3, 4)) +# targets = np.abs(np.random.random((6, 3, 5))) +# targets /= targets.sum(axis=-1, keepdims=True) +# +# model = Sequential() +# model.add(Masking(input_shape=(3, 4))) +# model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=False)) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) +# +# model = Sequential() +# model.add(Masking(input_shape=(3, 4))) +# model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) +# +# +# @rnn_test +# def test_from_config(layer_class): +# stateful_flags = (False, True) +# for stateful in stateful_flags: +# l1 = layer_class(units=1, stateful=stateful) +# l2 = layer_class.from_config(l1.get_config()) +# assert l1.get_config() == l2.get_config() +# +# +# @rnn_test +# def test_specify_initial_state_keras_tensor(layer_class): +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# # Test with Keras tensor +# inputs = Input((timesteps, embedding_dim)) +# initial_state = [Input((units,)) for _ in range(num_states)] +# layer = layer_class(units) +# if len(initial_state) == 1: +# output = layer(inputs, initial_state=initial_state[0]) +# else: +# output = layer(inputs, initial_state=initial_state) +# assert initial_state[0] in layer.inbound_nodes[0].input_tensors +# +# model = Model([inputs] + initial_state, output) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# +# inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# initial_state = [np.random.random((num_samples, units)) +# for _ in range(num_states)] +# targets = np.random.random((num_samples, units)) +# model.fit([inputs] + initial_state, targets) +# +# +# @rnn_test +# def test_specify_initial_state_non_keras_tensor(layer_class): +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# # Test with non-Keras tensor +# inputs = Input((timesteps, embedding_dim)) +# initial_state = [K.random_normal_variable((num_samples, units), 0, 1) +# for _ in range(num_states)] +# layer = layer_class(units) +# output = layer(inputs, initial_state=initial_state) +# +# model = Model(inputs, output) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# +# inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# targets = np.random.random((num_samples, units)) +# model.fit(inputs, targets) +# +# +# @rnn_test +# def test_reset_states_with_values(layer_class): +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# layer = layer_class(units, stateful=True) +# layer.build((num_samples, timesteps, embedding_dim)) +# layer.reset_states() +# assert len(layer.states) == num_states +# assert layer.states[0] is not None +# np.testing.assert_allclose(K.eval(layer.states[0]), +# np.zeros(K.int_shape(layer.states[0])), +# atol=1e-4) +# state_shapes = [K.int_shape(state) for state in layer.states] +# values = [np.ones(shape) for shape in state_shapes] +# if len(values) == 1: +# values = values[0] +# layer.reset_states(values) +# np.testing.assert_allclose(K.eval(layer.states[0]), +# np.ones(K.int_shape(layer.states[0])), +# atol=1e-4) +# +# # Test fit with invalid data +# with 
pytest.raises(ValueError): +# layer.reset_states([1] * (len(layer.states) + 1)) +# +# +# @rnn_test +# def test_initial_states_as_other_inputs(layer_class): +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# # Test with Keras tensor +# main_inputs = Input((timesteps, embedding_dim)) +# initial_state = [Input((units,)) for _ in range(num_states)] +# inputs = [main_inputs] + initial_state +# +# layer = layer_class(units) +# output = layer(inputs) +# assert initial_state[0] in layer.inbound_nodes[0].input_tensors +# +# model = Model(inputs, output) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# +# main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# initial_state = [np.random.random((num_samples, units)) +# for _ in range(num_states)] +# targets = np.random.random((num_samples, units)) +# model.train_on_batch([main_inputs] + initial_state, targets) +# +# +# @rnn_test +# def test_specify_state_with_masking(layer_class): +# ''' This test based on a previously failing issue here: +# https://github.com/fchollet/keras/issues/1567 +# ''' +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# inputs = Input((timesteps, embedding_dim)) +# _ = Masking()(inputs) +# initial_state = [Input((units,)) for _ in range(num_states)] +# output = layer_class(units)(inputs, initial_state=initial_state) +# +# model = Model([inputs] + initial_state, output) +# model.compile(loss='categorical_crossentropy', optimizer='adam') +# +# inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# initial_state = [np.random.random((num_samples, units)) +# for _ in range(num_states)] +# targets = np.random.random((num_samples, units)) +# model.fit([inputs] + initial_state, targets) +# +# +# @rnn_test +# def test_return_state(layer_class): +# num_states = 2 if layer_class is recurrent.LSTM else 1 +# +# inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) +# layer = layer_class(units, return_state=True, stateful=True) +# outputs = layer(inputs) +# output, state = outputs[0], outputs[1:] +# assert len(state) == num_states +# model = Model(inputs, state[0]) +# +# inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# state = model.predict(inputs) +# np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) +# +# +# @rnn_test +# def test_state_reuse(layer_class): +# inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) +# layer = layer_class(units, return_state=True, return_sequences=True) +# outputs = layer(inputs) +# output, state = outputs[0], outputs[1:] +# output = layer_class(units)(output, initial_state=state) +# model = Model(inputs, output) +# +# inputs = np.random.random((num_samples, timesteps, embedding_dim)) +# outputs = model.predict(inputs) +# +# +# def test_minimal_rnn_cell_non_layer(): +# +# class MinimalRNNCell(object): +# +# def __init__(self, units, input_dim): +# self.units = units +# self.state_size = units +# self.kernel = keras.backend.variable( +# np.random.random((input_dim, units))) +# +# def call(self, inputs, states): +# prev_output = states[0] +# output = keras.backend.dot(inputs, self.kernel) + prev_output +# return output, [output] +# +# # Basic test case. +# cell = MinimalRNNCell(32, 5) +# x = keras.Input((None, 5)) +# layer = recurrent.RNN(cell) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# # Test stacking. 
+# cells = [MinimalRNNCell(8, 5), +# MinimalRNNCell(32, 8), +# MinimalRNNCell(32, 32)] +# layer = recurrent.RNN(cells) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# +# def test_minimal_rnn_cell_non_layer_multiple_states(): +# +# class MinimalRNNCell(object): +# +# def __init__(self, units, input_dim): +# self.units = units +# self.state_size = (units, units) +# self.kernel = keras.backend.variable( +# np.random.random((input_dim, units))) +# +# def call(self, inputs, states): +# prev_output_1 = states[0] +# prev_output_2 = states[1] +# output = keras.backend.dot(inputs, self.kernel) +# output += prev_output_1 +# output -= prev_output_2 +# return output, [output * 2, output * 3] +# +# # Basic test case. +# cell = MinimalRNNCell(32, 5) +# x = keras.Input((None, 5)) +# layer = recurrent.RNN(cell) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# # Test stacking. +# cells = [MinimalRNNCell(8, 5), +# MinimalRNNCell(16, 8), +# MinimalRNNCell(32, 16)] +# layer = recurrent.RNN(cells) +# assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# +# def test_minimal_rnn_cell_layer(): +# +# class MinimalRNNCell(keras.layers.Layer): +# +# def __init__(self, units, **kwargs): +# self.units = units +# self.state_size = units +# super(MinimalRNNCell, self).__init__(**kwargs) +# +# def build(self, input_shape): +# self.kernel = self.add_weight(shape=(input_shape[-1], self.units), +# initializer='uniform', +# name='kernel') +# self.recurrent_kernel = self.add_weight( +# shape=(self.units, self.units), +# initializer='uniform', +# name='recurrent_kernel') +# self.built = True +# +# def call(self, inputs, states): +# prev_output = states[0] +# h = keras.backend.dot(inputs, self.kernel) +# output = h + keras.backend.dot(prev_output, self.recurrent_kernel) +# return output, [output] +# +# def get_config(self): +# config = {'units': self.units} +# base_config = super(MinimalRNNCell, self).get_config() +# return dict(list(base_config.items()) + list(config.items())) +# +# # Test basic case. +# x = keras.Input((None, 5)) +# cell = MinimalRNNCell(32) +# layer = recurrent.RNN(cell) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# # Test basic case serialization. +# x_np = np.random.random((6, 5, 5)) +# y_np = model.predict(x_np) +# weights = model.get_weights() +# config = layer.get_config() +# with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): +# layer = recurrent.RNN.from_config(config) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.set_weights(weights) +# y_np_2 = model.predict(x_np) +# assert_allclose(y_np, y_np_2, atol=1e-4) +# +# # Test stacking. +# cells = [MinimalRNNCell(8), +# MinimalRNNCell(12), +# MinimalRNNCell(32)] +# layer = recurrent.RNN(cells) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) +# +# # Test stacked RNN serialization. 
+# x_np = np.random.random((6, 5, 5)) +# y_np = model.predict(x_np) +# weights = model.get_weights() +# config = layer.get_config() +# with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): +# layer = recurrent.RNN.from_config(config) +# y = layer(x) +# model = keras.models.Model(x, y) +# model.set_weights(weights) +# y_np_2 = model.predict(x_np) +# assert_allclose(y_np, y_np_2, atol=1e-4) +# +# +# def test_stacked_rnn_attributes(): +# cells = [recurrent.LSTMCell(3), +# recurrent.LSTMCell(3, kernel_regularizer='l2')] +# layer = recurrent.RNN(cells) +# layer.build((None, None, 5)) +# +# # Test regularization losses +# assert len(layer.losses) == 1 +# +# # Test weights +# assert len(layer.trainable_weights) == 6 +# cells[0].trainable = False +# assert len(layer.trainable_weights) == 3 +# assert len(layer.non_trainable_weights) == 3 +# +# # Test `get_losses_for` +# x = keras.Input((None, 5)) +# y = K.sum(x) +# cells[0].add_loss(y, inputs=x) +# assert layer.get_losses_for(x) == [y] +# +# +# @rnn_test +# def test_batch_size_equal_one(layer_class): +# inputs = Input(batch_shape=(1, timesteps, embedding_dim)) +# layer = layer_class(units) +# outputs = layer(inputs) +# model = Model(inputs, outputs) +# model.compile('sgd', 'mse') +# x = np.random.random((1, timesteps, embedding_dim)) +# y = np.random.random((1, units)) +# model.train_on_batch(x, y) +# +# +# def test_rnn_cell_with_constants_layer(): +# +# class RNNCellWithConstants(keras.layers.Layer): +# +# def __init__(self, units, **kwargs): +# self.units = units +# self.state_size = units +# super(RNNCellWithConstants, self).__init__(**kwargs) +# +# def build(self, input_shape): +# if not isinstance(input_shape, list): +# raise TypeError('expects constants shape') +# [input_shape, constant_shape] = input_shape +# # will (and should) raise if more than one constant passed +# +# self.input_kernel = self.add_weight( +# shape=(input_shape[-1], self.units), +# initializer='uniform', +# name='kernel') +# self.recurrent_kernel = self.add_weight( +# shape=(self.units, self.units), +# initializer='uniform', +# name='recurrent_kernel') +# self.constant_kernel = self.add_weight( +# shape=(constant_shape[-1], self.units), +# initializer='uniform', +# name='constant_kernel') +# self.built = True +# +# def call(self, inputs, states, constants): +# [prev_output] = states +# [constant] = constants +# h_input = keras.backend.dot(inputs, self.input_kernel) +# h_state = keras.backend.dot(prev_output, self.recurrent_kernel) +# h_const = keras.backend.dot(constant, self.constant_kernel) +# output = h_input + h_state + h_const +# return output, [output] +# +# def get_config(self): +# config = {'units': self.units} +# base_config = super(RNNCellWithConstants, self).get_config() +# return dict(list(base_config.items()) + list(config.items())) +# +# # Test basic case. +# x = keras.Input((None, 5)) +# c = keras.Input((3,)) +# cell = RNNCellWithConstants(32) +# layer = recurrent.RNN(cell) +# y = layer(x, constants=c) +# model = keras.models.Model([x, c], y) +# model.compile(optimizer='rmsprop', loss='mse') +# model.train_on_batch( +# [np.zeros((6, 5, 5)), np.zeros((6, 3))], +# np.zeros((6, 32)) +# ) +# +# # Test basic case serialization. 
+# x_np = np.random.random((6, 5, 5)) +# c_np = np.random.random((6, 3)) +# y_np = model.predict([x_np, c_np]) +# weights = model.get_weights() +# config = layer.get_config() +# with keras.utils.CustomObjectScope( +# {'RNNCellWithConstants': RNNCellWithConstants}): +# layer = recurrent.RNN.from_config(config) +# y = layer(x, constants=c) +# model = keras.models.Model([x, c], y) +# model.set_weights(weights) +# y_np_2 = model.predict([x_np, c_np]) +# assert_allclose(y_np, y_np_2, atol=1e-4) + + +def test_functional_rnn_cell(): + layers = keras.layers + + # Create the cell: + units = 8 + input_size = 5 + x = Input((input_size,)) + h_tm1 = Input((units,)) + h_ = layers.add([layers.Dense(units)(x), layers.Dense(units)(h_tm1)]) + h = layers.Activation('tanh')(h_) + cell = recurrent.FunctionalRNNCell( + inputs=x, outputs=h, input_states=h_tm1, output_states=h) # Test basic case. - x = keras.Input((None, 5)) - cell = MinimalRNNCell(32) + x_seq = Input((None, input_size)) layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # Test stacking. - cells = [MinimalRNNCell(8), - MinimalRNNCell(12), - MinimalRNNCell(32)] - layer = recurrent.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) + y = layer(x_seq) + model = keras.models.Model(x_seq, y) model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + model.train_on_batch(np.zeros((6, 5, input_size)), np.zeros((6, units))) - # Test stacked RNN serialization. 
- x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) +def test_functional_rnn_cell_with_constants(): + layers = keras.layers -def test_stacked_rnn_attributes(): - cells = [recurrent.LSTMCell(3), - recurrent.LSTMCell(3, kernel_regularizer='l2')] - layer = recurrent.RNN(cells) - layer.build((None, None, 5)) + # Create the cell: + units = 8 + input_size = 5 + constant_shape = (10,) + x = Input((input_size,)) + h_tm1 = Input((units,)) + c = Input(constant_shape) + h_ = layers.add([ + layers.Dense(units)(x), + layers.Dense(units)(h_tm1), + layers.Dense(units)(c) + ]) + h = layers.Activation('tanh')(h_) - # Test regularization losses - assert len(layer.losses) == 1 - - # Test weights - assert len(layer.trainable_weights) == 6 - cells[0].trainable = False - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 3 - - # Test `get_losses_for` - x = keras.Input((None, 5)) - y = K.sum(x) - cells[0].add_loss(y, inputs=x) - assert layer.get_losses_for(x) == [y] - - -@rnn_test -def test_batch_size_equal_one(layer_class): - inputs = Input(batch_shape=(1, timesteps, embedding_dim)) - layer = layer_class(units) - outputs = layer(inputs) - model = Model(inputs, outputs) - model.compile('sgd', 'mse') - x = np.random.random((1, timesteps, embedding_dim)) - y = np.random.random((1, units)) - model.train_on_batch(x, y) - - -def test_rnn_cell_with_constants_layer(): - - class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + cell = recurrent.FunctionalRNNCell( + inputs=x, outputs=h, input_states=h_tm1, output_states=h, constants=c) # Test basic case. 
- x = keras.Input((None, 5)) - c = keras.Input((3,)) - cell = RNNCellWithConstants(32) + x_seq = Input((None, input_size)) layer = recurrent.RNN(cell) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) + y = layer(x_seq, constants=c) + model = keras.models.Model([x_seq, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) + [np.zeros((6, 5, input_size)), np.zeros((6, constant_shape[0]))], + np.zeros((6, units)) ) - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope( - {'RNNCellWithConstants': RNNCellWithConstants}): - layer = recurrent.RNN.from_config(config) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - if __name__ == '__main__': pytest.main([__file__]) From 53deca2c9be2e8ca8d723780b978e99c68611132 Mon Sep 17 00:00:00 2001 From: andhus Date: Mon, 25 Sep 2017 01:58:49 +0200 Subject: [PATCH 03/13] put back accidentally commented out recurrent tests --- tests/keras/layers/recurrent_test.py | 1236 +++++++++++++------------- 1 file changed, 618 insertions(+), 618 deletions(-) diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index 47992eca5af9..ddb00abcb0bc 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -16,624 +16,624 @@ num_samples, timesteps, embedding_dim, units = 2, 5, 4, 3 embedding_num = 12 -# -# -# @keras_test -# def rnn_test(f): -# """ -# All the recurrent layers share the same interface, -# so we can run through them with a single function. 
-# """ -# f = keras_test(f) -# return pytest.mark.parametrize('layer_class', [ -# recurrent.SimpleRNN, -# recurrent.GRU, -# recurrent.LSTM -# ])(f) -# -# -# @rnn_test -# def test_return_sequences(layer_class): -# layer_test(layer_class, -# kwargs={'units': units, -# 'return_sequences': True}, -# input_shape=(num_samples, timesteps, embedding_dim)) -# -# -# @rnn_test -# def test_dynamic_behavior(layer_class): -# layer = layer_class(units, input_shape=(None, embedding_dim)) -# model = Sequential() -# model.add(layer) -# model.compile('sgd', 'mse') -# x = np.random.random((num_samples, timesteps, embedding_dim)) -# y = np.random.random((num_samples, units)) -# model.train_on_batch(x, y) -# -# -# @rnn_test -# def test_stateful_invalid_use(layer_class): -# layer = layer_class(units, -# stateful=True, -# batch_input_shape=(num_samples, -# timesteps, -# embedding_dim)) -# model = Sequential() -# model.add(layer) -# model.compile('sgd', 'mse') -# x = np.random.random((num_samples * 2, timesteps, embedding_dim)) -# y = np.random.random((num_samples * 2, units)) -# with pytest.raises(ValueError): -# model.fit(x, y) -# with pytest.raises(ValueError): -# model.predict(x, batch_size=num_samples + 1) -# -# -# @rnn_test -# @pytest.mark.skipif((K.backend() == 'cntk'), -# reason='Not yet supported.') -# def test_dropout(layer_class): -# for unroll in [True, False]: -# layer_test(layer_class, -# kwargs={'units': units, -# 'dropout': 0.1, -# 'recurrent_dropout': 0.1, -# 'unroll': unroll}, -# input_shape=(num_samples, timesteps, embedding_dim)) -# -# # Test that dropout is applied during training -# x = K.ones((num_samples, timesteps, embedding_dim)) -# layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, -# input_shape=(timesteps, embedding_dim)) -# y = layer(x) -# assert y._uses_learning_phase -# -# y = layer(x, training=True) -# assert not getattr(y, '_uses_learning_phase') -# -# # Test that dropout is not applied during testing -# x = np.random.random((num_samples, timesteps, embedding_dim)) -# layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, -# unroll=unroll, -# input_shape=(timesteps, embedding_dim)) -# model = Sequential([layer]) -# assert model.uses_learning_phase -# y1 = model.predict(x) -# y2 = model.predict(x) -# assert_allclose(y1, y2) -# -# -# @rnn_test -# def test_statefulness(layer_class): -# model = Sequential() -# model.add(embeddings.Embedding(embedding_num, embedding_dim, -# mask_zero=True, -# input_length=timesteps, -# batch_input_shape=(num_samples, timesteps))) -# layer = layer_class(units, return_sequences=False, -# stateful=True, -# weights=None) -# model.add(layer) -# model.compile(optimizer='sgd', loss='mse') -# out1 = model.predict(np.ones((num_samples, timesteps))) -# assert(out1.shape == (num_samples, units)) -# -# # train once so that the states change -# model.train_on_batch(np.ones((num_samples, timesteps)), -# np.ones((num_samples, units))) -# out2 = model.predict(np.ones((num_samples, timesteps))) -# -# # if the state is not reset, output should be different -# assert(out1.max() != out2.max()) -# -# # check that output changes after states are reset -# # (even though the model itself didn't change) -# layer.reset_states() -# out3 = model.predict(np.ones((num_samples, timesteps))) -# assert(out2.max() != out3.max()) -# -# # check that container-level reset_states() works -# model.reset_states() -# out4 = model.predict(np.ones((num_samples, timesteps))) -# assert_allclose(out3, out4, atol=1e-5) -# -# # check that the call to `predict` updated the states 
-# out5 = model.predict(np.ones((num_samples, timesteps))) -# assert(out4.max() != out5.max()) -# -# -# @rnn_test -# def test_masking_correctness(layer_class): -# # Check masking: output with left padding and right padding -# # should be the same. -# model = Sequential() -# model.add(embeddings.Embedding(embedding_num, embedding_dim, -# mask_zero=True, -# input_length=timesteps, -# batch_input_shape=(num_samples, timesteps))) -# layer = layer_class(units, return_sequences=False) -# model.add(layer) -# model.compile(optimizer='sgd', loss='mse') -# -# left_padded_input = np.ones((num_samples, timesteps)) -# left_padded_input[0, :1] = 0 -# left_padded_input[1, :2] = 0 -# out6 = model.predict(left_padded_input) -# -# right_padded_input = np.ones((num_samples, timesteps)) -# right_padded_input[0, -1:] = 0 -# right_padded_input[1, -2:] = 0 -# out7 = model.predict(right_padded_input) -# -# assert_allclose(out7, out6, atol=1e-5) -# -# -# @rnn_test -# def test_implementation_mode(layer_class): -# for mode in [1, 2]: -# # Without dropout -# layer_test(layer_class, -# kwargs={'units': units, -# 'implementation': mode}, -# input_shape=(num_samples, timesteps, embedding_dim)) -# # With dropout -# layer_test(layer_class, -# kwargs={'units': units, -# 'implementation': mode, -# 'dropout': 0.1, -# 'recurrent_dropout': 0.1}, -# input_shape=(num_samples, timesteps, embedding_dim)) -# -# -# @rnn_test -# def test_regularizer(layer_class): -# layer = layer_class(units, return_sequences=False, weights=None, -# input_shape=(timesteps, embedding_dim), -# kernel_regularizer=regularizers.l1(0.01), -# recurrent_regularizer=regularizers.l1(0.01), -# bias_regularizer='l2') -# layer.build((None, None, embedding_dim)) -# assert len(layer.losses) == 3 -# assert len(layer.cell.losses) == 3 -# -# layer = layer_class(units, return_sequences=False, weights=None, -# input_shape=(timesteps, embedding_dim), -# activity_regularizer='l2') -# assert layer.activity_regularizer -# x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) -# layer(x) -# assert len(layer.cell.get_losses_for(x)) == 0 -# assert len(layer.get_losses_for(x)) == 1 -# -# -# @keras_test -# def test_masking_layer(): -# ''' This test based on a previously failing issue here: -# https://github.com/fchollet/keras/issues/1567 -# ''' -# inputs = np.random.random((6, 3, 4)) -# targets = np.abs(np.random.random((6, 3, 5))) -# targets /= targets.sum(axis=-1, keepdims=True) -# -# model = Sequential() -# model.add(Masking(input_shape=(3, 4))) -# model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=False)) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) -# -# model = Sequential() -# model.add(Masking(input_shape=(3, 4))) -# model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) -# -# -# @rnn_test -# def test_from_config(layer_class): -# stateful_flags = (False, True) -# for stateful in stateful_flags: -# l1 = layer_class(units=1, stateful=stateful) -# l2 = layer_class.from_config(l1.get_config()) -# assert l1.get_config() == l2.get_config() -# -# -# @rnn_test -# def test_specify_initial_state_keras_tensor(layer_class): -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# # Test with Keras tensor -# inputs = Input((timesteps, embedding_dim)) -# initial_state = [Input((units,)) for 
_ in range(num_states)] -# layer = layer_class(units) -# if len(initial_state) == 1: -# output = layer(inputs, initial_state=initial_state[0]) -# else: -# output = layer(inputs, initial_state=initial_state) -# assert initial_state[0] in layer.inbound_nodes[0].input_tensors -# -# model = Model([inputs] + initial_state, output) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# -# inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# initial_state = [np.random.random((num_samples, units)) -# for _ in range(num_states)] -# targets = np.random.random((num_samples, units)) -# model.fit([inputs] + initial_state, targets) -# -# -# @rnn_test -# def test_specify_initial_state_non_keras_tensor(layer_class): -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# # Test with non-Keras tensor -# inputs = Input((timesteps, embedding_dim)) -# initial_state = [K.random_normal_variable((num_samples, units), 0, 1) -# for _ in range(num_states)] -# layer = layer_class(units) -# output = layer(inputs, initial_state=initial_state) -# -# model = Model(inputs, output) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# -# inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# targets = np.random.random((num_samples, units)) -# model.fit(inputs, targets) -# -# -# @rnn_test -# def test_reset_states_with_values(layer_class): -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# layer = layer_class(units, stateful=True) -# layer.build((num_samples, timesteps, embedding_dim)) -# layer.reset_states() -# assert len(layer.states) == num_states -# assert layer.states[0] is not None -# np.testing.assert_allclose(K.eval(layer.states[0]), -# np.zeros(K.int_shape(layer.states[0])), -# atol=1e-4) -# state_shapes = [K.int_shape(state) for state in layer.states] -# values = [np.ones(shape) for shape in state_shapes] -# if len(values) == 1: -# values = values[0] -# layer.reset_states(values) -# np.testing.assert_allclose(K.eval(layer.states[0]), -# np.ones(K.int_shape(layer.states[0])), -# atol=1e-4) -# -# # Test fit with invalid data -# with pytest.raises(ValueError): -# layer.reset_states([1] * (len(layer.states) + 1)) -# -# -# @rnn_test -# def test_initial_states_as_other_inputs(layer_class): -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# # Test with Keras tensor -# main_inputs = Input((timesteps, embedding_dim)) -# initial_state = [Input((units,)) for _ in range(num_states)] -# inputs = [main_inputs] + initial_state -# -# layer = layer_class(units) -# output = layer(inputs) -# assert initial_state[0] in layer.inbound_nodes[0].input_tensors -# -# model = Model(inputs, output) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# -# main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# initial_state = [np.random.random((num_samples, units)) -# for _ in range(num_states)] -# targets = np.random.random((num_samples, units)) -# model.train_on_batch([main_inputs] + initial_state, targets) -# -# -# @rnn_test -# def test_specify_state_with_masking(layer_class): -# ''' This test based on a previously failing issue here: -# https://github.com/fchollet/keras/issues/1567 -# ''' -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# inputs = Input((timesteps, embedding_dim)) -# _ = Masking()(inputs) -# initial_state = [Input((units,)) for _ in range(num_states)] -# output = layer_class(units)(inputs, initial_state=initial_state) -# -# model = Model([inputs] + 
initial_state, output) -# model.compile(loss='categorical_crossentropy', optimizer='adam') -# -# inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# initial_state = [np.random.random((num_samples, units)) -# for _ in range(num_states)] -# targets = np.random.random((num_samples, units)) -# model.fit([inputs] + initial_state, targets) -# -# -# @rnn_test -# def test_return_state(layer_class): -# num_states = 2 if layer_class is recurrent.LSTM else 1 -# -# inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) -# layer = layer_class(units, return_state=True, stateful=True) -# outputs = layer(inputs) -# output, state = outputs[0], outputs[1:] -# assert len(state) == num_states -# model = Model(inputs, state[0]) -# -# inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# state = model.predict(inputs) -# np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) -# -# -# @rnn_test -# def test_state_reuse(layer_class): -# inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) -# layer = layer_class(units, return_state=True, return_sequences=True) -# outputs = layer(inputs) -# output, state = outputs[0], outputs[1:] -# output = layer_class(units)(output, initial_state=state) -# model = Model(inputs, output) -# -# inputs = np.random.random((num_samples, timesteps, embedding_dim)) -# outputs = model.predict(inputs) -# -# -# def test_minimal_rnn_cell_non_layer(): -# -# class MinimalRNNCell(object): -# -# def __init__(self, units, input_dim): -# self.units = units -# self.state_size = units -# self.kernel = keras.backend.variable( -# np.random.random((input_dim, units))) -# -# def call(self, inputs, states): -# prev_output = states[0] -# output = keras.backend.dot(inputs, self.kernel) + prev_output -# return output, [output] -# -# # Basic test case. -# cell = MinimalRNNCell(32, 5) -# x = keras.Input((None, 5)) -# layer = recurrent.RNN(cell) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# # Test stacking. -# cells = [MinimalRNNCell(8, 5), -# MinimalRNNCell(32, 8), -# MinimalRNNCell(32, 32)] -# layer = recurrent.RNN(cells) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# -# def test_minimal_rnn_cell_non_layer_multiple_states(): -# -# class MinimalRNNCell(object): -# -# def __init__(self, units, input_dim): -# self.units = units -# self.state_size = (units, units) -# self.kernel = keras.backend.variable( -# np.random.random((input_dim, units))) -# -# def call(self, inputs, states): -# prev_output_1 = states[0] -# prev_output_2 = states[1] -# output = keras.backend.dot(inputs, self.kernel) -# output += prev_output_1 -# output -= prev_output_2 -# return output, [output * 2, output * 3] -# -# # Basic test case. -# cell = MinimalRNNCell(32, 5) -# x = keras.Input((None, 5)) -# layer = recurrent.RNN(cell) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# # Test stacking. 
-# cells = [MinimalRNNCell(8, 5), -# MinimalRNNCell(16, 8), -# MinimalRNNCell(32, 16)] -# layer = recurrent.RNN(cells) -# assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# -# def test_minimal_rnn_cell_layer(): -# -# class MinimalRNNCell(keras.layers.Layer): -# -# def __init__(self, units, **kwargs): -# self.units = units -# self.state_size = units -# super(MinimalRNNCell, self).__init__(**kwargs) -# -# def build(self, input_shape): -# self.kernel = self.add_weight(shape=(input_shape[-1], self.units), -# initializer='uniform', -# name='kernel') -# self.recurrent_kernel = self.add_weight( -# shape=(self.units, self.units), -# initializer='uniform', -# name='recurrent_kernel') -# self.built = True -# -# def call(self, inputs, states): -# prev_output = states[0] -# h = keras.backend.dot(inputs, self.kernel) -# output = h + keras.backend.dot(prev_output, self.recurrent_kernel) -# return output, [output] -# -# def get_config(self): -# config = {'units': self.units} -# base_config = super(MinimalRNNCell, self).get_config() -# return dict(list(base_config.items()) + list(config.items())) -# -# # Test basic case. -# x = keras.Input((None, 5)) -# cell = MinimalRNNCell(32) -# layer = recurrent.RNN(cell) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# # Test basic case serialization. -# x_np = np.random.random((6, 5, 5)) -# y_np = model.predict(x_np) -# weights = model.get_weights() -# config = layer.get_config() -# with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): -# layer = recurrent.RNN.from_config(config) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.set_weights(weights) -# y_np_2 = model.predict(x_np) -# assert_allclose(y_np, y_np_2, atol=1e-4) -# -# # Test stacking. -# cells = [MinimalRNNCell(8), -# MinimalRNNCell(12), -# MinimalRNNCell(32)] -# layer = recurrent.RNN(cells) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) -# -# # Test stacked RNN serialization. 
-# x_np = np.random.random((6, 5, 5)) -# y_np = model.predict(x_np) -# weights = model.get_weights() -# config = layer.get_config() -# with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): -# layer = recurrent.RNN.from_config(config) -# y = layer(x) -# model = keras.models.Model(x, y) -# model.set_weights(weights) -# y_np_2 = model.predict(x_np) -# assert_allclose(y_np, y_np_2, atol=1e-4) -# -# -# def test_stacked_rnn_attributes(): -# cells = [recurrent.LSTMCell(3), -# recurrent.LSTMCell(3, kernel_regularizer='l2')] -# layer = recurrent.RNN(cells) -# layer.build((None, None, 5)) -# -# # Test regularization losses -# assert len(layer.losses) == 1 -# -# # Test weights -# assert len(layer.trainable_weights) == 6 -# cells[0].trainable = False -# assert len(layer.trainable_weights) == 3 -# assert len(layer.non_trainable_weights) == 3 -# -# # Test `get_losses_for` -# x = keras.Input((None, 5)) -# y = K.sum(x) -# cells[0].add_loss(y, inputs=x) -# assert layer.get_losses_for(x) == [y] -# -# -# @rnn_test -# def test_batch_size_equal_one(layer_class): -# inputs = Input(batch_shape=(1, timesteps, embedding_dim)) -# layer = layer_class(units) -# outputs = layer(inputs) -# model = Model(inputs, outputs) -# model.compile('sgd', 'mse') -# x = np.random.random((1, timesteps, embedding_dim)) -# y = np.random.random((1, units)) -# model.train_on_batch(x, y) -# -# -# def test_rnn_cell_with_constants_layer(): -# -# class RNNCellWithConstants(keras.layers.Layer): -# -# def __init__(self, units, **kwargs): -# self.units = units -# self.state_size = units -# super(RNNCellWithConstants, self).__init__(**kwargs) -# -# def build(self, input_shape): -# if not isinstance(input_shape, list): -# raise TypeError('expects constants shape') -# [input_shape, constant_shape] = input_shape -# # will (and should) raise if more than one constant passed -# -# self.input_kernel = self.add_weight( -# shape=(input_shape[-1], self.units), -# initializer='uniform', -# name='kernel') -# self.recurrent_kernel = self.add_weight( -# shape=(self.units, self.units), -# initializer='uniform', -# name='recurrent_kernel') -# self.constant_kernel = self.add_weight( -# shape=(constant_shape[-1], self.units), -# initializer='uniform', -# name='constant_kernel') -# self.built = True -# -# def call(self, inputs, states, constants): -# [prev_output] = states -# [constant] = constants -# h_input = keras.backend.dot(inputs, self.input_kernel) -# h_state = keras.backend.dot(prev_output, self.recurrent_kernel) -# h_const = keras.backend.dot(constant, self.constant_kernel) -# output = h_input + h_state + h_const -# return output, [output] -# -# def get_config(self): -# config = {'units': self.units} -# base_config = super(RNNCellWithConstants, self).get_config() -# return dict(list(base_config.items()) + list(config.items())) -# -# # Test basic case. -# x = keras.Input((None, 5)) -# c = keras.Input((3,)) -# cell = RNNCellWithConstants(32) -# layer = recurrent.RNN(cell) -# y = layer(x, constants=c) -# model = keras.models.Model([x, c], y) -# model.compile(optimizer='rmsprop', loss='mse') -# model.train_on_batch( -# [np.zeros((6, 5, 5)), np.zeros((6, 3))], -# np.zeros((6, 32)) -# ) -# -# # Test basic case serialization. 
-# x_np = np.random.random((6, 5, 5)) -# c_np = np.random.random((6, 3)) -# y_np = model.predict([x_np, c_np]) -# weights = model.get_weights() -# config = layer.get_config() -# with keras.utils.CustomObjectScope( -# {'RNNCellWithConstants': RNNCellWithConstants}): -# layer = recurrent.RNN.from_config(config) -# y = layer(x, constants=c) -# model = keras.models.Model([x, c], y) -# model.set_weights(weights) -# y_np_2 = model.predict([x_np, c_np]) -# assert_allclose(y_np, y_np_2, atol=1e-4) + + +@keras_test +def rnn_test(f): + """ + All the recurrent layers share the same interface, + so we can run through them with a single function. + """ + f = keras_test(f) + return pytest.mark.parametrize('layer_class', [ + recurrent.SimpleRNN, + recurrent.GRU, + recurrent.LSTM + ])(f) + + +@rnn_test +def test_return_sequences(layer_class): + layer_test(layer_class, + kwargs={'units': units, + 'return_sequences': True}, + input_shape=(num_samples, timesteps, embedding_dim)) + + +@rnn_test +def test_dynamic_behavior(layer_class): + layer = layer_class(units, input_shape=(None, embedding_dim)) + model = Sequential() + model.add(layer) + model.compile('sgd', 'mse') + x = np.random.random((num_samples, timesteps, embedding_dim)) + y = np.random.random((num_samples, units)) + model.train_on_batch(x, y) + + +@rnn_test +def test_stateful_invalid_use(layer_class): + layer = layer_class(units, + stateful=True, + batch_input_shape=(num_samples, + timesteps, + embedding_dim)) + model = Sequential() + model.add(layer) + model.compile('sgd', 'mse') + x = np.random.random((num_samples * 2, timesteps, embedding_dim)) + y = np.random.random((num_samples * 2, units)) + with pytest.raises(ValueError): + model.fit(x, y) + with pytest.raises(ValueError): + model.predict(x, batch_size=num_samples + 1) + + +@rnn_test +@pytest.mark.skipif((K.backend() == 'cntk'), + reason='Not yet supported.') +def test_dropout(layer_class): + for unroll in [True, False]: + layer_test(layer_class, + kwargs={'units': units, + 'dropout': 0.1, + 'recurrent_dropout': 0.1, + 'unroll': unroll}, + input_shape=(num_samples, timesteps, embedding_dim)) + + # Test that dropout is applied during training + x = K.ones((num_samples, timesteps, embedding_dim)) + layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, + input_shape=(timesteps, embedding_dim)) + y = layer(x) + assert y._uses_learning_phase + + y = layer(x, training=True) + assert not getattr(y, '_uses_learning_phase') + + # Test that dropout is not applied during testing + x = np.random.random((num_samples, timesteps, embedding_dim)) + layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, + unroll=unroll, + input_shape=(timesteps, embedding_dim)) + model = Sequential([layer]) + assert model.uses_learning_phase + y1 = model.predict(x) + y2 = model.predict(x) + assert_allclose(y1, y2) + + +@rnn_test +def test_statefulness(layer_class): + model = Sequential() + model.add(embeddings.Embedding(embedding_num, embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps))) + layer = layer_class(units, return_sequences=False, + stateful=True, + weights=None) + model.add(layer) + model.compile(optimizer='sgd', loss='mse') + out1 = model.predict(np.ones((num_samples, timesteps))) + assert(out1.shape == (num_samples, units)) + + # train once so that the states change + model.train_on_batch(np.ones((num_samples, timesteps)), + np.ones((num_samples, units))) + out2 = model.predict(np.ones((num_samples, timesteps))) + + # if the state is not reset, 
output should be different + assert(out1.max() != out2.max()) + + # check that output changes after states are reset + # (even though the model itself didn't change) + layer.reset_states() + out3 = model.predict(np.ones((num_samples, timesteps))) + assert(out2.max() != out3.max()) + + # check that container-level reset_states() works + model.reset_states() + out4 = model.predict(np.ones((num_samples, timesteps))) + assert_allclose(out3, out4, atol=1e-5) + + # check that the call to `predict` updated the states + out5 = model.predict(np.ones((num_samples, timesteps))) + assert(out4.max() != out5.max()) + + +@rnn_test +def test_masking_correctness(layer_class): + # Check masking: output with left padding and right padding + # should be the same. + model = Sequential() + model.add(embeddings.Embedding(embedding_num, embedding_dim, + mask_zero=True, + input_length=timesteps, + batch_input_shape=(num_samples, timesteps))) + layer = layer_class(units, return_sequences=False) + model.add(layer) + model.compile(optimizer='sgd', loss='mse') + + left_padded_input = np.ones((num_samples, timesteps)) + left_padded_input[0, :1] = 0 + left_padded_input[1, :2] = 0 + out6 = model.predict(left_padded_input) + + right_padded_input = np.ones((num_samples, timesteps)) + right_padded_input[0, -1:] = 0 + right_padded_input[1, -2:] = 0 + out7 = model.predict(right_padded_input) + + assert_allclose(out7, out6, atol=1e-5) + + +@rnn_test +def test_implementation_mode(layer_class): + for mode in [1, 2]: + # Without dropout + layer_test(layer_class, + kwargs={'units': units, + 'implementation': mode}, + input_shape=(num_samples, timesteps, embedding_dim)) + # With dropout + layer_test(layer_class, + kwargs={'units': units, + 'implementation': mode, + 'dropout': 0.1, + 'recurrent_dropout': 0.1}, + input_shape=(num_samples, timesteps, embedding_dim)) + + +@rnn_test +def test_regularizer(layer_class): + layer = layer_class(units, return_sequences=False, weights=None, + input_shape=(timesteps, embedding_dim), + kernel_regularizer=regularizers.l1(0.01), + recurrent_regularizer=regularizers.l1(0.01), + bias_regularizer='l2') + layer.build((None, None, embedding_dim)) + assert len(layer.losses) == 3 + assert len(layer.cell.losses) == 3 + + layer = layer_class(units, return_sequences=False, weights=None, + input_shape=(timesteps, embedding_dim), + activity_regularizer='l2') + assert layer.activity_regularizer + x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) + layer(x) + assert len(layer.cell.get_losses_for(x)) == 0 + assert len(layer.get_losses_for(x)) == 1 + + +@keras_test +def test_masking_layer(): + ''' This test based on a previously failing issue here: + https://github.com/fchollet/keras/issues/1567 + ''' + inputs = np.random.random((6, 3, 4)) + targets = np.abs(np.random.random((6, 3, 5))) + targets /= targets.sum(axis=-1, keepdims=True) + + model = Sequential() + model.add(Masking(input_shape=(3, 4))) + model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=False)) + model.compile(loss='categorical_crossentropy', optimizer='adam') + model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) + + model = Sequential() + model.add(Masking(input_shape=(3, 4))) + model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) + model.compile(loss='categorical_crossentropy', optimizer='adam') + model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) + + +@rnn_test +def test_from_config(layer_class): + stateful_flags = (False, True) + for stateful in 
stateful_flags: + l1 = layer_class(units=1, stateful=stateful) + l2 = layer_class.from_config(l1.get_config()) + assert l1.get_config() == l2.get_config() + + +@rnn_test +def test_specify_initial_state_keras_tensor(layer_class): + num_states = 2 if layer_class is recurrent.LSTM else 1 + + # Test with Keras tensor + inputs = Input((timesteps, embedding_dim)) + initial_state = [Input((units,)) for _ in range(num_states)] + layer = layer_class(units) + if len(initial_state) == 1: + output = layer(inputs, initial_state=initial_state[0]) + else: + output = layer(inputs, initial_state=initial_state) + assert initial_state[0] in layer.inbound_nodes[0].input_tensors + + model = Model([inputs] + initial_state, output) + model.compile(loss='categorical_crossentropy', optimizer='adam') + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [np.random.random((num_samples, units)) + for _ in range(num_states)] + targets = np.random.random((num_samples, units)) + model.fit([inputs] + initial_state, targets) + + +@rnn_test +def test_specify_initial_state_non_keras_tensor(layer_class): + num_states = 2 if layer_class is recurrent.LSTM else 1 + + # Test with non-Keras tensor + inputs = Input((timesteps, embedding_dim)) + initial_state = [K.random_normal_variable((num_samples, units), 0, 1) + for _ in range(num_states)] + layer = layer_class(units) + output = layer(inputs, initial_state=initial_state) + + model = Model(inputs, output) + model.compile(loss='categorical_crossentropy', optimizer='adam') + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + targets = np.random.random((num_samples, units)) + model.fit(inputs, targets) + + +@rnn_test +def test_reset_states_with_values(layer_class): + num_states = 2 if layer_class is recurrent.LSTM else 1 + + layer = layer_class(units, stateful=True) + layer.build((num_samples, timesteps, embedding_dim)) + layer.reset_states() + assert len(layer.states) == num_states + assert layer.states[0] is not None + np.testing.assert_allclose(K.eval(layer.states[0]), + np.zeros(K.int_shape(layer.states[0])), + atol=1e-4) + state_shapes = [K.int_shape(state) for state in layer.states] + values = [np.ones(shape) for shape in state_shapes] + if len(values) == 1: + values = values[0] + layer.reset_states(values) + np.testing.assert_allclose(K.eval(layer.states[0]), + np.ones(K.int_shape(layer.states[0])), + atol=1e-4) + + # Test fit with invalid data + with pytest.raises(ValueError): + layer.reset_states([1] * (len(layer.states) + 1)) + + +@rnn_test +def test_initial_states_as_other_inputs(layer_class): + num_states = 2 if layer_class is recurrent.LSTM else 1 + + # Test with Keras tensor + main_inputs = Input((timesteps, embedding_dim)) + initial_state = [Input((units,)) for _ in range(num_states)] + inputs = [main_inputs] + initial_state + + layer = layer_class(units) + output = layer(inputs) + assert initial_state[0] in layer.inbound_nodes[0].input_tensors + + model = Model(inputs, output) + model.compile(loss='categorical_crossentropy', optimizer='adam') + + main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [np.random.random((num_samples, units)) + for _ in range(num_states)] + targets = np.random.random((num_samples, units)) + model.train_on_batch([main_inputs] + initial_state, targets) + + +@rnn_test +def test_specify_state_with_masking(layer_class): + ''' This test based on a previously failing issue here: + https://github.com/fchollet/keras/issues/1567 + ''' + 
num_states = 2 if layer_class is recurrent.LSTM else 1 + + inputs = Input((timesteps, embedding_dim)) + _ = Masking()(inputs) + initial_state = [Input((units,)) for _ in range(num_states)] + output = layer_class(units)(inputs, initial_state=initial_state) + + model = Model([inputs] + initial_state, output) + model.compile(loss='categorical_crossentropy', optimizer='adam') + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [np.random.random((num_samples, units)) + for _ in range(num_states)] + targets = np.random.random((num_samples, units)) + model.fit([inputs] + initial_state, targets) + + +@rnn_test +def test_return_state(layer_class): + num_states = 2 if layer_class is recurrent.LSTM else 1 + + inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) + layer = layer_class(units, return_state=True, stateful=True) + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + assert len(state) == num_states + model = Model(inputs, state[0]) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + state = model.predict(inputs) + np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) + + +@rnn_test +def test_state_reuse(layer_class): + inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) + layer = layer_class(units, return_state=True, return_sequences=True) + outputs = layer(inputs) + output, state = outputs[0], outputs[1:] + output = layer_class(units)(output, initial_state=state) + model = Model(inputs, output) + + inputs = np.random.random((num_samples, timesteps, embedding_dim)) + outputs = model.predict(inputs) + + +def test_minimal_rnn_cell_non_layer(): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = units + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output = states[0] + output = keras.backend.dot(inputs, self.kernel) + prev_output + return output, [output] + + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = recurrent.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. + cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(32, 8), + MinimalRNNCell(32, 32)] + layer = recurrent.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + +def test_minimal_rnn_cell_non_layer_multiple_states(): + + class MinimalRNNCell(object): + + def __init__(self, units, input_dim): + self.units = units + self.state_size = (units, units) + self.kernel = keras.backend.variable( + np.random.random((input_dim, units))) + + def call(self, inputs, states): + prev_output_1 = states[0] + prev_output_2 = states[1] + output = keras.backend.dot(inputs, self.kernel) + output += prev_output_1 + output -= prev_output_2 + return output, [output * 2, output * 3] + + # Basic test case. + cell = MinimalRNNCell(32, 5) + x = keras.Input((None, 5)) + layer = recurrent.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacking. 
+ cells = [MinimalRNNCell(8, 5), + MinimalRNNCell(16, 8), + MinimalRNNCell(32, 16)] + layer = recurrent.RNN(cells) + assert layer.cell.state_size == (32, 32, 16, 16, 8, 8) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + +def test_minimal_rnn_cell_layer(): + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = keras.backend.dot(inputs, self.kernel) + output = h + keras.backend.dot(prev_output, self.recurrent_kernel) + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(MinimalRNNCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + # Test basic case. + x = keras.Input((None, 5)) + cell = MinimalRNNCell(32) + layer = recurrent.RNN(cell) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = recurrent.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + assert_allclose(y_np, y_np_2, atol=1e-4) + + # Test stacking. + cells = [MinimalRNNCell(8), + MinimalRNNCell(12), + MinimalRNNCell(32)] + layer = recurrent.RNN(cells) + y = layer(x) + model = keras.models.Model(x, y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) + + # Test stacked RNN serialization. 
+ x_np = np.random.random((6, 5, 5)) + y_np = model.predict(x_np) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): + layer = recurrent.RNN.from_config(config) + y = layer(x) + model = keras.models.Model(x, y) + model.set_weights(weights) + y_np_2 = model.predict(x_np) + assert_allclose(y_np, y_np_2, atol=1e-4) + + +def test_stacked_rnn_attributes(): + cells = [recurrent.LSTMCell(3), + recurrent.LSTMCell(3, kernel_regularizer='l2')] + layer = recurrent.RNN(cells) + layer.build((None, None, 5)) + + # Test regularization losses + assert len(layer.losses) == 1 + + # Test weights + assert len(layer.trainable_weights) == 6 + cells[0].trainable = False + assert len(layer.trainable_weights) == 3 + assert len(layer.non_trainable_weights) == 3 + + # Test `get_losses_for` + x = keras.Input((None, 5)) + y = K.sum(x) + cells[0].add_loss(y, inputs=x) + assert layer.get_losses_for(x) == [y] + + +@rnn_test +def test_batch_size_equal_one(layer_class): + inputs = Input(batch_shape=(1, timesteps, embedding_dim)) + layer = layer_class(units) + outputs = layer(inputs) + model = Model(inputs, outputs) + model.compile('sgd', 'mse') + x = np.random.random((1, timesteps, embedding_dim)) + y = np.random.random((1, units)) + model.train_on_batch(x, y) + + +def test_rnn_cell_with_constants_layer(): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32) + layer = recurrent.RNN(cell) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + # Test basic case serialization. 
+ x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope( + {'RNNCellWithConstants': RNNCellWithConstants}): + layer = recurrent.RNN.from_config(config) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + assert_allclose(y_np, y_np_2, atol=1e-4) def test_functional_rnn_cell(): From 568fd2e9f8521d0f755bf1b4b6ef3b06d1675d23 Mon Sep 17 00:00:00 2001 From: andhus Date: Sat, 7 Oct 2017 07:42:31 +0200 Subject: [PATCH 04/13] added basic example of functional cell --- examples/functional_rnn_cell.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 examples/functional_rnn_cell.py diff --git a/examples/functional_rnn_cell.py b/examples/functional_rnn_cell.py new file mode 100644 index 000000000000..f3e67b2d104b --- /dev/null +++ b/examples/functional_rnn_cell.py @@ -0,0 +1,45 @@ +from __future__ import division, print_function + +import numpy as np + +from keras import Input +from keras.layers import add, Dense, Activation, FunctionalRNNCell, RNN, \ + concatenate, multiply, Model + +units = 32 +input_size = 5 +x = Input((input_size,)) +h_tm1 = Input((units,)) +h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_tm1)]) +h = Activation('tanh')(h_) + +# Create the cell: + +cell = FunctionalRNNCell( + inputs=x, outputs=h, input_states=h_tm1, output_states=h) + +x_sequence = Input((None, input_size)) +rnn = RNN(cell) +y = rnn(x_sequence) + +# Now we can modify the cell to make use of "external" constants: +constant_shape = (10,) +c = Input(constant_shape) +density = Dense(constant_shape[0], activation='softmax')( + concatenate([x, h_tm1])) +attention = multiply([density, c]) +h2_ = add([h, Dense(units)(attention)]) +h2 = Activation('tanh')(h2_) + +attention_cell = FunctionalRNNCell( + inputs=x, outputs=h2, input_states=h_tm1, output_states=h2, constants=c) + +attention_rnn = RNN(attention_cell) +y2 = attention_rnn(x_sequence, constants=c) +# Note that shape of c is same as in cell (no time dimension added) + +attention_model = Model([x_sequence, c], y2) + +x_sequence_arr = np.random.randn(3, 5, input_size) +c_arr = np.random.randn(3, constant_shape[0]) +y2_arr = attention_model.predict([x_sequence_arr, c_arr]) From 2f9f6f07d2652f773ae79d832205d6793accf111 Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 8 Oct 2017 18:43:58 +0200 Subject: [PATCH 05/13] new class AttentionRNN --- keras/layers/recurrent.py | 260 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 54e59b7c2ead..73384df16554 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -844,6 +844,266 @@ def get_losses_for(self, inputs=None): return super(RNN, self).get_losses_for(inputs) +class AttentionRNN(RNN): + """Base class for attentive recurrent layers. + + # Arguments + cell: A RNN cell instance supporting attention. It should implement: + - a `call(input_at_t, states_at_t, attended)` method, returning + `(output_at_t, states_at_t_plus_1)`. It must accept the keyword + argument `attended` which refers to the input(s) (tensor or + list of tensors) that is attended to an will be presented as a + whole at each timestep. + - a `state_size` attribute. 
This can be a single integer
+            (single state) in which case it is the size of the recurrent
+            state (which should be the same as the size of the cell
+            output). This can also be a list/tuple of integers
+            (one size per state). In this case, the first entry
+            (`state_size[0]`) should be the same as the size of the cell
+            output.
+            If the RNN cell is a Keras layer, the input_shape passed to its
+            `build` method will be a list of the input shape of the regular
+            sequence input followed by the shape(s) of the attended.
+        **kwargs: See the docs of the base class `RNN`.
+
+    # Input shapes
+        3D tensor with shape `(batch_size, timesteps, input_dim)`,
+        (Optional) 2D tensors with shape `(batch_size, output_dim)` holding
+        the initial states.
+
+    # Attended shapes
+        ND tensor of the shape expected by the attentive cell.
+
+    # Examples
+
+    ```python
+
+        TODO: minimal example (using functional API?)
+    ```
+    """
+
+    def __init__(self, cell, **kwargs):
+        if isinstance(cell, (list, tuple)):
+            # Note: it is not obvious how one would want to propagate the
+            # attended for stacked cells; the user should stack them manually
+            # into a single cell
+            raise ValueError('AttentionRNN only supports a single cell')
+        super(AttentionRNN, self).__init__(cell=cell, **kwargs)
+        # we let the base class check that the cell has a `call` method
+        # before checking for the additional argument
+        if not has_arg(cell.call, 'attended'):
+            raise ValueError('`cell.call` does not take the keyword argument'
+                             ' attended')
+
+        self._n_attended = None  # set in __call__, needed in build to split
+                                 # input_shape
+        self.attended_spec = None
+
+    def build(self, input_shape):
+        attended_shapes = input_shape[-self._n_attended:]
+        input_shape = input_shape[0]
+        batch_size = input_shape[0] if self.stateful else None
+        input_dim = input_shape[-1]
+        self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim))
+
+        attended_specs = [InputSpec(shape=(batch_size,) + attended_shape[1:])
+                          for attended_shape in attended_shapes]
+        if len(attended_specs) > 1:
+            self.attended_spec = attended_specs
+        else:
+            self.attended_spec = attended_specs[0]
+
+        if self.stateful:
+            self.reset_states()
+
+        if isinstance(self.cell, Layer):
+            step_input_shape = (input_shape[0],) + input_shape[2:]
+            self.cell.build([step_input_shape] + attended_shapes)
+
+    def __call__(self, inputs, initial_state=None, attended=None, **kwargs):
+        # If there are multiple inputs, then they should be the main input,
+        # `initial_state` and `attended`
+        # TODO: what is meant by "e.g. when loading model from file" in the
+        # comment in the base class RNN? Can there be a problem if initial
+        # states are not passed to the AttentionRNN with respect to this?
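The `# Examples` section of the docstring above is still marked TODO. Purely as an illustration, and not part of this patch, the sketch below shows one way the interface described above could be exercised. The cell class `MinimalAttentionCell` is hypothetical; it mirrors the `RNNCellWithConstants` cell from the test suite, with the `constants` keyword replaced by the `attended` keyword that `AttentionRNN` requires.

```python
import numpy as np
import keras
from keras.layers import recurrent


class MinimalAttentionCell(keras.layers.Layer):
    # Hypothetical cell whose `call` accepts the `attended` keyword
    # required by AttentionRNN.

    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalAttentionCell, self).__init__(**kwargs)

    def build(self, input_shape):
        # AttentionRNN.build passes [step_input_shape] + attended_shapes.
        [step_input_shape, attended_shape] = input_shape
        self.input_kernel = self.add_weight(
            shape=(step_input_shape[-1], self.units),
            initializer='uniform', name='input_kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform', name='recurrent_kernel')
        self.attended_kernel = self.add_weight(
            shape=(attended_shape[-1], self.units),
            initializer='uniform', name='attended_kernel')
        self.built = True

    def call(self, inputs, states, attended):
        # `attended` is a list of static (not time-varying) inputs that is
        # presented as a whole at every timestep.
        [prev_output] = states
        [attended_input] = attended
        output = (keras.backend.dot(inputs, self.input_kernel) +
                  keras.backend.dot(prev_output, self.recurrent_kernel) +
                  keras.backend.dot(attended_input, self.attended_kernel))
        return output, [output]


x = keras.Input((None, 5))   # input sequence
a = keras.Input((3,))        # attended input, no time dimension
layer = recurrent.AttentionRNN(MinimalAttentionCell(32))
y = layer(x, attended=a)

model = keras.models.Model([x, a], y)
model.compile(optimizer='rmsprop', loss='mse')
model.train_on_batch([np.zeros((6, 5, 5)), np.zeros((6, 3))],
                     np.zeros((6, 32)))
```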
+        inputs, initial_state, attended = self._normalize_args(
+            inputs, initial_state, attended)
+
+        if attended is None:
+            raise ValueError('the `attended` input must be passed to an'
+                             ' AttentionRNN')
+        # we need to know the number of attended inputs in build
+        self._n_attended = len(attended)
+
+        check_list = []
+        if initial_state:
+            check_list += initial_state
+        if attended:
+            check_list += attended
+        # at this point check_list cannot be empty
+        is_keras_tensor = hasattr(check_list[0], '_keras_history')
+        for tensor in check_list:
+            if hasattr(tensor, '_keras_history') != is_keras_tensor:
+                raise ValueError('The initial state and attended inputs of an'
+                                 ' RNN layer cannot be specified with a mix'
+                                 ' of Keras tensors and non-Keras tensors')
+
+        if is_keras_tensor:
+            # Compute the full input spec, including state and attended
+            input_spec = self.input_spec
+            state_spec = self.state_spec
+            if not isinstance(input_spec, list):
+                input_spec = [input_spec]
+            if not isinstance(state_spec, list):
+                state_spec = [state_spec]
+            self.input_spec = input_spec
+            inputs = [inputs]
+            if initial_state:
+                self.input_spec += state_spec
+                inputs += initial_state
+                kwargs['initial_state'] = initial_state
+            if attended:
+                # `self.attended_spec` is only set in `build`, so the specs
+                # are computed from the attended tensors directly here.
+                self.input_spec += [InputSpec(shape=K.int_shape(att))
+                                    for att in attended]
+                inputs += attended
+                kwargs['attended'] = attended
+
+            # Perform the call
+            output = Layer.__call__(self, inputs, **kwargs)
+
+            # Restore original input spec
+            self.input_spec = input_spec
+            return output
+        else:
+            kwargs['initial_state'] = initial_state
+            if attended is not None:
+                kwargs['attended'] = attended
+            return Layer.__call__(self, inputs, **kwargs)
+
+    def call(self,
+             inputs,
+             mask=None,
+             training=None,
+             initial_state=None,
+             attended=None):
+        # TODO: this method duplicates almost everything in RNN.call,
+        # is there a better solution?
+
+        # input shape: `(samples, time (padded with zeros), input_dim)`
+        # note that the .build() method of subclasses MUST define
+        # self.input_spec and self.state_spec with complete input shapes.
+        if isinstance(inputs, list):
+            inputs = inputs[0]
+        if initial_state is not None:
+            pass
+        elif self.stateful:
+            initial_state = self.states
+        else:
+            initial_state = self.get_initial_state(inputs)
+
+        if isinstance(mask, list):
+            mask = mask[0]
+
+        if len(initial_state) != len(self.states):
+            raise ValueError('Layer has ' + str(len(self.states)) +
+                             ' states but was passed ' +
+                             str(len(initial_state)) +
+                             ' initial states.')
+        input_shape = K.int_shape(inputs)
+        timesteps = input_shape[1]
+        if self.unroll and timesteps in [None, 1]:
+            raise ValueError('Cannot unroll a RNN if the '
+                             'time dimension is undefined or equal to 1. \n'
+                             '- If using a Sequential model, '
+                             'specify the time dimension by passing '
+                             'an `input_shape` or `batch_input_shape` '
+                             'argument to your first layer. If your '
+                             'first layer is an Embedding, you can '
+                             'also use the `input_length` argument.\n'
+                             '- If using the functional API, specify '
+                             'the time dimension by passing a `shape` '
+                             'or `batch_shape` argument to your Input layer.')
+
+        cell_kwargs = {'attended': attended}
+        if has_arg(self.cell.call, 'training'):
+            cell_kwargs['training'] = training
+
+        # NOTE: because the attended are passed into K.rnn implicitly
+        # (captured by the step function), the Theano backend cannot optimise
+        # the scan op, see the section
+        # "Explicitly passing inputs of the inner function to scan" in:
+        # http://deeplearning.net/software/theano/library/scan.html#lib-scan-shared-variables
+        # but on the other hand the weights (shared variables) of the cell
+        # transformation are not passed explicitly either.
+ step = functools.partial(self.cell.call, **cell_kwargs) + + last_output, outputs, states = K.rnn(step, + inputs, + initial_state, + go_backwards=self.go_backwards, + mask=mask, + unroll=self.unroll, + input_length=timesteps) + if self.stateful: + updates = [] + for i in range(len(states)): + updates.append((self.states[i], states[i])) + self.add_update(updates, inputs) + + if self.return_sequences: + output = outputs + else: + output = last_output + + # Properly set learning phase + if getattr(last_output, '_uses_learning_phase', False): + output._uses_learning_phase = True + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [output] + states + else: + return output + + def _normalize_args(self, inputs, initial_state, attended): + """The inputs `initial_state` and `attended` can be passed to + AttentionRNN.__call__ either by separate arguments or as part of + `inputs`. In this case `inputs` is a list of tensors of which the first + one is the actual (sequence) input followed by initial states followed + by the attended. + + This method separates and normalizes the different groups of inputs. + + # Arguments + inputs: tensor of list/tuple of tensors + initial_state: tensor or list of tensors or None + attended: tensor or list of tensors or None + + # Returns + inputs: tensor + initial_state: list of tensors or None + attended: list of tensors or None + """ + if isinstance(inputs, (list, tuple)): + remaining_inputs = inputs[1:] + inputs = inputs[0] + if remaining_inputs and initial_state is None: + if isinstance(self.state_spec, list): + n_states = len(self.state_spec) + else: + n_states = 1 + initial_state = remaining_inputs[:n_states] + remaining_inputs = remaining_inputs[n_states:] + if remaining_inputs and attended is None: + attended = remaining_inputs + if len(remaining_inputs) > 0: + raise ValueError('too many inputs were passed') + + initial_state = _to_list_or_none(initial_state) + attended = _to_list_or_none(attended) + + return inputs, initial_state, attended + + class SimpleRNNCell(Layer): """Cell class for SimpleRNN. From 1b90731dd0b1bbab0fb6025d8664c447e358fded Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 8 Oct 2017 18:57:25 +0200 Subject: [PATCH 06/13] restored RNN layer --- keras/layers/recurrent.py | 1126 +++++++++++++++++-------------------- 1 file changed, 518 insertions(+), 608 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 73384df16554..165bebaaeb3d 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -298,9 +298,7 @@ class RNN(Layer): # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. + `(output_at_t, states_at_t_plus_1)`. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state @@ -329,7 +327,8 @@ class RNN(Layer): although it tends to be more memory-intensive. Unrolling is only suitable for short sequences. input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) + This argument (or alternatively, + the keyword argument `input_shape`) is required when using this layer as the first layer in a model. 
input_length: Length of input sequences, to be specified when it is constant. @@ -391,55 +390,47 @@ class RNN(Layer): `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. - # Note on passing external constants to RNNs - You can pass "external" constants to the cell using the `constants` - keyword argument of RNN.__call__ (as well as RNN.call) method. This - requires that the `cell.call` method accepts the same keyword argument - `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time) - (a.k.a. an attention mechanism). - # Examples ```python - # First, let's define a RNN Cell, as a layer subclass. - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - - # Let's use this cell in a RNN layer: - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = RNN(cell) - y = layer(x) - - # Here's how to use the cell to build a stacked RNN: - - cells = [MinimalRNNCell(32), MinimalRNNCell(64)] - x = keras.Input((None, 5)) - layer = RNN(cells) - y = layer(x) + # First, let's define a RNN Cell, as a layer subclass. + + class MinimalRNNCell(keras.layers.Layer): + + def __init__(self, units, **kwargs): + self.units = units + self.state_size = units + super(MinimalRNNCell, self).__init__(**kwargs) + + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.built = True + + def call(self, inputs, states): + prev_output = states[0] + h = K.dot(inputs, self.kernel) + output = h + K.dot(prev_output, self.recurrent_kernel) + return output, [output] + + # Let's use this cell in a RNN layer: + + cell = MinimalRNNCell(32) + x = keras.Input((None, 5)) + layer = RNN(cell) + y = layer(x) + + # Here's how to use the cell to build a stacked RNN: + + cells = [MinimalRNNCell(32), MinimalRNNCell(64)] + x = keras.Input((None, 5)) + layer = RNN(cells) + y = layer(x) ``` """ @@ -477,8 +468,6 @@ def __init__(self, cell, self.state_spec = InputSpec(shape=(None, self.cell.state_size)) self._states = None - self.external_constants_spec = None - @property def states(self): if self._states is None: @@ -524,14 +513,6 @@ def compute_mask(self, inputs, mask): return output_mask def build(self, input_shape): - # Note input_shape will be list of shapes of initial states and - # constants if these are passed in __call__. 
- if self.external_constants_spec is not None: - # input_shape must be list - constants_shape = input_shape[-len(self.external_constants_spec):] - else: - constants_shape = None - if isinstance(input_shape, list): input_shape = input_shape[0] @@ -544,10 +525,7 @@ def build(self, input_shape): if isinstance(self.cell, Layer): step_input_shape = (input_shape[0],) + input_shape[2:] - if constants_shape is not None: - self.cell.build([step_input_shape] + constants_shape) - else: - self.cell.build(step_input_shape) + self.cell.build(step_input_shape) def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) @@ -560,58 +538,43 @@ def get_initial_state(self, inputs): else: return [K.tile(initial_state, [1, self.cell.state_size])] - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - # If there are multiple inputs, then they should be the main input, - # `initial_state` and (optionally) `constants` e.g. when loading model - # from file # TODO ask for clarification - inputs, initial_state, constants = self._normalize_args( - inputs, initial_state, constants) - - # we need to know length of constants in build - if constants: - self.external_constants_spec = [ - InputSpec(shape=K.int_shape(constant)) - for constant in constants - ] - - if initial_state is None and constants is None: + def __call__(self, inputs, initial_state=None, **kwargs): + # If there are multiple inputs, then + # they should be the main input and `initial_state` + # e.g. when loading model from file + if isinstance(inputs, (list, tuple)) and len(inputs) > 1 and initial_state is None: + initial_state = inputs[1:] + inputs = inputs[0] + + # If `initial_state` is specified, + # and if it a Keras tensor, + # then add it to the inputs and temporarily + # modify the input spec to include the state. + if initial_state is None: return super(RNN, self).__call__(inputs, **kwargs) - # If any of `initial_state` or `constants` are specified and are Keras - # tensors, then add them to the inputs and temporarily modify the - # input_spec to include them. 
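As a shape walk-through of `get_initial_state` above, here is an illustrative numpy version with arbitrary sizes (the actual layer uses backend ops such as `K.zeros_like`, `K.expand_dims` and `K.tile`):

```python
import numpy as np

inputs = np.random.randn(4, 7, 5)                  # (samples, timesteps, input_dim)
zeros = np.zeros_like(inputs)                      # all-zero, same shape as inputs
summed = zeros.sum(axis=(1, 2))                    # (samples,)
column = np.expand_dims(summed, axis=-1)           # (samples, 1)
state_size = 32
initial_state = np.tile(column, (1, state_size))   # (samples, state_size), all zeros
```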
+ if not isinstance(initial_state, (list, tuple)): + initial_state = [initial_state] - check_list = [] - if initial_state: - check_list += initial_state - if constants: - check_list += constants - # at this point check_list cannot be empty - is_keras_tensor = hasattr(check_list[0], '_keras_history') - for tensor in check_list: + is_keras_tensor = hasattr(initial_state[0], '_keras_history') + for tensor in initial_state: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state and constants of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') + raise ValueError('The initial state of an RNN layer cannot be' + ' specified with a mix of Keras tensors and' + ' non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state and constants + # Compute the full input spec, including state input_spec = self.input_spec state_spec = self.state_spec if not isinstance(input_spec, list): input_spec = [input_spec] if not isinstance(state_spec, list): state_spec = [state_spec] - self.input_spec = input_spec - inputs = [inputs] - if initial_state: - self.input_spec += state_spec - inputs += initial_state - kwargs['initial_state'] = initial_state - if constants: - self.input_spec += self.external_constants_spec - inputs += constants - kwargs['constants'] = constants + self.input_spec = input_spec + state_spec + + # Compute the full inputs, including state + inputs = [inputs] + list(initial_state) # Perform the call output = super(RNN, self).__call__(inputs, **kwargs) @@ -621,22 +584,16 @@ def __call__(self, inputs, initial_state=None, constants=None, **kwargs): return output else: kwargs['initial_state'] = initial_state - if constants is not None: - kwargs['constants'] = constants return super(RNN, self).__call__(inputs, **kwargs) - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): + def call(self, inputs, mask=None, training=None, initial_state=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): + initial_state = inputs[1:] inputs = inputs[0] - if initial_state is not None: + elif initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -665,17 +622,9 @@ def call(self, '- If using the functional API, specify ' 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') - cell_kwargs = {} - if has_arg(self.cell.call, 'training'): - cell_kwargs['training'] = training - - if constants is not None: - if not has_arg(self.cell.call, 'constants'): - raise TypeError('cell does not take keyword argument constants') - cell_kwargs['constants'] = constants - if cell_kwargs: - step = functools.partial(self.cell.call, **cell_kwargs) + if has_arg(self.cell.call, 'training'): + step = functools.partial(self.cell.call, training=training) else: step = self.cell.call last_output, outputs, states = K.rnn(step, @@ -709,45 +658,6 @@ def call(self, else: return output - def _normalize_args(self, inputs, initial_state=None, constants=None): - """The inputs `initial_state` and `constants` can be passed to - RNN.__call__ either by separate arguments or as part of `inputs`. In - this case `inputs` is a list of tensors of which the first one is the - actual (sequence) input followed by initial states, followed by - constants. 
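For reference, the calling convention restored above is the usual functional-API one; a small usage sketch, assuming `RNN` and `SimpleRNNCell` are importable from `keras.layers.recurrent` as defined in this file (sizes are illustrative):

```python
import numpy as np
from keras import Input
from keras.models import Model
from keras.layers.recurrent import RNN, SimpleRNNCell

x = Input((None, 5))      # (batch, timesteps, input_dim)
h0 = Input((32,))         # one initial state, matching the cell's state_size
y = RNN(SimpleRNNCell(32))(x, initial_state=h0)  # h0 becomes an extra model input

model = Model([x, h0], y)
model.compile(optimizer='rmsprop', loss='mse')
model.train_on_batch([np.zeros((2, 4, 5)), np.zeros((2, 32))],
                     np.zeros((2, 32)))
```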
- - This method separates and noramlizes the different groups of inputs. - - # Arguments - inputs: tensor of list/tuple of tensors - initial_state: tensor or list of tensors or None - constants: tensor or list of tensors or None - - # Returns - inputs: tensor - initial_state: list of tensors or None - constants: list of tensors or None - """ - if isinstance(inputs, (list, tuple)): - remaining_inputs = inputs[1:] - inputs = inputs[0] - if remaining_inputs and initial_state is None: - if isinstance(self.state_spec, list): - n_states = len(self.state_spec) - else: - n_states = 1 - initial_state = remaining_inputs[:n_states] - remaining_inputs = remaining_inputs[n_states:] - if remaining_inputs and constants is None: - constants = remaining_inputs - if len(remaining_inputs) > 0: - raise ValueError('too many inputs were passed') - - initial_state = _to_list_or_none(initial_state) - constants = _to_list_or_none(constants) - - return inputs, initial_state, constants - def reset_states(self, states=None): if not self.stateful: raise AttributeError('Layer must be stateful.') @@ -844,268 +754,168 @@ def get_losses_for(self, inputs=None): return super(RNN, self).get_losses_for(inputs) -class AttentionRNN(RNN): - """Base class for attentive recurrent layers. +class SimpleRNNCell(Layer): + """Cell class for SimpleRNN. # Arguments - cell: A RNN cell instance supporting attention. It should implement: - - a `call(input_at_t, states_at_t, attended)` method, returning - `(output_at_t, states_at_t_plus_1)`. It must accept the keyword - argument `attended` which refers to the input(s) (tensor or - list of tensors) that is attended to an will be presented as a - whole at each timestep. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is the size of the recurrent - state (which should be the same as the size of the cell - output). This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as the size of the cell - output. - If the RNN cell is a keras layer, the input_shape passed to its - `build` method will be a list of the input shape of the regular - sequence input followed by the shape(s) of the attended. - **kwargs: See docs of super class RNN. + units: Positive integer, dimensionality of the output space. + activation: Activation function to use + (see [activations](../activations.md)). + If you pass None, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix, + used for the linear transformation of the inputs. + (see [initializers](../initializers.md)). + recurrent_initializer: Initializer for the `recurrent_kernel` + weights matrix, + used for the linear transformation of the recurrent state. + (see [initializers](../initializers.md)). + bias_initializer: Initializer for the bias vector + (see [initializers](../initializers.md)). + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix + (see [regularizer](../regularizers.md)). + recurrent_regularizer: Regularizer function applied to + the `recurrent_kernel` weights matrix + (see [regularizer](../regularizers.md)). + bias_regularizer: Regularizer function applied to the bias vector + (see [regularizer](../regularizers.md)). + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + (see [regularizer](../regularizers.md)). 
+ kernel_constraint: Constraint function applied to + the `kernel` weights matrix + (see [constraints](../constraints.md)). + recurrent_constraint: Constraint function applied to + the `recurrent_kernel` weights matrix + (see [constraints](../constraints.md)). + bias_constraint: Constraint function applied to the bias vector + (see [constraints](../constraints.md)). + dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the inputs. + recurrent_dropout: Float between 0 and 1. + Fraction of the units to drop for + the linear transformation of the recurrent state. + """ - # Input shapes - 3D tensor with shape `(batch_size, timesteps, input_dim)`, - (Optional) 2D tensors with shape `(batch_size, output_dim)`. + def __init__(self, units, + activation='tanh', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0., + recurrent_dropout=0., + **kwargs): + super(SimpleRNNCell, self).__init__(**kwargs) + self.units = units + self.activation = activations.get(activation) + self.use_bias = use_bias - # Attended shapes - ND tensor of the shape expected by the attentive cell. + self.kernel_initializer = initializers.get(kernel_initializer) + self.recurrent_initializer = initializers.get(recurrent_initializer) + self.bias_initializer = initializers.get(bias_initializer) - # Examples + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.recurrent_regularizer = regularizers.get(recurrent_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) - ```python + self.kernel_constraint = constraints.get(kernel_constraint) + self.recurrent_constraint = constraints.get(recurrent_constraint) + self.bias_constraint = constraints.get(bias_constraint) - TODO: minimal example (using functional API?) 
- ``` - """ + self.dropout = min(1., max(0., dropout)) + self.recurrent_dropout = min(1., max(0., recurrent_dropout)) + self.state_size = self.units + self._dropout_mask = None + self._recurrent_dropout_mask = None - def __init__(self, cell, **kwargs): - if isinstance(cell, (list, tuple)): - # Note: not obviously how one would want to propagate the attended - # for stacked cells, user should stack them manually into a single - # cell - raise ValueError('AttentionRNN only supports a single cell') - super(AttentionRNN, self).__init__(cell=cell, **kwargs) - # we let base class check that cel has call function before checking - # for the additional argument - if not has_arg(cell.call, 'attended'): - raise ValueError('`cell.call` does not take the keyword argument' - ' attended') + def build(self, input_shape): + self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + name='kernel', + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + name='recurrent_kernel', + initializer=self.recurrent_initializer, + regularizer=self.recurrent_regularizer, + constraint=self.recurrent_constraint) + if self.use_bias: + self.bias = self.add_weight(shape=(self.units,), + name='bias', + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + else: + self.bias = None + self.built = True - self._n_attended = None # set in __call__, needed in build to split - # input_shape - self.attended_spec = None + def _generate_dropout_mask(self, inputs, training=None): + if 0 < self.dropout < 1: + ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) - def build(self, input_shape): - attended_shapes = input_shape[-self._n_attended:] - input_shape = input_shape[0] - batch_size = input_shape[0] if self.stateful else None - input_dim = input_shape[-1] - self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - attended_specs = [InputSpec(shape=(batch_size,) + attended_shape[1:]) - for attended_shape in attended_shapes] - if len(attended_specs) > 1: - self.attended_spec = attended_specs + self._dropout_mask = K.in_train_phase( + dropped_inputs, + ones, + training=training) else: - self.attended_spec = attended_specs[0] + self._dropout_mask = None - if self.stateful: - self.reset_states() + def _generate_recurrent_dropout_mask(self, inputs, training=None): + if 0 < self.recurrent_dropout < 1: + ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) + ones = K.tile(ones, (1, self.units)) - if isinstance(self.cell, Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - self.cell.build([step_input_shape] + attended_shapes) + def dropped_inputs(): + return K.dropout(ones, self.dropout) - def __call__(self, inputs, initial_state=None, attended=None, **kwargs): - # If there are multiple inputs, then they should be the main input, - # `initial_state` and `attended` - # TODO what is meant by "e.g. when loading model from file" in comment - # in base class RNN, can there be a problem if initial states are not - # passed in the Attentive RNN with respect ot this!? 
- inputs, initial_state, attended = self._normalize_args( - inputs, initial_state, attended) + self._recurrent_dropout_mask = K.in_train_phase( + dropped_inputs, + ones, + training=training) + else: + self._recurrent_dropout_mask = None - if attended is None: - raise ValueError('attended input must be passed') - # we need to know length of attended in build - self._n_attended = len(attended) - - check_list = [] - if initial_state: - check_list += initial_state - if attended: - check_list += attended - # at this point check_list cannot be empty - is_keras_tensor = hasattr(check_list[0], '_keras_history') - for tensor in check_list: - if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state and attended of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') - - if is_keras_tensor: - # Compute the full input spec, including state and attended - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec - inputs = [inputs] - if initial_state: - self.input_spec += state_spec - inputs += initial_state - kwargs['initial_state'] = initial_state - if attended: - self.input_spec += self.external_constants_spec - inputs += attended - kwargs['attended'] = attended - - # Perform the call - output = Layer.__call__(self, inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec - return output - else: - kwargs['initial_state'] = initial_state - if attended is not None: - kwargs['attended'] = attended - return Layer.__call__(self, inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - attended=None): - # TODO this method duplicates almost everything in RNN.call, - # better solution? - - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - inputs = inputs[0] - if initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - input_shape = K.int_shape(inputs) - timesteps = input_shape[1] - if self.unroll and timesteps in [None, 1]: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined or equal to 1. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. 
If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - - cell_kwargs = {'attended': attended} - if has_arg(self.cell.call, 'training'): - cell_kwargs['training'] = training - - # NOTE: by passing the attended implicitly into the K.rnn it is not - # possible for theano backend to optimise the scan op, see section: - # "Explicitly passing inputs of the inner function to scan" in: - # http://deeplearning.net/software/theano/library/scan.html#lib-scan-shared-variables - # but on the other hand we are not passed weights (shared variables) - # of the cell transformation anyway. - step = functools.partial(self.cell.call, **cell_kwargs) - - last_output, outputs, states = K.rnn(step, - inputs, - initial_state, - go_backwards=self.go_backwards, - mask=mask, - unroll=self.unroll, - input_length=timesteps) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - if self.return_sequences: - output = outputs - else: - output = last_output - - # Properly set learning phase - if getattr(last_output, '_uses_learning_phase', False): - output._uses_learning_phase = True + def call(self, inputs, states, training=None): + prev_output = states[0] + dp_mask = self._dropout_mask + rec_dp_mask = self._recurrent_dropout_mask - if self.return_state: - if not isinstance(states, (list, tuple)): - states = [states] - else: - states = list(states) - return [output] + states + if dp_mask is not None: + h = K.dot(inputs * dp_mask, self.kernel) else: - return output - - def _normalize_args(self, inputs, initial_state, attended): - """The inputs `initial_state` and `attended` can be passed to - AttentionRNN.__call__ either by separate arguments or as part of - `inputs`. In this case `inputs` is a list of tensors of which the first - one is the actual (sequence) input followed by initial states followed - by the attended. - - This method separates and normalizes the different groups of inputs. - - # Arguments - inputs: tensor of list/tuple of tensors - initial_state: tensor or list of tensors or None - attended: tensor or list of tensors or None - - # Returns - inputs: tensor - initial_state: list of tensors or None - attended: list of tensors or None - """ - if isinstance(inputs, (list, tuple)): - remaining_inputs = inputs[1:] - inputs = inputs[0] - if remaining_inputs and initial_state is None: - if isinstance(self.state_spec, list): - n_states = len(self.state_spec) - else: - n_states = 1 - initial_state = remaining_inputs[:n_states] - remaining_inputs = remaining_inputs[n_states:] - if remaining_inputs and attended is None: - attended = remaining_inputs - if len(remaining_inputs) > 0: - raise ValueError('too many inputs were passed') + h = K.dot(inputs, self.kernel) + if self.bias is not None: + h = K.bias_add(h, self.bias) - initial_state = _to_list_or_none(initial_state) - attended = _to_list_or_none(attended) + if rec_dp_mask is not None: + prev_output *= rec_dp_mask + output = h + K.dot(prev_output, self.recurrent_kernel) + if self.activation is not None: + output = self.activation(output) - return inputs, initial_state, attended + # Properly set learning phase on output tensor. 
+ if 0 < self.dropout + self.recurrent_dropout: + if training is None: + output._uses_learning_phase = True + return output, [output] -class SimpleRNNCell(Layer): - """Cell class for SimpleRNN. +class SimpleRNN(RNN): + """Fully-connected RNN where the output is to be fed back to input. # Arguments units: Positive integer, dimensionality of the output space. @@ -1150,6 +960,7 @@ class SimpleRNNCell(Layer): the linear transformation of the recurrent state. """ + @interfaces.legacy_recurrent_support def __init__(self, units, activation='tanh', use_bias=True, @@ -1159,222 +970,61 @@ def __init__(self, units, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): - super(SimpleRNNCell, self).__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.use_bias = use_bias + if 'implementation' in kwargs: + kwargs.pop('implementation') + warnings.warn('The `implementation` argument ' + 'in `SimpleRNN` has been deprecated. ' + 'Please remove it from your layer call.') + if K.backend() == 'cntk': + if not kwargs.get('unroll') and (dropout > 0 or recurrent_dropout > 0): + warnings.warn( + 'RNN dropout is not supported with the CNTK backend ' + 'when using dynamic RNNs (i.e. non-unrolled). ' + 'You can either set `unroll=True`, ' + 'set `dropout` and `recurrent_dropout` to 0, ' + 'or use a different backend.') + dropout = 0. + recurrent_dropout = 0. - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) + cell = SimpleRNNCell(units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + recurrent_initializer=recurrent_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + recurrent_regularizer=recurrent_regularizer, + bias_regularizer=bias_regularizer, + kernel_constraint=kernel_constraint, + recurrent_constraint=recurrent_constraint, + bias_constraint=bias_constraint, + dropout=dropout, + recurrent_dropout=recurrent_dropout) + super(SimpleRNN, self).__init__(cell, **kwargs) + self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) + def call(self, inputs, mask=None, training=None, initial_state=None): + self.cell._generate_dropout_mask(inputs, training=training) + self.cell._generate_recurrent_dropout_mask(inputs, training=training) + return super(SimpleRNN, self).call(inputs, + mask=mask, + training=training, + initial_state=initial_state) - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) + @property + def units(self): + return self.cell.units - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - 
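The `_generate_dropout_mask`/`_generate_recurrent_dropout_mask` helpers shown above sample one mask per sequence and reuse it at every timestep, so the same units stay dropped across time; a small numpy illustration of that idea follows (rates and shapes are arbitrary). Incidentally, `_generate_recurrent_dropout_mask` as written builds its mask with `K.dropout(ones, self.dropout)` rather than `self.recurrent_dropout`, which looks unintended.

```python
import numpy as np

rate, batch, units = 0.3, 4, 32
ones = np.ones((batch, units))            # like K.ones_like(inputs[:, 0, :])
keep = np.random.rand(batch, units) >= rate
mask = keep / (1. - rate)                 # inverted dropout, as K.dropout does
# the same mask is then reused at every step of the recurrence,
# e.g. h_t = np.dot(x_t * mask, kernel) + ...
```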
constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - self.built = True - - def _generate_dropout_mask(self, inputs, training=None): - if 0 < self.dropout < 1: - ones = K.ones_like(K.squeeze(inputs[:, 0:1, :], axis=1)) - - def dropped_inputs(): - return K.dropout(ones, self.dropout) - - self._dropout_mask = K.in_train_phase( - dropped_inputs, - ones, - training=training) - else: - self._dropout_mask = None - - def _generate_recurrent_dropout_mask(self, inputs, training=None): - if 0 < self.recurrent_dropout < 1: - ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) - ones = K.tile(ones, (1, self.units)) - - def dropped_inputs(): - return K.dropout(ones, self.dropout) - - self._recurrent_dropout_mask = K.in_train_phase( - dropped_inputs, - ones, - training=training) - else: - self._recurrent_dropout_mask = None - - def call(self, inputs, states, training=None): - prev_output = states[0] - dp_mask = self._dropout_mask - rec_dp_mask = self._recurrent_dropout_mask - - if dp_mask is not None: - h = K.dot(inputs * dp_mask, self.kernel) - else: - h = K.dot(inputs, self.kernel) - if self.bias is not None: - h = K.bias_add(h, self.bias) - - if rec_dp_mask is not None: - prev_output *= rec_dp_mask - output = h + K.dot(prev_output, self.recurrent_kernel) - if self.activation is not None: - output = self.activation(output) - - # Properly set learning phase on output tensor. - if 0 < self.dropout + self.recurrent_dropout: - if training is None: - output._uses_learning_phase = True - return output, [output] - - -class SimpleRNN(RNN): - """Fully-connected RNN where the output is to be fed back to input. - - # Arguments - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). 
- recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - """ - - @interfaces.legacy_recurrent_support - def __init__(self, units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - if 'implementation' in kwargs: - kwargs.pop('implementation') - warnings.warn('The `implementation` argument ' - 'in `SimpleRNN` has been deprecated. ' - 'Please remove it from your layer call.') - if K.backend() == 'cntk': - if not kwargs.get('unroll') and (dropout > 0 or recurrent_dropout > 0): - warnings.warn( - 'RNN dropout is not supported with the CNTK backend ' - 'when using dynamic RNNs (i.e. non-unrolled). ' - 'You can either set `unroll=True`, ' - 'set `dropout` and `recurrent_dropout` to 0, ' - 'or use a different backend.') - dropout = 0. - recurrent_dropout = 0. - - cell = SimpleRNNCell(units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout) - super(SimpleRNN, self).__init__(cell, **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._generate_dropout_mask(inputs, training=training) - self.cell._generate_recurrent_dropout_mask(inputs, training=training) - return super(SimpleRNN, self).call(inputs, - mask=mask, - training=training, - initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation + @property + def activation(self): + return self.cell.activation @property def use_bias(self): @@ -2369,6 +2019,266 @@ def from_config(cls, config): return cls(**config) +class AttentionRNN(RNN): + """Base class for attentive recurrent layers. + + # Arguments + cell: A RNN cell instance supporting attention. It should implement: + - a `call(input_at_t, states_at_t, attended)` method, returning + `(output_at_t, states_at_t_plus_1)`. It must accept the keyword + argument `attended` which refers to the input(s) (tensor or + list of tensors) that is attended to an will be presented as a + whole at each timestep. + - a `state_size` attribute. This can be a single integer + (single state) in which case it is the size of the recurrent + state (which should be the same as the size of the cell + output). This can also be a list/tuple of integers + (one size per state). 
In this case, the first entry + (`state_size[0]`) should be the same as the size of the cell + output. + If the RNN cell is a keras layer, the input_shape passed to its + `build` method will be a list of the input shape of the regular + sequence input followed by the shape(s) of the attended. + **kwargs: See docs of super class RNN. + + # Input shapes + 3D tensor with shape `(batch_size, timesteps, input_dim)`, + (Optional) 2D tensors with shape `(batch_size, output_dim)`. + + # Attended shapes + ND tensor of the shape expected by the attentive cell. + + # Examples + + ```python + + TODO: minimal example (using functional API?) + ``` + """ + + def __init__(self, cell, **kwargs): + if isinstance(cell, (list, tuple)): + # Note: not obviously how one would want to propagate the attended + # for stacked cells, user should stack them manually into a single + # cell + raise ValueError('AttentionRNN only supports a single cell') + super(AttentionRNN, self).__init__(cell=cell, **kwargs) + # we let base class check that cel has call function before checking + # for the additional argument + if not has_arg(cell.call, 'attended'): + raise ValueError('`cell.call` does not take the keyword argument' + ' attended') + + self._n_attended = None # set in __call__, needed in build to split + # input_shape + self.attended_spec = None + + def build(self, input_shape): + attended_shapes = input_shape[-self._n_attended:] + input_shape = input_shape[0] + batch_size = input_shape[0] if self.stateful else None + input_dim = input_shape[-1] + self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) + + attended_specs = [InputSpec(shape=(batch_size,) + attended_shape[1:]) + for attended_shape in attended_shapes] + if len(attended_specs) > 1: + self.attended_spec = attended_specs + else: + self.attended_spec = attended_specs[0] + + if self.stateful: + self.reset_states() + + if isinstance(self.cell, Layer): + step_input_shape = (input_shape[0],) + input_shape[2:] + self.cell.build([step_input_shape] + attended_shapes) + + def __call__(self, inputs, initial_state=None, attended=None, **kwargs): + # If there are multiple inputs, then they should be the main input, + # `initial_state` and `attended` + # TODO what is meant by "e.g. when loading model from file" in comment + # in base class RNN, can there be a problem if initial states are not + # passed in the Attentive RNN with respect ot this!? 
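Since the docstring above still carries a TODO for a minimal example, here is a hedged sketch of a cell that satisfies the stated interface (hypothetical class, modeled on the `RNNCellWithConstants` test cell later in this series): `call` accepts the extra `attended` keyword, and `build` receives the step input shape followed by the attended shape.

```python
import keras
from keras import backend as K

class MinimalAttentiveCell(keras.layers.Layer):
    # illustration only; not part of this patch
    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalAttentiveCell, self).__init__(**kwargs)

    def build(self, input_shape):
        step_shape, attended_shape = input_shape   # [step input, attended]
        self.kernel = self.add_weight(shape=(step_shape[-1], self.units),
                                      initializer='uniform', name='kernel')
        self.attended_kernel = self.add_weight(shape=(attended_shape[-1], self.units),
                                               initializer='uniform',
                                               name='attended_kernel')
        self.recurrent_kernel = self.add_weight(shape=(self.units, self.units),
                                                initializer='uniform',
                                                name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states, attended):
        [att] = attended                           # a single attended tensor
        h = (K.dot(inputs, self.kernel)
             + K.dot(att, self.attended_kernel)
             + K.dot(states[0], self.recurrent_kernel))
        return h, [h]
```

A layer built as `AttentionRNN(MinimalAttentiveCell(32))` could then be called as `layer(x_sequence, attended=attended)`, mirroring the test added further down.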
+ inputs, initial_state, attended = self._normalize_args( + inputs, initial_state, attended) + + if attended is None: + raise ValueError('attended input must be passed') + # we need to know length of attended in build + self._n_attended = len(attended) + + check_list = [] + if initial_state: + check_list += initial_state + if attended: + check_list += attended + # at this point check_list cannot be empty + is_keras_tensor = hasattr(check_list[0], '_keras_history') + for tensor in check_list: + if hasattr(tensor, '_keras_history') != is_keras_tensor: + raise ValueError('The initial state and attended of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') + + if is_keras_tensor: + # Compute the full input spec, including state and attended + input_spec = self.input_spec + state_spec = self.state_spec + if not isinstance(input_spec, list): + input_spec = [input_spec] + if not isinstance(state_spec, list): + state_spec = [state_spec] + self.input_spec = input_spec + inputs = [inputs] + if initial_state: + self.input_spec += state_spec + inputs += initial_state + kwargs['initial_state'] = initial_state + if attended: + self.input_spec += self.external_constants_spec + inputs += attended + kwargs['attended'] = attended + + # Perform the call + output = Layer.__call__(self, inputs, **kwargs) + + # Restore original input spec + self.input_spec = input_spec + return output + else: + kwargs['initial_state'] = initial_state + if attended is not None: + kwargs['attended'] = attended + return Layer.__call__(self, inputs, **kwargs) + + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + attended=None): + # TODO this method duplicates almost everything in RNN.call, + # better solution? + + # input shape: `(samples, time (padded with zeros), input_dim)` + # note that the .build() method of subclasses MUST define + # self.input_spec and self.state_spec with complete input shapes. + if isinstance(inputs, list): + inputs = inputs[0] + if initial_state is not None: + pass + elif self.stateful: + initial_state = self.states + else: + initial_state = self.get_initial_state(inputs) + + if isinstance(mask, list): + mask = mask[0] + + if len(initial_state) != len(self.states): + raise ValueError('Layer has ' + str(len(self.states)) + + ' states but was passed ' + + str(len(initial_state)) + + ' initial states.') + input_shape = K.int_shape(inputs) + timesteps = input_shape[1] + if self.unroll and timesteps in [None, 1]: + raise ValueError('Cannot unroll a RNN if the ' + 'time dimension is undefined or equal to 1. \n' + '- If using a Sequential model, ' + 'specify the time dimension by passing ' + 'an `input_shape` or `batch_input_shape` ' + 'argument to your first layer. If your ' + 'first layer is an Embedding, you can ' + 'also use the `input_length` argument.\n' + '- If using the functional API, specify ' + 'the time dimension by passing a `shape` ' + 'or `batch_shape` argument to your Input layer.') + + cell_kwargs = {'attended': attended} + if has_arg(self.cell.call, 'training'): + cell_kwargs['training'] = training + + # NOTE: by passing the attended implicitly into the K.rnn it is not + # possible for theano backend to optimise the scan op, see section: + # "Explicitly passing inputs of the inner function to scan" in: + # http://deeplearning.net/software/theano/library/scan.html#lib-scan-shared-variables + # but on the other hand we are not passed weights (shared variables) + # of the cell transformation anyway. 
+ step = functools.partial(self.cell.call, **cell_kwargs) + + last_output, outputs, states = K.rnn(step, + inputs, + initial_state, + go_backwards=self.go_backwards, + mask=mask, + unroll=self.unroll, + input_length=timesteps) + if self.stateful: + updates = [] + for i in range(len(states)): + updates.append((self.states[i], states[i])) + self.add_update(updates, inputs) + + if self.return_sequences: + output = outputs + else: + output = last_output + + # Properly set learning phase + if getattr(last_output, '_uses_learning_phase', False): + output._uses_learning_phase = True + + if self.return_state: + if not isinstance(states, (list, tuple)): + states = [states] + else: + states = list(states) + return [output] + states + else: + return output + + def _normalize_args(self, inputs, initial_state, attended): + """The inputs `initial_state` and `attended` can be passed to + AttentionRNN.__call__ either by separate arguments or as part of + `inputs`. In this case `inputs` is a list of tensors of which the first + one is the actual (sequence) input followed by initial states followed + by the attended. + + This method separates and normalizes the different groups of inputs. + + # Arguments + inputs: tensor of list/tuple of tensors + initial_state: tensor or list of tensors or None + attended: tensor or list of tensors or None + + # Returns + inputs: tensor + initial_state: list of tensors or None + attended: list of tensors or None + """ + if isinstance(inputs, (list, tuple)): + remaining_inputs = inputs[1:] + inputs = inputs[0] + if remaining_inputs and initial_state is None: + if isinstance(self.state_spec, list): + n_states = len(self.state_spec) + else: + n_states = 1 + initial_state = remaining_inputs[:n_states] + remaining_inputs = remaining_inputs[n_states:] + if remaining_inputs and attended is None: + attended = remaining_inputs + if len(remaining_inputs) > 0: + raise ValueError('too many inputs were passed') + + initial_state = _to_list_or_none(initial_state) + attended = _to_list_or_none(attended) + + return inputs, initial_state, attended + + def _to_list_or_none(x): # TODO move? 
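A small worked illustration of the argument normalization above (plain Python, hypothetical helper name): the combined-list form and the keyword form should reduce to the same triple of main input, initial states and attended.

```python
def split_inputs(inputs, n_states=1):
    # simplified stand-in for _normalize_args: main input first, then
    # n_states initial states, then whatever remains is the attended
    if not isinstance(inputs, (list, tuple)):
        return inputs, None, None
    rest = list(inputs[1:])
    return inputs[0], rest[:n_states] or None, rest[n_states:] or None

assert split_inputs(['x', 'h0', 'att']) == ('x', ['h0'], ['att'])
assert split_inputs('x') == ('x', None, None)
```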
Very similar to topology._to_list if x is None or isinstance(x, list): return x From e74b125b9b06b00912184bf049589719a9441758 Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 8 Oct 2017 22:34:22 +0200 Subject: [PATCH 07/13] renamed constants to attended in FunctionRNNCell, avoided duplicating outputs in wrapped model --- examples/functional_rnn_cell.py | 46 +++++---- keras/layers/recurrent.py | 138 +++++++++++++++++---------- tests/keras/layers/recurrent_test.py | 44 +++++---- 3 files changed, 138 insertions(+), 90 deletions(-) diff --git a/examples/functional_rnn_cell.py b/examples/functional_rnn_cell.py index f3e67b2d104b..14287209173b 100644 --- a/examples/functional_rnn_cell.py +++ b/examples/functional_rnn_cell.py @@ -4,42 +4,46 @@ from keras import Input from keras.layers import add, Dense, Activation, FunctionalRNNCell, RNN, \ - concatenate, multiply, Model + concatenate, multiply, Model, AttentionRNN units = 32 input_size = 5 x = Input((input_size,)) -h_tm1 = Input((units,)) -h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_tm1)]) -h = Activation('tanh')(h_) +h_in = Input((units,)) +h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_in)]) +h_out = Activation('tanh')(h_) # Create the cell: - cell = FunctionalRNNCell( - inputs=x, outputs=h, input_states=h_tm1, output_states=h) + inputs=x, outputs=h_out, input_states=h_in, output_states=h_out) x_sequence = Input((None, input_size)) rnn = RNN(cell) y = rnn(x_sequence) -# Now we can modify the cell to make use of "external" constants: -constant_shape = (10,) -c = Input(constant_shape) -density = Dense(constant_shape[0], activation='softmax')( - concatenate([x, h_tm1])) -attention = multiply([density, c]) -h2_ = add([h, Dense(units)(attention)]) -h2 = Activation('tanh')(h2_) +# Modify the cell to make use of attention to "external" constants: +attended_shape = (10,) +attended = Input(attended_shape) +density = Dense(attended_shape[0], activation='softmax')( + concatenate([x, h_in])) +attention = multiply([density, attended]) +h2_ = add([h_out, Dense(units)(attention)]) +h_out_2 = Activation('tanh')(h2_) attention_cell = FunctionalRNNCell( - inputs=x, outputs=h2, input_states=h_tm1, output_states=h2, constants=c) - -attention_rnn = RNN(attention_cell) -y2 = attention_rnn(x_sequence, constants=c) + inputs=x, + outputs=h_out_2, + input_states=h_in, + output_states=h_out_2, + attended=attended +) + +attention_rnn = AttentionRNN(attention_cell) +y2 = attention_rnn(x_sequence, attended=attended) # Note that shape of c is same as in cell (no time dimension added) -attention_model = Model([x_sequence, c], y2) +attention_model = Model([x_sequence, attended], y2) x_sequence_arr = np.random.randn(3, 5, input_size) -c_arr = np.random.randn(3, constant_shape[0]) -y2_arr = attention_model.predict([x_sequence_arr, c_arr]) +attended_arr = np.random.randn(3, attended_shape[0]) +y2_arr = attention_model.predict([x_sequence_arr, attended_arr]) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index c35ceb19fc50..abb00ece6c1b 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -204,8 +204,9 @@ class FunctionalRNNCell(Wrapper): outputs: output tensor at a single timestep input_states: state tensor(s) from previous time step output_states: state tensor(s) after cell transformation - constants: tensor(s) or None, represents inputs that should be static - (the same) for each time step. + attended: tensor(s) or None, represents inputs that should be static + (the same) for each time step. 
Used for implementing attention + mechanisms. # Examples @@ -232,26 +233,26 @@ class FunctionalRNNCell(Wrapper): # We can also define cells that make use of "external" constants, to # implement attention mechanisms: - constant_shape = (10,) - c = Input(constant_shape) - density = Dense(constant_shape[0], activation='softmax')( + attended_shape = (10,) + attended = Input(attended_shape) + density = Dense(attended_shape[0], activation='softmax')( concatenate([x, h_tm1])) - attention = multiply([density, c]) + attention = multiply([density, attended]) h2_ = add([h_, Dense(units)(attention)]) h2 = Activation('tanh')(h2_) attention_cell = FunctionalRNNCell( inputs=x, outputs=h2, input_states=h_tm1, output_states=h2, - constants=c) + attended=attended) - attention_rnn = RNN(attention_cell) - y2 = attention_rnn(x_sequence, constants=c) + attention_rnn = AttentionRNN(attention_cell) + y2 = attention_rnn(x_sequence, attended=attended) # Remember to pass the constant to the RNN layer (which will pass it on to # the cell). Also note that shape of c is same as in cell (no time # dimension added) - attention_model = Model([x_sequence, c], y2) + attention_model = Model([x_sequence, attended], y2) ``` """ def __init__( @@ -260,15 +261,24 @@ def __init__( outputs, input_states, output_states, - constants=None, + attended=None, **kwargs ): input_states = _to_list_or_none(input_states) output_states = _to_list_or_none(output_states) - constants = _to_list_or_none(constants) + attended = _to_list_or_none(attended) + if outputs == output_states[0]: + self.first_state_is_output = True + model_outputs = output_states + else: + warnings.warn('it is expected by RNN that output tensor is same as' + ' first state') + self.first_state_is_output = False + model_outputs = [outputs] + output_states + model = Model( - inputs=self._get_model_inputs(inputs, input_states, constants), - outputs=[outputs] + output_states + inputs=self._get_model_inputs(inputs, input_states, attended), + outputs=model_outputs ) super(FunctionalRNNCell, self).__init__(layer=model, **kwargs) @@ -284,25 +294,30 @@ def __init__( def state_size(self): return self._state_size - def call(self, inputs, states, constants=None): + def call(self, inputs, states, attended=None): """Defines the cell transformation for a single time step. # Arguments inputs: Tensor representing input at current time step. states: Tensor or list/tuple of tensors representing states from previous time step. - constants: Tensor or list of tensors or None representing inputs + attended: Tensor or list of tensors or None representing inputs that should be the same at each time step. 
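The `first_state_is_output` branch above exists to avoid registering the same tensor twice as a model output; a plain-Python sketch of that decision (hypothetical helper name, object identity standing in for tensor identity):

```python
def wrapped_model_outputs(outputs, output_states):
    # when the cell output is literally the first output state, only the
    # states are used as outputs of the wrapped Model; the output is then
    # recovered as outputs[0] in call()
    if outputs is output_states[0]:
        return output_states
    return [outputs] + output_states

h, c = object(), object()
assert wrapped_model_outputs(h, [h, c]) == [h, c]   # no duplication
assert wrapped_model_outputs(h, [c]) == [h, c]      # output prepended
```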
""" - outputs = self.layer(self._get_model_inputs(inputs, states, constants)) - output, states = outputs[0], outputs[1:] - - return output, states - - def _get_model_inputs(self, inputs, input_states, constants): + outputs = self.layer(self._get_model_inputs(inputs, states, attended)) + if not isinstance(outputs, list): + # if a list of a single output is passed to Model it still + # just returns a tensor + outputs = [outputs] + output = outputs[0] + new_states = outputs if self.first_state_is_output else outputs[1:] + return output, new_states + + @staticmethod + def _get_model_inputs(inputs, input_states, attended): inputs = [inputs] + list(input_states) - if constants is not None: - inputs += constants + if attended is not None: + inputs += attended return inputs @@ -2147,8 +2162,41 @@ class AttentionRNN(RNN): # Examples ```python + units = 32 + input_size = 5 + attended_shape = (10,) + + x = Input((input_size,)) + h_in = Input((units,)) + attended = Input(attended_shape) + + # predict "attention density" based on input and previous state + density = Dense(attended_shape[0], activation='softmax')( + concatenate([x, h_in])) + attention = multiply([density, attended]) + + h_ = add([ + Dense(units)(x), + Dense(units)(attention), + Dense(units, use_bias=False)(h_in) + ]) + h_out = Activation('tanh')(h_) + + # create cell + attention_cell = FunctionalRNNCell( + inputs=x, + outputs=h_out, + input_states=[h_in], + output_states=[h_out], + attended=attended + ) - TODO: minimal example (using functional API?) + # apply on input sequence + x_sequence = Input((None, input_size)) + attention_rnn = AttentionRNN(attention_cell) + y = attention_rnn(x_sequence, attended=attended) + + attention_model = Model([x_sequence, attended], y) ``` """ @@ -2165,24 +2213,19 @@ def __init__(self, cell, **kwargs): raise ValueError('`cell.call` does not take the keyword argument' ' attended') - self._n_attended = None # set in __call__, needed in build to split - # input_shape self.attended_spec = None def build(self, input_shape): - attended_shapes = input_shape[-self._n_attended:] + if isinstance(self.attended_spec, list): + attended_shapes = input_shape[-len(self.attended_spec):] + else: + attended_shapes = input_shape[-1:] + input_shape = input_shape[0] batch_size = input_shape[0] if self.stateful else None input_dim = input_shape[-1] self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) - attended_specs = [InputSpec(shape=(batch_size,) + attended_shape[1:]) - for attended_shape in attended_shapes] - if len(attended_specs) > 1: - self.attended_spec = attended_specs - else: - self.attended_spec = attended_specs[0] - if self.stateful: self.reset_states() @@ -2201,14 +2244,14 @@ def __call__(self, inputs, initial_state=None, attended=None, **kwargs): if attended is None: raise ValueError('attended input must be passed') - # we need to know length of attended in build - self._n_attended = len(attended) + # we need to append attended spec to input spec below + self.attended_spec = [InputSpec(shape=K.int_shape(attended_)) + for attended_ in attended] - check_list = [] if initial_state: - check_list += initial_state - if attended: - check_list += attended + check_list = initial_state + attended + else: + check_list = attended # at this point check_list cannot be empty is_keras_tensor = hasattr(check_list[0], '_keras_history') for tensor in check_list: @@ -2231,10 +2274,9 @@ def __call__(self, inputs, initial_state=None, attended=None, **kwargs): self.input_spec += state_spec inputs += initial_state 
kwargs['initial_state'] = initial_state - if attended: - self.input_spec += self.external_constants_spec - inputs += attended - kwargs['attended'] = attended + self.input_spec += self.attended_spec + inputs += attended + kwargs['attended'] = attended # Perform the call output = Layer.__call__(self, inputs, **kwargs) @@ -2243,9 +2285,9 @@ def __call__(self, inputs, initial_state=None, attended=None, **kwargs): self.input_spec = input_spec return output else: - kwargs['initial_state'] = initial_state - if attended is not None: - kwargs['attended'] = attended + if initial_state: + kwargs['initial_state'] = initial_state + kwargs['attended'] = attended return Layer.__call__(self, inputs, **kwargs) def call(self, diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index 3526e5aaa9ca..ca66ef42bde5 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -568,7 +568,7 @@ def test_batch_size_equal_one(layer_class): model.train_on_batch(x, y) -def test_rnn_cell_with_constants_layer(): +def test_attention_rnn(): class RNNCellWithConstants(keras.layers.Layer): @@ -597,12 +597,12 @@ def build(self, input_shape): name='constant_kernel') self.built = True - def call(self, inputs, states, constants): + def call(self, inputs, states, attended): [prev_output] = states - [constant] = constants + [attended] = attended h_input = keras.backend.dot(inputs, self.input_kernel) h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) + h_const = keras.backend.dot(attended, self.constant_kernel) output = h_input + h_state + h_const return output, [output] @@ -613,11 +613,11 @@ def get_config(self): # Test basic case. x = keras.Input((None, 5)) - c = keras.Input((3,)) + attended = keras.Input((3,)) cell = RNNCellWithConstants(32) - layer = recurrent.RNN(cell) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) + layer = recurrent.AttentionRNN(cell) + y = layer(x, attended=attended) + model = keras.models.Model([x, attended], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 3))], @@ -626,17 +626,17 @@ def get_config(self): # Test basic case serialization. 
x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) + attended_np = np.random.random((6, 3)) + y_np = model.predict([x_np, attended_np]) weights = model.get_weights() config = layer.get_config() with keras.utils.CustomObjectScope( {'RNNCellWithConstants': RNNCellWithConstants}): - layer = recurrent.RNN.from_config(config) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) + layer = recurrent.AttentionRNN.from_config(config) + y = layer(x, attended=attended) + model = keras.models.Model([x, attended], y) model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) + y_np_2 = model.predict([x_np, attended_np]) assert_allclose(y_np, y_np_2, atol=1e-4) @@ -662,7 +662,7 @@ def test_functional_rnn_cell(): model.train_on_batch(np.zeros((6, 5, input_size)), np.zeros((6, units))) -def test_functional_rnn_cell_with_constants(): +def test_functional_rnn_cell_with_attended(): layers = keras.layers # Create the cell: @@ -671,22 +671,24 @@ def test_functional_rnn_cell_with_constants(): constant_shape = (10,) x = Input((input_size,)) h_tm1 = Input((units,)) - c = Input(constant_shape) + attended = Input(constant_shape) h_ = layers.add([ layers.Dense(units)(x), layers.Dense(units)(h_tm1), - layers.Dense(units)(c) + layers.Dense(units)(attended) ]) h = layers.Activation('tanh')(h_) cell = recurrent.FunctionalRNNCell( - inputs=x, outputs=h, input_states=h_tm1, output_states=h, constants=c) + inputs=x, outputs=h, input_states=h_tm1, output_states=h, + attended=attended + ) # Test basic case. x_seq = Input((None, input_size)) - layer = recurrent.RNN(cell) - y = layer(x_seq, constants=c) - model = keras.models.Model([x_seq, c], y) + layer = recurrent.AttentionRNN(cell) + y = layer(x_seq, attended=attended) + model = keras.models.Model([x_seq, attended], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, input_size)), np.zeros((6, constant_shape[0]))], From fb91e4e4eec842d24bb1d7079f1bd3f6298e5da4 Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 8 Oct 2017 22:54:55 +0200 Subject: [PATCH 08/13] minor clean-up of docs --- keras/layers/recurrent.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index abb00ece6c1b..0caa07ce9ba8 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -230,9 +230,7 @@ class FunctionalRNNCell(Wrapper): rnn = RNN(cell) y = rnn(x_sequence) - # We can also define cells that make use of "external" constants, to - # implement attention mechanisms: - + # We can also define cells that attend to "external" constants attended_shape = (10,) attended = Input(attended_shape) density = Dense(attended_shape[0], activation='softmax')( @@ -248,9 +246,9 @@ class FunctionalRNNCell(Wrapper): attention_rnn = AttentionRNN(attention_cell) y2 = attention_rnn(x_sequence, attended=attended) - # Remember to pass the constant to the RNN layer (which will pass it on to - # the cell). Also note that shape of c is same as in cell (no time - # dimension added) + # Remember to pass the attended to the AttentionRNN layer (which will pass + # it on to the cell). Also note that shape of the attended is same as in + # cell (no time dimension added) attention_model = Model([x_sequence, attended], y2) ``` @@ -2138,7 +2136,7 @@ class AttentionRNN(RNN): - a `call(input_at_t, states_at_t, attended)` method, returning `(output_at_t, states_at_t_plus_1)`. 
It must accept the keyword argument `attended` which refers to the input(s) (tensor or - list of tensors) that is attended to an will be presented as a + list of tensors) that is attended to and will be presented as a whole at each timestep. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent @@ -2149,7 +2147,7 @@ class AttentionRNN(RNN): output. If the RNN cell is a keras layer, the input_shape passed to its `build` method will be a list of the input shape of the regular - sequence input followed by the shape(s) of the attended. + (sequence) input followed by the shape(s) of the attended. **kwargs: See docs of super class RNN. # Input shapes @@ -2202,7 +2200,7 @@ class AttentionRNN(RNN): def __init__(self, cell, **kwargs): if isinstance(cell, (list, tuple)): - # Note: not obviously how one would want to propagate the attended + # Note: not obvious how one would want to propagate the attended # for stacked cells, user should stack them manually into a single # cell raise ValueError('AttentionRNN only supports a single cell') @@ -2210,8 +2208,8 @@ def __init__(self, cell, **kwargs): # we let base class check that cel has call function before checking # for the additional argument if not has_arg(cell.call, 'attended'): - raise ValueError('`cell.call` does not take the keyword argument' - ' attended') + raise ValueError('cell.call does not take the required keyword ' + 'argument attended') self.attended_spec = None @@ -2238,7 +2236,7 @@ def __call__(self, inputs, initial_state=None, attended=None, **kwargs): # `initial_state` and `attended` # TODO what is meant by "e.g. when loading model from file" in comment # in base class RNN, can there be a problem if initial states are not - # passed in the Attentive RNN with respect ot this!? + # passed in the Attentive RNN with respect to this!? inputs, initial_state, attended = self._normalize_args( inputs, initial_state, attended) @@ -2342,7 +2340,7 @@ def call(self, # possible for theano backend to optimise the scan op, see section: # "Explicitly passing inputs of the inner function to scan" in: # http://deeplearning.net/software/theano/library/scan.html#lib-scan-shared-variables - # but on the other hand we are not passed weights (shared variables) + # but on the other hand we are not passing weights (shared variables) # of the cell transformation anyway. 
step = functools.partial(self.cell.call, **cell_kwargs)

From fcc854cdf59a9644bf6ced3e82a6cc66ff9cf0d5 Mon Sep 17 00:00:00 2001
From: andhus
Date: Mon, 9 Oct 2017 09:00:12 +0200
Subject: [PATCH 09/13] Minor cleanup & improvements in docs, fixed PEP
 breaking formatting in attention test

---
 examples/functional_rnn_cell.py      |  48 ++++++------
 keras/layers/recurrent.py            | 113 ++++++++++++++-------------
 tests/keras/layers/recurrent_test.py |  52 ++++++------
 3 files changed, 108 insertions(+), 105 deletions(-)

diff --git a/examples/functional_rnn_cell.py b/examples/functional_rnn_cell.py
index 14287209173b..2c50212809ed 100644
--- a/examples/functional_rnn_cell.py
+++ b/examples/functional_rnn_cell.py
@@ -3,47 +3,51 @@
 import numpy as np
 
 from keras import Input
-from keras.layers import add, Dense, Activation, FunctionalRNNCell, RNN, \
-    concatenate, multiply, Model, AttentionRNN
+from keras.models import Model
+from keras.layers import add, concatenate, multiply, Dense, Activation
+from keras.layers.recurrent import FunctionalRNNCell, RNN, AttentionRNN
 
 units = 32
 input_size = 5
+
+# Use functional API to define RNN Cell transformation (in this case
+# simple vanilla RNN) for a single time step:
 x = Input((input_size,))
 h_in = Input((units,))
 h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_in)])
 h_out = Activation('tanh')(h_)
+cell = FunctionalRNNCell(inputs=x,
+                         outputs=h_out,
+                         input_states=h_in,
+                         output_states=h_out)
 
-# Create the cell:
-cell = FunctionalRNNCell(
-    inputs=x, outputs=h_out, input_states=h_in, output_states=h_out)
-
+# Inject cell in RNN and apply to input sequence
 x_sequence = Input((None, input_size))
 rnn = RNN(cell)
 y = rnn(x_sequence)
 
-# Modify the cell to make use of attention to "external" constants:
+# Modify the cell to make use of attention (condition transformation on
+# "external" constants such as an image or another sequence):
 attended_shape = (10,)
 attended = Input(attended_shape)
-density = Dense(attended_shape[0], activation='softmax')(
+attention_density = Dense(attended_shape[0], activation='softmax')(
     concatenate([x, h_in]))
-attention = multiply([density, attended])
-h2_ = add([h_out, Dense(units)(attention)])
-h_out_2 = Activation('tanh')(h2_)
-
-attention_cell = FunctionalRNNCell(
-    inputs=x,
-    outputs=h_out_2,
-    input_states=h_in,
-    output_states=h_out_2,
-    attended=attended
-)
-
+attention = multiply([attention_density, attended])
+h2_ = add([h_, Dense(units)(attention)])
+h2_out = Activation('tanh')(h2_)
+attention_cell = FunctionalRNNCell(inputs=x,
+                                   outputs=h2_out,
+                                   input_states=h_in,
+                                   output_states=h2_out,
+                                   attended=attended)
+
+# Pass the attentive cell to the AttentionRNN. 
Note that shape of attended is +# same as in cell (no time dimension added) attention_rnn = AttentionRNN(attention_cell) y2 = attention_rnn(x_sequence, attended=attended) -# Note that shape of c is same as in cell (no time dimension added) +# Apply it on some (mock) data attention_model = Model([x_sequence, attended], y2) - x_sequence_arr = np.random.randn(3, 5, input_size) attended_arr = np.random.randn(3, attended_shape[0]) y2_arr = attention_model.predict([x_sequence_arr, attended_arr]) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 0caa07ce9ba8..8512be91926b 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -211,46 +211,45 @@ class FunctionalRNNCell(Wrapper): # Examples ```python - # Use functional API to define RNN Cell transformation (in this case - # simple vanilla RNN) for a single time step: - - units = 32 - input_size = 5 - x = Input((input_size,)) - h_tm1 = Input((units,)) - h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h_) - - # Create the cell: - - cell = FunctionalRNNCell( - inputs=x, outputs=h, input_states=h_tm1, output_states=h) - - x_sequence = Input((None, input_size)) - rnn = RNN(cell) - y = rnn(x_sequence) - - # We can also define cells that attend to "external" constants - attended_shape = (10,) - attended = Input(attended_shape) - density = Dense(attended_shape[0], activation='softmax')( - concatenate([x, h_tm1])) - attention = multiply([density, attended]) - h2_ = add([h_, Dense(units)(attention)]) - h2 = Activation('tanh')(h2_) - - attention_cell = FunctionalRNNCell( - inputs=x, outputs=h2, input_states=h_tm1, output_states=h2, - attended=attended) - - attention_rnn = AttentionRNN(attention_cell) - y2 = attention_rnn(x_sequence, attended=attended) - - # Remember to pass the attended to the AttentionRNN layer (which will pass - # it on to the cell). Also note that shape of the attended is same as in - # cell (no time dimension added) - - attention_model = Model([x_sequence, attended], y2) + # Use functional API to define RNN Cell transformation (in this case + # simple vanilla RNN) for a single time step: + units = 32 + input_size = 5 + x = Input((input_size,)) + h_in = Input((units,)) + h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_in)]) + h_out = Activation('tanh')(h_) + cell = FunctionalRNNCell(inputs=x, + outputs=h_out, + input_states=h_in, + output_states=h_out) + + # Inject cell in RNN and apply to input sequence + x_sequence = Input((None, input_size)) + rnn = RNN(cell) + y = rnn(x_sequence) + + # Modify the cell to make use of attention (condition transformation on + # "external" constants such as an image or another sequence): + attended_shape = (10,) + attended = Input(attended_shape) + attention_density = Dense(attended_shape[0], activation='softmax')( + concatenate([x, h_in])) + attention = multiply([attention_density, attended]) + h2_ = add([h_, Dense(units)(attention)]) + h2_out = Activation('tanh')(h2_) + attention_cell = FunctionalRNNCell(inputs=x, + outputs=h2_out, + input_states=h_in, + output_states=h2_out, + attended=attended) + + # Pass the attentive cell to the AttentionRNN. 
Note that shape of + # attended is same as in cell (no time dimension added) + attention_rnn = AttentionRNN(attention_cell) + y2 = attention_rnn(x_sequence, attended=attended) + + attention_model = Model([x_sequence, attended], y2) ``` """ def __init__( @@ -265,6 +264,9 @@ def __init__( input_states = _to_list_or_none(input_states) output_states = _to_list_or_none(output_states) attended = _to_list_or_none(attended) + + # the same tensor should not be present multiple times in output of + # wrapped Model if outputs == output_states[0]: self.first_state_is_output = True model_outputs = output_states @@ -273,7 +275,6 @@ def __init__( ' first state') self.first_state_is_output = False model_outputs = [outputs] + output_states - model = Model( inputs=self._get_model_inputs(inputs, input_states, attended), outputs=model_outputs @@ -301,6 +302,10 @@ def call(self, inputs, states, attended=None): previous time step. attended: Tensor or list of tensors or None representing inputs that should be the same at each time step. + + # Returns + output: output of cell transformation + new_states: the updated cell states """ outputs = self.layer(self._get_model_inputs(inputs, states, attended)) if not isinstance(outputs, list): @@ -2169,27 +2174,23 @@ class AttentionRNN(RNN): attended = Input(attended_shape) # predict "attention density" based on input and previous state - density = Dense(attended_shape[0], activation='softmax')( + attention_density = Dense(attended_shape[0], activation='softmax')( concatenate([x, h_in])) - attention = multiply([density, attended]) + attention = multiply([attention_density, attended]) - h_ = add([ - Dense(units)(x), - Dense(units)(attention), - Dense(units, use_bias=False)(h_in) - ]) + h_ = add([Dense(units)(x), + Dense(units)(attention), + Dense(units, use_bias=False)(h_in)]) h_out = Activation('tanh')(h_) # create cell - attention_cell = FunctionalRNNCell( - inputs=x, - outputs=h_out, - input_states=[h_in], - output_states=[h_out], - attended=attended - ) + attention_cell = FunctionalRNNCell(inputs=x, + outputs=h_out, + input_states=[h_in], + output_states=[h_out], + attended=attended) - # apply on input sequence + # apply to input sequence x_sequence = Input((None, input_size)) attention_rnn = AttentionRNN(attention_cell) y = attention_rnn(x_sequence, attended=attended) diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index ca66ef42bde5..eab72ba44163 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -570,18 +570,18 @@ def test_batch_size_equal_one(layer_class): def test_attention_rnn(): - class RNNCellWithConstants(keras.layers.Layer): + class AttentionRNNCell(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) + super(AttentionRNNCell, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed + raise TypeError('expects shape of attended') + [input_shape, attended_shape] = input_shape + # will (and should) raise if more than one attended tensor passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), @@ -591,10 +591,10 @@ def build(self, input_shape): shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') - self.constant_kernel = self.add_weight( - 
shape=(constant_shape[-1], self.units), + self.attended_kernel = self.add_weight( + shape=(attended_shape[-1], self.units), initializer='uniform', - name='constant_kernel') + name='attended_kernel') self.built = True def call(self, inputs, states, attended): @@ -602,19 +602,19 @@ def call(self, inputs, states, attended): [attended] = attended h_input = keras.backend.dot(inputs, self.input_kernel) h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(attended, self.constant_kernel) + h_const = keras.backend.dot(attended, self.attended_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() + base_config = super(AttentionRNNCell, self).get_config() return dict(list(base_config.items()) + list(config.items())) # Test basic case. x = keras.Input((None, 5)) attended = keras.Input((3,)) - cell = RNNCellWithConstants(32) + cell = AttentionRNNCell(32) layer = recurrent.AttentionRNN(cell) y = layer(x, attended=attended) model = keras.models.Model([x, attended], y) @@ -630,9 +630,8 @@ def get_config(self): y_np = model.predict([x_np, attended_np]) weights = model.get_weights() config = layer.get_config() - with keras.utils.CustomObjectScope( - {'RNNCellWithConstants': RNNCellWithConstants}): - layer = recurrent.AttentionRNN.from_config(config) + with keras.utils.CustomObjectScope({'AttentionRNNCell': AttentionRNNCell}): + layer = recurrent.AttentionRNN.from_config(config) y = layer(x, attended=attended) model = keras.models.Model([x, attended], y) model.set_weights(weights) @@ -650,9 +649,10 @@ def test_functional_rnn_cell(): h_tm1 = Input((units,)) h_ = layers.add([layers.Dense(units)(x), layers.Dense(units)(h_tm1)]) h = layers.Activation('tanh')(h_) - cell = recurrent.FunctionalRNNCell( - inputs=x, outputs=h, input_states=h_tm1, output_states=h) - + cell = recurrent.FunctionalRNNCell(inputs=x, + outputs=h, + input_states=h_tm1, + output_states=h) # Test basic case. x_seq = Input((None, input_size)) layer = recurrent.RNN(cell) @@ -672,18 +672,16 @@ def test_functional_rnn_cell_with_attended(): x = Input((input_size,)) h_tm1 = Input((units,)) attended = Input(constant_shape) - h_ = layers.add([ - layers.Dense(units)(x), - layers.Dense(units)(h_tm1), - layers.Dense(units)(attended) - ]) + h_ = layers.add([layers.Dense(units)(x), + layers.Dense(units)(h_tm1), + layers.Dense(units)(attended)]) h = layers.Activation('tanh')(h_) - cell = recurrent.FunctionalRNNCell( - inputs=x, outputs=h, input_states=h_tm1, output_states=h, - attended=attended - ) - + cell = recurrent.FunctionalRNNCell(inputs=x, + outputs=h, + input_states=h_tm1, + output_states=h, + attended=attended) # Test basic case. 
x_seq = Input((None, input_size)) layer = recurrent.AttentionRNN(cell) From ab89c6ae1983b643e12145af0c902b05a9c9d2db Mon Sep 17 00:00:00 2001 From: andhus Date: Sat, 21 Oct 2017 02:31:32 +0200 Subject: [PATCH 10/13] removed FunctionalRNNCell and AttentionRNN, added back support for constants in RNN --- examples/functional_rnn_cell.py | 53 --- keras/layers/recurrent.py | 634 +++++++-------------------- tests/keras/layers/recurrent_test.py | 106 ++--- 3 files changed, 183 insertions(+), 610 deletions(-) delete mode 100644 examples/functional_rnn_cell.py diff --git a/examples/functional_rnn_cell.py b/examples/functional_rnn_cell.py deleted file mode 100644 index 2c50212809ed..000000000000 --- a/examples/functional_rnn_cell.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import division, print_function - -import numpy as np - -from keras import Input -from keras.models import Model -from keras.layers import add, concatenate, multiply, Dense, Activation -from keras.layers.recurrent import FunctionalRNNCell, RNN, AttentionRNN - -units = 32 -input_size = 5 - -# Use functional API to define RNN Cell transformation (in this case -# simple vanilla RNN) for a single time step: -x = Input((input_size,)) -h_in = Input((units,)) -h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_in)]) -h_out = Activation('tanh')(h_) -cell = FunctionalRNNCell(inputs=x, - outputs=h_out, - input_states=h_in, - output_states=h_out) - -# Inject cell in RNN and apply to input sequence -x_sequence = Input((None, input_size)) -rnn = RNN(cell) -y = rnn(x_sequence) - -# Modify the cell to make use of attention (condition transformation on -# "external" constants such as an image or another sequence): -attended_shape = (10,) -attended = Input(attended_shape) -attention_density = Dense(attended_shape[0], activation='softmax')( - concatenate([x, h_in])) -attention = multiply([attention_density, attended]) -h2_ = add([h_, Dense(units)(attention)]) -h2_out = Activation('tanh')(h2_) -attention_cell = FunctionalRNNCell(inputs=x, - outputs=h2_out, - input_states=h_in, - output_states=h2_out, - attended=attended) - -# Pass the attentive cell to the AttentionRNN. Note that shape of attended is -# same as in cell (no time dimension added) -attention_rnn = AttentionRNN(attention_cell) -y2 = attention_rnn(x_sequence, attended=attended) - -# Apply it on some (mock) data -attention_model = Model([x_sequence, attended], y2) -x_sequence_arr = np.random.randn(3, 5, input_size) -attended_arr = np.random.randn(3, attended_shape[0]) -y2_arr = attention_model.predict([x_sequence_arr, attended_arr]) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 8512be91926b..867563cd94a2 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -1,11 +1,8 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import import numpy as np -import functools import warnings -from keras.engine import Model -from keras.layers.wrappers import Wrapper from .. import backend as K from .. import activations from .. import initializers @@ -196,142 +193,15 @@ def get_losses_for(self, inputs=None): return losses -class FunctionalRNNCell(Wrapper): - """Wrapper for allowing composition of RNN Cells using functional API. 
- - # Arguments: - inputs: input tensor at a single time step - outputs: output tensor at a single timestep - input_states: state tensor(s) from previous time step - output_states: state tensor(s) after cell transformation - attended: tensor(s) or None, represents inputs that should be static - (the same) for each time step. Used for implementing attention - mechanisms. - - # Examples - - ```python - # Use functional API to define RNN Cell transformation (in this case - # simple vanilla RNN) for a single time step: - units = 32 - input_size = 5 - x = Input((input_size,)) - h_in = Input((units,)) - h_ = add([Dense(units)(x), Dense(units, use_bias=False)(h_in)]) - h_out = Activation('tanh')(h_) - cell = FunctionalRNNCell(inputs=x, - outputs=h_out, - input_states=h_in, - output_states=h_out) - - # Inject cell in RNN and apply to input sequence - x_sequence = Input((None, input_size)) - rnn = RNN(cell) - y = rnn(x_sequence) - - # Modify the cell to make use of attention (condition transformation on - # "external" constants such as an image or another sequence): - attended_shape = (10,) - attended = Input(attended_shape) - attention_density = Dense(attended_shape[0], activation='softmax')( - concatenate([x, h_in])) - attention = multiply([attention_density, attended]) - h2_ = add([h_, Dense(units)(attention)]) - h2_out = Activation('tanh')(h2_) - attention_cell = FunctionalRNNCell(inputs=x, - outputs=h2_out, - input_states=h_in, - output_states=h2_out, - attended=attended) - - # Pass the attentive cell to the AttentionRNN. Note that shape of - # attended is same as in cell (no time dimension added) - attention_rnn = AttentionRNN(attention_cell) - y2 = attention_rnn(x_sequence, attended=attended) - - attention_model = Model([x_sequence, attended], y2) - ``` - """ - def __init__( - self, - inputs, - outputs, - input_states, - output_states, - attended=None, - **kwargs - ): - input_states = _to_list_or_none(input_states) - output_states = _to_list_or_none(output_states) - attended = _to_list_or_none(attended) - - # the same tensor should not be present multiple times in output of - # wrapped Model - if outputs == output_states[0]: - self.first_state_is_output = True - model_outputs = output_states - else: - warnings.warn('it is expected by RNN that output tensor is same as' - ' first state') - self.first_state_is_output = False - model_outputs = [outputs] + output_states - model = Model( - inputs=self._get_model_inputs(inputs, input_states, attended), - outputs=model_outputs - ) - super(FunctionalRNNCell, self).__init__(layer=model, **kwargs) - - in_states_shape = [K.int_shape(state) for state in input_states] - out_states_shape = [K.int_shape(state) for state in output_states] - if not in_states_shape == out_states_shape: - raise ValueError( - 'shape of input_states: {} are not same as shape of ' - 'output_states: {}'.format(in_states_shape, out_states_shape)) - self._state_size = [state_shape[-1] for state_shape in in_states_shape] - - @property - def state_size(self): - return self._state_size - - def call(self, inputs, states, attended=None): - """Defines the cell transformation for a single time step. - - # Arguments - inputs: Tensor representing input at current time step. - states: Tensor or list/tuple of tensors representing states from - previous time step. - attended: Tensor or list of tensors or None representing inputs - that should be the same at each time step. 
- - # Returns - output: output of cell transformation - new_states: the updated cell states - """ - outputs = self.layer(self._get_model_inputs(inputs, states, attended)) - if not isinstance(outputs, list): - # if a list of a single output is passed to Model it still - # just returns a tensor - outputs = [outputs] - output = outputs[0] - new_states = outputs if self.first_state_is_output else outputs[1:] - return output, new_states - - @staticmethod - def _get_model_inputs(inputs, input_states, attended): - inputs = [inputs] + list(input_states) - if attended is not None: - inputs += attended - - return inputs - - class RNN(Layer): """Base class for recurrent layers. # Arguments cell: A RNN cell instance. A RNN cell is a class that has: - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. + `(output_at_t, states_at_t_plus_1)`. The call method of the + cell can also take the optional argument `constants`, see + section "Note on passing external constants" below. - a `state_size` attribute. This can be a single integer (single state) in which case it is the size of the recurrent state @@ -423,6 +293,14 @@ class RNN(Layer): `states` should be a numpy array or list of numpy arrays representing the initial state of the RNN layer. + # Note on passing external constants to RNNs + You can pass "external" constants to the cell using the `constants` + keyword argument of RNN.__call__ (as well as RNN.call) method. This + requires that the `cell.call` method accepts the same keyword argument + `constants`. Such constants can be used to condition the cell + transformation on additional static inputs (not changing over time) + (a.k.a. as attention mechanism). + # Examples ```python @@ -494,13 +372,11 @@ def __init__(self, cell, self.supports_masking = True self.input_spec = [InputSpec(ndim=3)] - if hasattr(self.cell.state_size, '__len__'): - self.state_spec = [InputSpec(shape=(None, dim)) - for dim in self.cell.state_size] - else: - self.state_spec = InputSpec(shape=(None, self.cell.state_size)) + self.state_spec = None self._states = None - + self.constants_spec = None + self._n_constants = None # used for splitting inputs after + # serialization of layer @property def states(self): if self._states is None: @@ -546,6 +422,13 @@ def compute_mask(self, inputs, mask): return output_mask def build(self, input_shape): + # Note input_shape will be list of shapes of initial states and + # constants if these are passed in __call__. 
+ if self._n_constants is not None: + constants_shape = input_shape[-self._n_constants:] + else: + constants_shape = None + if isinstance(input_shape, list): input_shape = input_shape[0] @@ -553,12 +436,32 @@ def build(self, input_shape): input_dim = input_shape[-1] self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) - if self.stateful: - self.reset_states() - + # allow cell (if layer) to build before we set or validate state_spec if isinstance(self.cell, Layer): step_input_shape = (input_shape[0],) + input_shape[2:] - self.cell.build(step_input_shape) + if constants_shape is not None: + self.cell.build([step_input_shape] + constants_shape) + else: + self.cell.build(step_input_shape) + + # set or validate state_spec + if hasattr(self.cell.state_size, '__len__'): + state_size = list(self.cell.state_size) + else: + state_size = [self.cell.state_size] + + if self.state_spec is not None: + # initial_state was passed in call, check compatibility + if not [spec.shape[-1] for spec in self.state_spec] == state_size: + raise ValueError( + 'an initial_state was passed that is not compatible with' + ' cell.state_size, state_spec: {}, cell.state_size:' + ' {}'.format(self.state_spec, self.cell.state_size)) + else: + self.state_spec = [InputSpec(shape=(None, dim)) + for dim in state_size] + if self.stateful: + self.reset_states() def get_initial_state(self, inputs): # build an all-zero tensor of shape (samples, output_dim) @@ -571,62 +474,68 @@ def get_initial_state(self, inputs): else: return [K.tile(initial_state, [1, self.cell.state_size])] - def __call__(self, inputs, initial_state=None, **kwargs): - # If there are multiple inputs, then - # they should be the main input and `initial_state` - # e.g. when loading model from file - if isinstance(inputs, (list, tuple)) and len(inputs) > 1 and initial_state is None: - initial_state = inputs[1:] - inputs = inputs[0] + def __call__(self, inputs, initial_state=None, constants=None, **kwargs): + inputs, initial_state, constants = self._normalize_args( + inputs, initial_state, constants) - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. - if initial_state is None: + if initial_state is None and constants is None: return super(RNN, self).__call__(inputs, **kwargs) - if not isinstance(initial_state, (list, tuple)): - initial_state = [initial_state] + # If any of `initial_state` or `constants` are specified and are Keras + # tensors, then add them to the inputs and temporarily modify the + # input_spec to include them. 
- is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: + check_list = [] + if initial_state is not None: + kwargs['initial_state'] = initial_state + check_list += initial_state + self.state_spec = [InputSpec(shape=K.int_shape(state)) + for state in initial_state] + if constants is not None: + kwargs['constants'] = constants + check_list += constants + self.constants_spec = [InputSpec(shape=K.int_shape(constant)) + for constant in constants] + self._n_constants = len(constants) + # at this point check_list cannot be empty + is_keras_tensor = hasattr(check_list[0], '_keras_history') + for tensor in check_list: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') + raise ValueError('The initial state and constants of an RNN' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors') if is_keras_tensor: - # Compute the full input spec, including state - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) - - # Perform the call - output = super(RNN, self).__call__(inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec + # Compute the full input spec, including state and constants + full_input = [inputs] + full_input_spec = self.input_spec + if initial_state: + full_input += initial_state + full_input_spec += self.state_spec + if constants: + full_input += constants + full_input_spec += self.constants_spec + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(RNN, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec return output else: - kwargs['initial_state'] = initial_state return super(RNN, self).__call__(inputs, **kwargs) - def call(self, inputs, mask=None, training=None, initial_state=None): + def call(self, + inputs, + mask=None, + training=None, + initial_state=None, + constants=None): # input shape: `(samples, time (padded with zeros), input_dim)` # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. 
if isinstance(inputs, list): - initial_state = inputs[1:] inputs = inputs[0] - elif initial_state is not None: + if initial_state is not None: pass elif self.stateful: initial_state = self.states @@ -656,13 +565,27 @@ def call(self, inputs, mask=None, training=None, initial_state=None): 'the time dimension by passing a `shape` ' 'or `batch_shape` argument to your Input layer.') + kwargs = {} if has_arg(self.cell.call, 'training'): - step = functools.partial(self.cell.call, training=training) + kwargs['training'] = training + + if constants: + if not has_arg(self.cell.call, 'constants'): + raise ValueError('RNN cell does not support constants') + + def step(inputs, states): + constants = states[-self._n_constants:] + states = states[:-self._n_constants] + return self.cell.call(inputs, states, constants=constants, + **kwargs) else: - step = self.cell.call + def step(inputs, states): + return self.cell.call(inputs, states, **kwargs) + last_output, outputs, states = K.rnn(step, inputs, initial_state, + constants=constants, go_backwards=self.go_backwards, mask=mask, unroll=self.unroll, @@ -691,6 +614,48 @@ def call(self, inputs, mask=None, training=None, initial_state=None): else: return output + def _normalize_args(self, inputs, initial_state, constants): + """When running a model loaded from file, the input tensors + `initial_state` and `constants` can be passed to RNN.__call__ as part + of `inputs` in stead of by the dedicated keyword argumetes. In this + case `inputs` is a list of tensors of which the first one is the + actual (sequence) input followed by initial states, followed by + constants. + + This method makes sure initial_states and constants are separated from + inputs and that the are lists of tensors (or None). + + # Arguments + inputs: tensor of list/tuple of tensors + initial_state: tensor or list of tensors or None + constants: tensor or list of tensors or None + + # Returns + inputs: tensor + initial_state: list of tensors or None + constants: list of tensors or None + """ + if isinstance(inputs, list): + assert initial_state is None and constants is None + if self._n_constants is not None: + constants = inputs[-self._n_constants:] + inputs = inputs[:-self._n_constants] + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + def to_list_or_none(x): # TODO break out? + if x is None or isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + return [x] + + initial_state = to_list_or_none(initial_state) + constants = to_list_or_none(constants) + + return inputs, initial_state, constants + def reset_states(self, states=None): if not self.stateful: raise AttributeError('Layer must be stateful.') @@ -749,6 +714,9 @@ def get_config(self): 'go_backwards': self.go_backwards, 'stateful': self.stateful, 'unroll': self.unroll} + if self._n_constants is not None: + config['_n_constants'] = self._n_constants + cell_config = self.cell.get_config() config['cell'] = {'class_name': self.cell.__class__.__name__, 'config': cell_config} @@ -760,7 +728,10 @@ def from_config(cls, config, custom_objects=None): from . 
import deserialize as deserialize_layer cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) - return cls(cell, **config) + n_constants = config.pop('_n_constants', None) + layer = cls(cell, **config) + layer._n_constants = n_constants + return layer @property def trainable_weights(self): @@ -2131,294 +2102,3 @@ def from_config(cls, config): if 'implementation' in config and config['implementation'] == 0: config['implementation'] = 1 return cls(**config) - - -class AttentionRNN(RNN): - """Base class for attentive recurrent layers. - - # Arguments - cell: A RNN cell instance supporting attention. It should implement: - - a `call(input_at_t, states_at_t, attended)` method, returning - `(output_at_t, states_at_t_plus_1)`. It must accept the keyword - argument `attended` which refers to the input(s) (tensor or - list of tensors) that is attended to and will be presented as a - whole at each timestep. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is the size of the recurrent - state (which should be the same as the size of the cell - output). This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as the size of the cell - output. - If the RNN cell is a keras layer, the input_shape passed to its - `build` method will be a list of the input shape of the regular - (sequence) input followed by the shape(s) of the attended. - **kwargs: See docs of super class RNN. - - # Input shapes - 3D tensor with shape `(batch_size, timesteps, input_dim)`, - (Optional) 2D tensors with shape `(batch_size, output_dim)`. - - # Attended shapes - ND tensor of the shape expected by the attentive cell. - - # Examples - - ```python - units = 32 - input_size = 5 - attended_shape = (10,) - - x = Input((input_size,)) - h_in = Input((units,)) - attended = Input(attended_shape) - - # predict "attention density" based on input and previous state - attention_density = Dense(attended_shape[0], activation='softmax')( - concatenate([x, h_in])) - attention = multiply([attention_density, attended]) - - h_ = add([Dense(units)(x), - Dense(units)(attention), - Dense(units, use_bias=False)(h_in)]) - h_out = Activation('tanh')(h_) - - # create cell - attention_cell = FunctionalRNNCell(inputs=x, - outputs=h_out, - input_states=[h_in], - output_states=[h_out], - attended=attended) - - # apply to input sequence - x_sequence = Input((None, input_size)) - attention_rnn = AttentionRNN(attention_cell) - y = attention_rnn(x_sequence, attended=attended) - - attention_model = Model([x_sequence, attended], y) - ``` - """ - - def __init__(self, cell, **kwargs): - if isinstance(cell, (list, tuple)): - # Note: not obvious how one would want to propagate the attended - # for stacked cells, user should stack them manually into a single - # cell - raise ValueError('AttentionRNN only supports a single cell') - super(AttentionRNN, self).__init__(cell=cell, **kwargs) - # we let base class check that cel has call function before checking - # for the additional argument - if not has_arg(cell.call, 'attended'): - raise ValueError('cell.call does not take the required keyword ' - 'argument attended') - - self.attended_spec = None - - def build(self, input_shape): - if isinstance(self.attended_spec, list): - attended_shapes = input_shape[-len(self.attended_spec):] - else: - attended_shapes = input_shape[-1:] - - input_shape = input_shape[0] - batch_size = input_shape[0] if self.stateful else None - input_dim = 
input_shape[-1] - self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) - - if self.stateful: - self.reset_states() - - if isinstance(self.cell, Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - self.cell.build([step_input_shape] + attended_shapes) - - def __call__(self, inputs, initial_state=None, attended=None, **kwargs): - # If there are multiple inputs, then they should be the main input, - # `initial_state` and `attended` - # TODO what is meant by "e.g. when loading model from file" in comment - # in base class RNN, can there be a problem if initial states are not - # passed in the Attentive RNN with respect to this!? - inputs, initial_state, attended = self._normalize_args( - inputs, initial_state, attended) - - if attended is None: - raise ValueError('attended input must be passed') - # we need to append attended spec to input spec below - self.attended_spec = [InputSpec(shape=K.int_shape(attended_)) - for attended_ in attended] - - if initial_state: - check_list = initial_state + attended - else: - check_list = attended - # at this point check_list cannot be empty - is_keras_tensor = hasattr(check_list[0], '_keras_history') - for tensor in check_list: - if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state and attended of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') - - if is_keras_tensor: - # Compute the full input spec, including state and attended - input_spec = self.input_spec - state_spec = self.state_spec - if not isinstance(input_spec, list): - input_spec = [input_spec] - if not isinstance(state_spec, list): - state_spec = [state_spec] - self.input_spec = input_spec - inputs = [inputs] - if initial_state: - self.input_spec += state_spec - inputs += initial_state - kwargs['initial_state'] = initial_state - self.input_spec += self.attended_spec - inputs += attended - kwargs['attended'] = attended - - # Perform the call - output = Layer.__call__(self, inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec - return output - else: - if initial_state: - kwargs['initial_state'] = initial_state - kwargs['attended'] = attended - return Layer.__call__(self, inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - attended=None): - # TODO this method duplicates almost everything in RNN.call, - # better solution? - - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - inputs = inputs[0] - if initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - input_shape = K.int_shape(inputs) - timesteps = input_shape[1] - if self.unroll and timesteps in [None, 1]: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined or equal to 1. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. 
If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - - cell_kwargs = {'attended': attended} - if has_arg(self.cell.call, 'training'): - cell_kwargs['training'] = training - - # NOTE: by passing the attended implicitly into the K.rnn it is not - # possible for theano backend to optimise the scan op, see section: - # "Explicitly passing inputs of the inner function to scan" in: - # http://deeplearning.net/software/theano/library/scan.html#lib-scan-shared-variables - # but on the other hand we are not passing weights (shared variables) - # of the cell transformation anyway. - step = functools.partial(self.cell.call, **cell_kwargs) - - last_output, outputs, states = K.rnn(step, - inputs, - initial_state, - go_backwards=self.go_backwards, - mask=mask, - unroll=self.unroll, - input_length=timesteps) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - if self.return_sequences: - output = outputs - else: - output = last_output - - # Properly set learning phase - if getattr(last_output, '_uses_learning_phase', False): - output._uses_learning_phase = True - - if self.return_state: - if not isinstance(states, (list, tuple)): - states = [states] - else: - states = list(states) - return [output] + states - else: - return output - - def _normalize_args(self, inputs, initial_state, attended): - """The inputs `initial_state` and `attended` can be passed to - AttentionRNN.__call__ either by separate arguments or as part of - `inputs`. In this case `inputs` is a list of tensors of which the first - one is the actual (sequence) input followed by initial states followed - by the attended. - - This method separates and normalizes the different groups of inputs. - - # Arguments - inputs: tensor of list/tuple of tensors - initial_state: tensor or list of tensors or None - attended: tensor or list of tensors or None - - # Returns - inputs: tensor - initial_state: list of tensors or None - attended: list of tensors or None - """ - if isinstance(inputs, (list, tuple)): - remaining_inputs = inputs[1:] - inputs = inputs[0] - if remaining_inputs and initial_state is None: - if isinstance(self.state_spec, list): - n_states = len(self.state_spec) - else: - n_states = 1 - initial_state = remaining_inputs[:n_states] - remaining_inputs = remaining_inputs[n_states:] - if remaining_inputs and attended is None: - attended = remaining_inputs - if len(remaining_inputs) > 0: - raise ValueError('too many inputs were passed') - - initial_state = _to_list_or_none(initial_state) - attended = _to_list_or_none(attended) - - return inputs, initial_state, attended - - -def _to_list_or_none(x): # TODO move? 
Very similar to topology._to_list - if x is None or isinstance(x, list): - return x - if isinstance(x, tuple): - return list(x) - return [x] diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index eab72ba44163..24aa68d4d761 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -568,20 +568,20 @@ def test_batch_size_equal_one(layer_class): model.train_on_batch(x, y) -def test_attention_rnn(): +def test_rnn_cell_with_constants_layer(): - class AttentionRNNCell(keras.layers.Layer): + class RNNCellWithConstants(keras.layers.Layer): def __init__(self, units, **kwargs): self.units = units self.state_size = units - super(AttentionRNNCell, self).__init__(**kwargs) + super(RNNCellWithConstants, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list): - raise TypeError('expects shape of attended') - [input_shape, attended_shape] = input_shape - # will (and should) raise if more than one attended tensor passed + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed self.input_kernel = self.add_weight( shape=(input_shape[-1], self.units), @@ -591,33 +591,33 @@ def build(self, input_shape): shape=(self.units, self.units), initializer='uniform', name='recurrent_kernel') - self.attended_kernel = self.add_weight( - shape=(attended_shape[-1], self.units), + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), initializer='uniform', - name='attended_kernel') + name='constant_kernel') self.built = True - def call(self, inputs, states, attended): + def call(self, inputs, states, constants): [prev_output] = states - [attended] = attended + [constant] = constants h_input = keras.backend.dot(inputs, self.input_kernel) h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(attended, self.attended_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) output = h_input + h_state + h_const return output, [output] def get_config(self): config = {'units': self.units} - base_config = super(AttentionRNNCell, self).get_config() + base_config = super(RNNCellWithConstants, self).get_config() return dict(list(base_config.items()) + list(config.items())) # Test basic case. x = keras.Input((None, 5)) - attended = keras.Input((3,)) - cell = AttentionRNNCell(32) - layer = recurrent.AttentionRNN(cell) - y = layer(x, attended=attended) - model = keras.models.Model([x, attended], y) + c = keras.Input((3,)) + cell = RNNCellWithConstants(32) + layer = recurrent.RNN(cell) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) model.compile(optimizer='rmsprop', loss='mse') model.train_on_batch( [np.zeros((6, 5, 5)), np.zeros((6, 3))], @@ -626,73 +626,19 @@ def get_config(self): # Test basic case serialization. 
x_np = np.random.random((6, 5, 5)) - attended_np = np.random.random((6, 3)) - y_np = model.predict([x_np, attended_np]) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) weights = model.get_weights() config = layer.get_config() - with keras.utils.CustomObjectScope({'AttentionRNNCell': AttentionRNNCell}): - layer = recurrent.AttentionRNN.from_config(config) - y = layer(x, attended=attended) - model = keras.models.Model([x, attended], y) + with keras.utils.CustomObjectScope( + {'RNNCellWithConstants': RNNCellWithConstants}): + layer = recurrent.RNN.from_config(config) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) model.set_weights(weights) - y_np_2 = model.predict([x_np, attended_np]) + y_np_2 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) -def test_functional_rnn_cell(): - layers = keras.layers - - # Create the cell: - units = 8 - input_size = 5 - x = Input((input_size,)) - h_tm1 = Input((units,)) - h_ = layers.add([layers.Dense(units)(x), layers.Dense(units)(h_tm1)]) - h = layers.Activation('tanh')(h_) - cell = recurrent.FunctionalRNNCell(inputs=x, - outputs=h, - input_states=h_tm1, - output_states=h) - # Test basic case. - x_seq = Input((None, input_size)) - layer = recurrent.RNN(cell) - y = layer(x_seq) - model = keras.models.Model(x_seq, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, input_size)), np.zeros((6, units))) - - -def test_functional_rnn_cell_with_attended(): - layers = keras.layers - - # Create the cell: - units = 8 - input_size = 5 - constant_shape = (10,) - x = Input((input_size,)) - h_tm1 = Input((units,)) - attended = Input(constant_shape) - h_ = layers.add([layers.Dense(units)(x), - layers.Dense(units)(h_tm1), - layers.Dense(units)(attended)]) - h = layers.Activation('tanh')(h_) - - cell = recurrent.FunctionalRNNCell(inputs=x, - outputs=h, - input_states=h_tm1, - output_states=h, - attended=attended) - # Test basic case. 
- x_seq = Input((None, input_size)) - layer = recurrent.AttentionRNN(cell) - y = layer(x_seq, attended=attended) - model = keras.models.Model([x_seq, attended], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, input_size)), np.zeros((6, constant_shape[0]))], - np.zeros((6, units)) - ) - - if __name__ == '__main__': pytest.main([__file__]) From 95c2359b6fc1c447047fdcfbbbdcc614fcd8b1b1 Mon Sep 17 00:00:00 2001 From: andhus Date: Sun, 22 Oct 2017 11:07:46 +0200 Subject: [PATCH 11/13] fixed PEP8 violations --- keras/layers/recurrent.py | 4 ++-- tests/keras/layers/recurrent_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 867563cd94a2..31c670db9abf 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -375,8 +375,8 @@ def __init__(self, cell, self.state_spec = None self._states = None self.constants_spec = None - self._n_constants = None # used for splitting inputs after - # serialization of layer + self._n_constants = None + @property def states(self): if self._states is None: diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index 24aa68d4d761..d8256f9af851 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -630,8 +630,8 @@ def get_config(self): y_np = model.predict([x_np, c_np]) weights = model.get_weights() config = layer.get_config() - with keras.utils.CustomObjectScope( - {'RNNCellWithConstants': RNNCellWithConstants}): + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): layer = recurrent.RNN.from_config(config) y = layer(x, constants=c) model = keras.models.Model([x, c], y) From 86fdd939079cb53e466ccc45485fc11dfd250b15 Mon Sep 17 00:00:00 2001 From: andhus Date: Wed, 25 Oct 2017 01:02:03 +0200 Subject: [PATCH 12/13] fixed minor review comments --- keras/layers/recurrent.py | 82 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 31c670db9abf..d3c1119fecd1 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -295,11 +295,11 @@ class RNN(Layer): # Note on passing external constants to RNNs You can pass "external" constants to the cell using the `constants` - keyword argument of RNN.__call__ (as well as RNN.call) method. This + keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This requires that the `cell.call` method accepts the same keyword argument `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time) - (a.k.a. as attention mechanism). + transformation on additional static inputs (not changing over time), + a.k.a. an attention mechanism. # Examples @@ -375,7 +375,7 @@ def __init__(self, cell, self.state_spec = None self._states = None self.constants_spec = None - self._n_constants = None + self._num_constants = None @property def states(self): @@ -424,8 +424,8 @@ def compute_mask(self, inputs, mask): def build(self, input_shape): # Note input_shape will be list of shapes of initial states and # constants if these are passed in __call__. 
- if self._n_constants is not None: - constants_shape = input_shape[-self._n_constants:] + if self._num_constants is not None: + constants_shape = input_shape[-self._num_constants:] else: constants_shape = None @@ -475,7 +475,7 @@ def get_initial_state(self, inputs): return [K.tile(initial_state, [1, self.cell.state_size])] def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = self._normalize_args( + inputs, initial_state, constants = self._standardize_args( inputs, initial_state, constants) if initial_state is None and constants is None: @@ -485,36 +485,33 @@ def __call__(self, inputs, initial_state=None, constants=None, **kwargs): # tensors, then add them to the inputs and temporarily modify the # input_spec to include them. - check_list = [] + additional_inputs = [] + additional_specs = [] if initial_state is not None: kwargs['initial_state'] = initial_state - check_list += initial_state + additional_inputs += initial_state self.state_spec = [InputSpec(shape=K.int_shape(state)) for state in initial_state] + additional_specs += self.state_spec if constants is not None: kwargs['constants'] = constants - check_list += constants + additional_inputs += constants self.constants_spec = [InputSpec(shape=K.int_shape(constant)) for constant in constants] - self._n_constants = len(constants) - # at this point check_list cannot be empty - is_keras_tensor = hasattr(check_list[0], '_keras_history') - for tensor in check_list: + self._num_constants = len(constants) + additional_specs += self.constants_spec + # at this point additional_inputs cannot be empty + is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') + for tensor in additional_inputs: if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state and constants of an RNN' + raise ValueError('The initial state or constants of an RNN' ' layer cannot be specified with a mix of' ' Keras tensors and non-Keras tensors') if is_keras_tensor: # Compute the full input spec, including state and constants - full_input = [inputs] - full_input_spec = self.input_spec - if initial_state: - full_input += initial_state - full_input_spec += self.state_spec - if constants: - full_input += constants - full_input_spec += self.constants_spec + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs # Perform the call with temporarily replaced input_spec original_input_spec = self.input_spec self.input_spec = full_input_spec @@ -574,8 +571,8 @@ def call(self, raise ValueError('RNN cell does not support constants') def step(inputs, states): - constants = states[-self._n_constants:] - states = states[:-self._n_constants] + constants = states[-self._num_constants:] + states = states[:-self._num_constants] return self.cell.call(inputs, states, constants=constants, **kwargs) else: @@ -614,19 +611,18 @@ def step(inputs, states): else: return output - def _normalize_args(self, inputs, initial_state, constants): - """When running a model loaded from file, the input tensors - `initial_state` and `constants` can be passed to RNN.__call__ as part - of `inputs` in stead of by the dedicated keyword argumetes. In this - case `inputs` is a list of tensors of which the first one is the - actual (sequence) input followed by initial states, followed by - constants. + def _standardize_args(self, inputs, initial_state, constants): + """Brings the arguments of `__call__` that can contain input tensors to + standard format. 
- This method makes sure initial_states and constants are separated from - inputs and that the are lists of tensors (or None). + When running a model loaded from file, the input tensors + `initial_state` and `constants` can be passed to `RNN.__call__` as part + of `inputs` instead of by the dedicated keyword arguments. This method + makes sure the arguments are separated and that `initial_state` and + `constants` are lists of tensors (or None). # Arguments - inputs: tensor of list/tuple of tensors + inputs: tensor or list/tuple of tensors initial_state: tensor or list of tensors or None constants: tensor or list of tensors or None @@ -637,14 +633,14 @@ def _normalize_args(self, inputs, initial_state, constants): """ if isinstance(inputs, list): assert initial_state is None and constants is None - if self._n_constants is not None: - constants = inputs[-self._n_constants:] - inputs = inputs[:-self._n_constants] + if self._num_constants is not None: + constants = inputs[-self._num_constants:] + inputs = inputs[:-self._num_constants] if len(inputs) > 1: initial_state = inputs[1:] inputs = inputs[0] - def to_list_or_none(x): # TODO break out? + def to_list_or_none(x): if x is None or isinstance(x, list): return x if isinstance(x, tuple): @@ -714,8 +710,8 @@ def get_config(self): 'go_backwards': self.go_backwards, 'stateful': self.stateful, 'unroll': self.unroll} - if self._n_constants is not None: - config['_n_constants'] = self._n_constants + if self._num_constants is not None: + config['num_constants'] = self._num_constants cell_config = self.cell.get_config() config['cell'] = {'class_name': self.cell.__class__.__name__, @@ -728,9 +724,9 @@ def from_config(cls, config, custom_objects=None): from . import deserialize as deserialize_layer cell = deserialize_layer(config.pop('cell'), custom_objects=custom_objects) - n_constants = config.pop('_n_constants', None) + num_constants = config.pop('num_constants', None) layer = cls(cell, **config) - layer._n_constants = n_constants + layer._num_constants = num_constants return layer @property From d33d919590f66ae7770be8f23e4746b037fbe2c5 Mon Sep 17 00:00:00 2001 From: andhus Date: Wed, 25 Oct 2017 01:47:21 +0200 Subject: [PATCH 13/13] added test case for when both inital_state and constants are passed to RNN.__call__ --- tests/keras/layers/recurrent_test.py | 99 +++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index d8256f9af851..19d318a060a3 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -632,13 +632,110 @@ def get_config(self): config = layer.get_config() custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config) + layer = recurrent.RNN.from_config(config.copy()) y = layer(x, constants=c) model = keras.models.Model([x, c], y) model.set_weights(weights) y_np_2 = model.predict([x_np, c_np]) assert_allclose(y_np, y_np_2, atol=1e-4) + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = recurrent.RNN.from_config(config.copy()) + y = layer([x, c]) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, c_np]) + assert_allclose(y_np, y_np_3, atol=1e-4) + + +def test_rnn_cell_with_constants_layer_passing_initial_state(): + + class RNNCellWithConstants(keras.layers.Layer): + + def __init__(self, units, **kwargs): + 
self.units = units + self.state_size = units + super(RNNCellWithConstants, self).__init__(**kwargs) + + def build(self, input_shape): + if not isinstance(input_shape, list): + raise TypeError('expects constants shape') + [input_shape, constant_shape] = input_shape + # will (and should) raise if more than one constant passed + + self.input_kernel = self.add_weight( + shape=(input_shape[-1], self.units), + initializer='uniform', + name='kernel') + self.recurrent_kernel = self.add_weight( + shape=(self.units, self.units), + initializer='uniform', + name='recurrent_kernel') + self.constant_kernel = self.add_weight( + shape=(constant_shape[-1], self.units), + initializer='uniform', + name='constant_kernel') + self.built = True + + def call(self, inputs, states, constants): + [prev_output] = states + [constant] = constants + h_input = keras.backend.dot(inputs, self.input_kernel) + h_state = keras.backend.dot(prev_output, self.recurrent_kernel) + h_const = keras.backend.dot(constant, self.constant_kernel) + output = h_input + h_state + h_const + return output, [output] + + def get_config(self): + config = {'units': self.units} + base_config = super(RNNCellWithConstants, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + # Test basic case. + x = keras.Input((None, 5)) + c = keras.Input((3,)) + s = keras.Input((32,)) + cell = RNNCellWithConstants(32) + layer = recurrent.RNN(cell) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + # Test basic case serialization. + x_np = np.random.random((6, 5, 5)) + s_np = np.random.random((6, 32)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, s_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} + with keras.utils.CustomObjectScope(custom_objects): + layer = recurrent.RNN.from_config(config.copy()) + y = layer(x, initial_state=s, constants=c) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, s_np, c_np]) + assert_allclose(y_np, y_np_2, atol=1e-4) + + # verify that state is used + y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) + with pytest.raises(AssertionError): + assert_allclose(y_np, y_np_2_different_s, atol=1e-4) + + # test flat list inputs + with keras.utils.CustomObjectScope(custom_objects): + layer = recurrent.RNN.from_config(config.copy()) + y = layer([x, s, c]) + model = keras.models.Model([x, s, c], y) + model.set_weights(weights) + y_np_3 = model.predict([x_np, s_np, c_np]) + assert_allclose(y_np, y_np_3, atol=1e-4) + if __name__ == '__main__': pytest.main([__file__])
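
The end state of this patch series can be exercised with a short stand-alone script. The sketch below is illustrative only and is not part of any patch: the cell name `MinimalCellWithConstants` and all shapes are invented for the example, and it assumes a Keras build that includes the changes above; it simply condenses the `RNNCellWithConstants` test into user-facing form.

```python
import numpy as np
import keras
from keras.layers import recurrent


class MinimalCellWithConstants(keras.layers.Layer):
    """Vanilla RNN cell that is additionally conditioned on one constant."""

    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalCellWithConstants, self).__init__(**kwargs)

    def build(self, input_shape):
        # When constants are passed to RNN.__call__, the cell is built with
        # [step_input_shape, constant_shape].
        [step_input_shape, constant_shape] = input_shape
        self.kernel = self.add_weight(
            shape=(step_input_shape[-1], self.units),
            initializer='uniform', name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform', name='recurrent_kernel')
        self.constant_kernel = self.add_weight(
            shape=(constant_shape[-1], self.units),
            initializer='uniform', name='constant_kernel')
        self.built = True

    def call(self, inputs, states, constants):
        [prev_output] = states
        [constant] = constants  # the same tensor is presented at every step
        output = (keras.backend.dot(inputs, self.kernel) +
                  keras.backend.dot(prev_output, self.recurrent_kernel) +
                  keras.backend.dot(constant, self.constant_kernel))
        return output, [output]

    def get_config(self):
        config = {'units': self.units}
        base_config = super(MinimalCellWithConstants, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


# The constant is passed to the RNN layer, which forwards it to the cell.
x = keras.Input((None, 5))   # input sequence
c = keras.Input((3,))        # static "external" constant, no time dimension
layer = recurrent.RNN(MinimalCellWithConstants(units=8))
y = layer(x, constants=c)

model = keras.models.Model([x, c], y)
model.compile(optimizer='rmsprop', loss='mse')
model.train_on_batch([np.zeros((6, 4, 5)), np.zeros((6, 3))],
                     np.zeros((6, 8)))
```

As the tests above demonstrate, serialization follows the same path as for other layers: `get_config()` records `num_constants`, so a layer restored via `RNN.from_config` can be called either as `layer(x, constants=c)` or with the flat list form `layer([x, c])`.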