From 0d2a2b88101a66a31d56d475aaa6b6007b64b124 Mon Sep 17 00:00:00 2001 From: workingloong Date: Tue, 17 Mar 2020 20:35:31 +0800 Subject: [PATCH 1/7] Support concatenating the tensor adding offset --- .../layers/concatenate_with_offset.py | 65 +++++++++++++++++++ .../tests/concatenate_with_offset_test.py | 31 +++++++++ 2 files changed, 96 insertions(+) create mode 100644 elasticdl_preprocessing/layers/concatenate_with_offset.py create mode 100644 elasticdl_preprocessing/tests/concatenate_with_offset_test.py diff --git a/elasticdl_preprocessing/layers/concatenate_with_offset.py b/elasticdl_preprocessing/layers/concatenate_with_offset.py new file mode 100644 index 000000000..9b6aae2c6 --- /dev/null +++ b/elasticdl_preprocessing/layers/concatenate_with_offset.py @@ -0,0 +1,65 @@ +import tensorflow as tf + + +class ConcatenateWithOffset(tf.keras.layers.Concatenate): + """Layer that add offset for tensor in the list of inputs and + concatenate the tensors. + + It takes as input a list of tensors and returns a single tensor. + Firstly, it will add an offset in offsets for each tensor in inputs. + Then concatenate them to a single tensor. The tensor in inputs + must have the same type, `Tensor` or `RaggedTensor` or `SparseTensor` and + the same shape. + + Example : + ```python + a1 = tf.constant([[1], [1], [1]]) + a2 = tf.constant([[2], [2], [2]]) + offsets = [0, 10] + layer = ConcatenateWithOffset(offsets=offsets, axis=1) + layer([a1, a2]) + [[ 1 12] + [ 1 12] + [ 1 12]] + ``` + + Arguments: + offsets: numeric list to add + axis: Axis along which to concatenate. + **kwargs: standard layer keyword arguments. + """ + def __init__(self, offsets, axis=-1): + super(ConcatenateWithOffset, self).__init__() + self.offsets = offsets + self.axis = axis + + def call(self, inputs): + ids_with_offset = [] + if len(self.offsets) != len(inputs): + raise ValueError( + "The offsets length is not equal to inputs length" + "the inputs are {}, offsets are {}".format( + inputs, self.offsets + ) + ) + for i, tensor in enumerate(inputs): + if isinstance(tensor, tf.SparseTensor): + ids_with_offset.append( + tf.SparseTensor( + indices=tensor.indices, + values=tensor.values + self.offsets[i], + dense_shape=tensor.dense_shape, + ) + ) + else: + ids_with_offset.append(tensor + self.offsets[i]) + + if isinstance(ids_with_offset[0], tf.SparseTensor): + result = tf.sparse.concat( + axis=self.axis, sp_inputs=ids_with_offset + ) + else: + result = tf.keras.layers.concatenate( + ids_with_offset, axis=self.axis + ) + return result diff --git a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py new file mode 100644 index 000000000..b51764271 --- /dev/null +++ b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py @@ -0,0 +1,31 @@ +import unittest + +import numpy as np +import tensorflow as tf + +from elasticdl_preprocessing.layers.concatenate_with_offset import ( + ConcatenateWithOffset +) + + +class ConcatenateWithOffsetTest(unittest.TestCase): + def test_concatenate_with_offset(self): + tensor_1 = tf.constant([[1], [1], [1]]) + tensor_2 = tf.constant([[2], [2], [2]]) + offsets = [0, 10] + concat_layer = ConcatenateWithOffset(offsets=offsets, axis=1) + + output = concat_layer([tensor_1, tensor_2]) + expected_out = np.array([[1, 12], [1, 12], [1, 12]]) + self.assertTrue(np.array_equal(output.numpy(), expected_out)) + + ragged_tensor_1 = tf.ragged.constant([[1], [], [1]]) + ragged_tensor_2 = tf.ragged.constant([[2], [2], []]) + output = concat_layer([ragged_tensor_1, ragged_tensor_2]) + expected_out = np.array([1, 12, 12, 1]) + self.assertTrue(np.array_equal(output.values.numpy(), expected_out)) + + sparse_tensor_1 = ragged_tensor_1.to_sparse() + sparse_tensor_2 = ragged_tensor_2.to_sparse() + output = concat_layer([sparse_tensor_1, sparse_tensor_2]) + self.assertTrue(np.array_equal(output.values.numpy(), expected_out)) From 92233354694bc6bac27f3c9d9d28b5df93e8fc1f Mon Sep 17 00:00:00 2001 From: workingloong Date: Wed, 18 Mar 2020 14:49:03 +0800 Subject: [PATCH 2/7] Reformat code by pre-commit hooks --- elasticdl_preprocessing/layers/concatenate_with_offset.py | 1 + elasticdl_preprocessing/tests/concatenate_with_offset_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/elasticdl_preprocessing/layers/concatenate_with_offset.py b/elasticdl_preprocessing/layers/concatenate_with_offset.py index 9b6aae2c6..4cdaa0458 100644 --- a/elasticdl_preprocessing/layers/concatenate_with_offset.py +++ b/elasticdl_preprocessing/layers/concatenate_with_offset.py @@ -28,6 +28,7 @@ class ConcatenateWithOffset(tf.keras.layers.Concatenate): axis: Axis along which to concatenate. **kwargs: standard layer keyword arguments. """ + def __init__(self, offsets, axis=-1): super(ConcatenateWithOffset, self).__init__() self.offsets = offsets diff --git a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py index b51764271..cbdc3fee5 100644 --- a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py +++ b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py @@ -4,7 +4,7 @@ import tensorflow as tf from elasticdl_preprocessing.layers.concatenate_with_offset import ( - ConcatenateWithOffset + ConcatenateWithOffset, ) From d34eabc7275cf489b1b135d97dc466861452c83a Mon Sep 17 00:00:00 2001 From: workingloong Date: Mon, 23 Mar 2020 10:01:55 +0800 Subject: [PATCH 3/7] Check equality of sparse tensors and ragged tensors --- .../tests/concatenate_with_offset_test.py | 19 ++++++++++++++----- elasticdl_preprocessing/tests/test_utils.py | 1 - 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py index cbdc3fee5..b2330425a 100644 --- a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py +++ b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py @@ -6,6 +6,10 @@ from elasticdl_preprocessing.layers.concatenate_with_offset import ( ConcatenateWithOffset, ) +from elasticdl_preprocessing.tests.test_utils import ( + sparse_tensor_equal, + ragged_tensor_equal, +) class ConcatenateWithOffsetTest(unittest.TestCase): @@ -19,13 +23,18 @@ def test_concatenate_with_offset(self): expected_out = np.array([[1, 12], [1, 12], [1, 12]]) self.assertTrue(np.array_equal(output.numpy(), expected_out)) - ragged_tensor_1 = tf.ragged.constant([[1], [], [1]]) - ragged_tensor_2 = tf.ragged.constant([[2], [2], []]) + ragged_tensor_1 = tf.ragged.constant([[1], [], [1]], dtype=tf.int64) + ragged_tensor_2 = tf.ragged.constant([[2], [2], []], dtype=tf.int64) output = concat_layer([ragged_tensor_1, ragged_tensor_2]) - expected_out = np.array([1, 12, 12, 1]) - self.assertTrue(np.array_equal(output.values.numpy(), expected_out)) + expected_out = tf.ragged.constant([[1, 12], [12], [1]], dtype=tf.int64) + self.assertTrue(ragged_tensor_equal(output, expected_out)) sparse_tensor_1 = ragged_tensor_1.to_sparse() sparse_tensor_2 = ragged_tensor_2.to_sparse() output = concat_layer([sparse_tensor_1, sparse_tensor_2]) - self.assertTrue(np.array_equal(output.values.numpy(), expected_out)) + expected_out = tf.SparseTensor( + indices=np.array([[0, 0], [0, 1], [1, 1], [2, 0]]), + values=np.array([1, 12, 12, 1]), + dense_shape=(3, 2) + ) + self.assertTrue(sparse_tensor_equal(output, expected_out)) diff --git a/elasticdl_preprocessing/tests/test_utils.py b/elasticdl_preprocessing/tests/test_utils.py index 001db7cc5..fc05dbcdd 100644 --- a/elasticdl_preprocessing/tests/test_utils.py +++ b/elasticdl_preprocessing/tests/test_utils.py @@ -20,7 +20,6 @@ def sparse_tensor_equal(sp_a, sp_b): def ragged_tensor_equal(rt_a, rt_b): - print(rt_a, rt_b) if rt_a.shape.as_list() != rt_b.shape.as_list(): return False From ec3445e4726f2a5daa4dbcfcfea4a3a4f6da4189 Mon Sep 17 00:00:00 2001 From: workingloong Date: Mon, 23 Mar 2020 10:02:47 +0800 Subject: [PATCH 4/7] Reformat code by hooks --- elasticdl_preprocessing/tests/concatenate_with_offset_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py index b2330425a..f40d3f107 100644 --- a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py +++ b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py @@ -7,8 +7,8 @@ ConcatenateWithOffset, ) from elasticdl_preprocessing.tests.test_utils import ( - sparse_tensor_equal, ragged_tensor_equal, + sparse_tensor_equal, ) @@ -35,6 +35,6 @@ def test_concatenate_with_offset(self): expected_out = tf.SparseTensor( indices=np.array([[0, 0], [0, 1], [1, 1], [2, 0]]), values=np.array([1, 12, 12, 1]), - dense_shape=(3, 2) + dense_shape=(3, 2), ) self.assertTrue(sparse_tensor_equal(output, expected_out)) From e580f27bb95ab7b998ec436a4f688fafcea2d7c6 Mon Sep 17 00:00:00 2001 From: workingloong Date: Mon, 23 Mar 2020 10:18:28 +0800 Subject: [PATCH 5/7] Check the inputs is ragged tensor --- elasticdl_preprocessing/tests/test_utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/elasticdl_preprocessing/tests/test_utils.py b/elasticdl_preprocessing/tests/test_utils.py index fc05dbcdd..0335ac598 100644 --- a/elasticdl_preprocessing/tests/test_utils.py +++ b/elasticdl_preprocessing/tests/test_utils.py @@ -1,6 +1,5 @@ import numpy as np import tensorflow as tf -from tensorflow.python.ops.ragged import ragged_tensor def sparse_tensor_equal(sp_a, sp_b): @@ -20,14 +19,19 @@ def sparse_tensor_equal(sp_a, sp_b): def ragged_tensor_equal(rt_a, rt_b): + if not isinstance(rt_a, tf.RaggedTensor) or not isinstance( + rt_b, tf.RaggedTensor + ): + return False + if rt_a.shape.as_list() != rt_b.shape.as_list(): return False for i in range(rt_a.shape[0]): sub_rt_a = rt_a[i] sub_rt_b = rt_b[i] - if ragged_tensor.is_ragged(sub_rt_a) and ragged_tensor.is_ragged( - sub_rt_b + if isinstance(sub_rt_a, tf.RaggedTensor) and isinstance( + sub_rt_b, tf.RaggedTensor ): if not ragged_tensor_equal(sub_rt_a, sub_rt_b): return False From b433126d7a908154d2c59537178bf3f2a0b2af74 Mon Sep 17 00:00:00 2001 From: workingloong Date: Mon, 23 Mar 2020 10:21:58 +0800 Subject: [PATCH 6/7] Check inputs are SparseTensor --- elasticdl_preprocessing/tests/test_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/elasticdl_preprocessing/tests/test_utils.py b/elasticdl_preprocessing/tests/test_utils.py index 0335ac598..540e9d918 100644 --- a/elasticdl_preprocessing/tests/test_utils.py +++ b/elasticdl_preprocessing/tests/test_utils.py @@ -3,6 +3,11 @@ def sparse_tensor_equal(sp_a, sp_b): + if not isinstance(sp_a, tf.SparseTensor) or not isinstance( + sp_b, tf.SparseTensor + ): + return False + if not np.array_equal(sp_a.dense_shape.numpy(), sp_b.dense_shape.numpy()): return False From bc42427083f588ad94453f42617ceffe1ba680cd Mon Sep 17 00:00:00 2001 From: workingloong Date: Wed, 25 Mar 2020 19:19:41 +0800 Subject: [PATCH 7/7] Fix docstring by comments --- elasticdl_preprocessing/layers/concatenate_with_offset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elasticdl_preprocessing/layers/concatenate_with_offset.py b/elasticdl_preprocessing/layers/concatenate_with_offset.py index 4cdaa0458..77af0fcdc 100644 --- a/elasticdl_preprocessing/layers/concatenate_with_offset.py +++ b/elasticdl_preprocessing/layers/concatenate_with_offset.py @@ -2,8 +2,8 @@ class ConcatenateWithOffset(tf.keras.layers.Concatenate): - """Layer that add offset for tensor in the list of inputs and - concatenate the tensors. + """Layer that add offset to each id tensor in the input list and + then concatenate these tensors. It takes as input a list of tensors and returns a single tensor. Firstly, it will add an offset in offsets for each tensor in inputs.