sql-machine-learning · brightcoder01 · Mar 25, 2020 · Mar 17, 2020 · Mar 17, 2020 · Mar 18, 2020
diff --git a/elasticdl_preprocessing/layers/concatenate_with_offset.py b/elasticdl_preprocessing/layers/concatenate_with_offset.py
@@ -0,0 +1,76 @@
+import tensorflow as tf
+
+
+class ConcatenateWithOffset(tf.keras.layers.Concatenate):
+    """Layer that adds an offset to each id tensor in the input list and
+    then concatenates these tensors.
+
+    It takes as input a list of tensors and returns a single tensor.
+    First, it adds `offsets[i]` to the i-th tensor in the inputs. Then it
+    concatenates the shifted tensors along `axis`. The tensors in inputs
+    must all have the same type (`Tensor`, `RaggedTensor` or
+    `SparseTensor`) and the same shape.
+
+    Example :
+    ```python
+        a1 = tf.constant([[1], [1], [1]])
+        a2 = tf.constant([[2], [2], [2]])
+        offsets = [0, 10]
+        layer = ConcatenateWithOffset(offsets=offsets, axis=1)
+        layer([a1, a2])
+        [[ 1 12]
+         [ 1 12]
+         [ 1 12]]
+    ```
+
+    Arguments:
+        offsets: numeric list to add; one offset per input tensor.
+        axis: Axis along which to concatenate.
+        **kwargs: standard layer keyword arguments.
+
+    Raises:
+        ValueError: (from `call`) if the number of offsets differs from
+            the number of input tensors.
+    """
+
+    def __init__(self, offsets, axis=-1, **kwargs):
+        # Forward `axis` and the standard layer kwargs (name, dtype, ...)
+        # to the parent, which stores `axis` as `self.axis`.
+        super(ConcatenateWithOffset, self).__init__(axis=axis, **kwargs)
+        self.offsets = offsets
+
+    def call(self, inputs):
+        """Add the per-input offsets and concatenate along `self.axis`."""
+        if len(self.offsets) != len(inputs):
+            raise ValueError(
+                "The offsets length is not equal to inputs length, "
+                "the inputs are {}, offsets are {}".format(
+                    inputs, self.offsets
+                )
+            )
+
+        ids_with_offset = []
+        for tensor, offset in zip(inputs, self.offsets):
+            if isinstance(tensor, tf.SparseTensor):
+                # Shift only the stored values; indices and shape are kept.
+                ids_with_offset.append(
+                    tf.SparseTensor(
+                        indices=tensor.indices,
+                        values=tensor.values + offset,
+                        dense_shape=tensor.dense_shape,
+                    )
+                )
+            else:
+                ids_with_offset.append(tensor + offset)
+
+        # Only the first input is inspected to pick the concat op; the
+        # inputs are expected to all be of the same kind.
+        if isinstance(ids_with_offset[0], tf.SparseTensor):
+            return tf.sparse.concat(axis=self.axis, sp_inputs=ids_with_offset)
+        return tf.keras.layers.concatenate(ids_with_offset, axis=self.axis)
+
+    def get_config(self):
+        """Return the layer config, including `offsets`, for serialization."""
+        config = super(ConcatenateWithOffset, self).get_config()
+        config["offsets"] = self.offsets
+        return config
diff --git a/elasticdl_preprocessing/tests/concatenate_with_offset_test.py b/elasticdl_preprocessing/tests/concatenate_with_offset_test.py
@@ -0,0 +1,56 @@
+import unittest
+
+import numpy as np
+import tensorflow as tf
+
+from elasticdl_preprocessing.layers.concatenate_with_offset import (
+    ConcatenateWithOffset,
+)
+from elasticdl_preprocessing.tests.test_utils import (
+    ragged_tensor_equal,
+    sparse_tensor_equal,
+)
+
+
+class ConcatenateWithOffsetTest(unittest.TestCase):
+    """Tests for the ConcatenateWithOffset layer."""
+
+    def test_concatenate_with_offset(self):
+        """Check dense, ragged and sparse inputs with offsets [0, 10]."""
+        concat_layer = ConcatenateWithOffset(offsets=[0, 10], axis=1)
+        ragged_tensor_1 = tf.ragged.constant([[1], [], [1]], dtype=tf.int64)
+        ragged_tensor_2 = tf.ragged.constant([[2], [2], []], dtype=tf.int64)
+
+        # subTest keeps the original call order on one shared layer while
+        # letting every scenario report its own failure.
+        with self.subTest("dense"):
+            tensor_1 = tf.constant([[1], [1], [1]])
+            tensor_2 = tf.constant([[2], [2], [2]])
+            output = concat_layer([tensor_1, tensor_2])
+            expected_out = np.array([[1, 12], [1, 12], [1, 12]])
+            self.assertTrue(np.array_equal(output.numpy(), expected_out))
+
+        with self.subTest("ragged"):
+            output = concat_layer([ragged_tensor_1, ragged_tensor_2])
+            expected_out = tf.ragged.constant(
+                [[1, 12], [12], [1]], dtype=tf.int64
+            )
+            self.assertTrue(ragged_tensor_equal(output, expected_out))
+
+        with self.subTest("sparse"):
+            output = concat_layer(
+                [ragged_tensor_1.to_sparse(), ragged_tensor_2.to_sparse()]
+            )
+            expected_out = tf.SparseTensor(
+                indices=np.array([[0, 0], [0, 1], [1, 1], [2, 0]]),
+                values=np.array([1, 12, 12, 1]),
+                dense_shape=(3, 2),
+            )
+            self.assertTrue(sparse_tensor_equal(output, expected_out))
+
+    def test_offsets_length_mismatch(self):
+        """Reject inputs whose count differs from the number of offsets."""
+        concat_layer = ConcatenateWithOffset(offsets=[0, 10], axis=1)
+        tensors = [tf.constant([[1], [1], [1]]) for _ in range(3)]
+        with self.assertRaises(ValueError):
+            concat_layer(tensors)
diff --git a/elasticdl_preprocessing/tests/test_utils.py b/elasticdl_preprocessing/tests/test_utils.py
@@ -1,9 +1,13 @@
 import numpy as np
 import tensorflow as tf
-from tensorflow.python.ops.ragged import ragged_tensor
 
 
 def sparse_tensor_equal(sp_a, sp_b):
+    if not isinstance(sp_a, tf.SparseTensor) or not isinstance(
+        sp_b, tf.SparseTensor
+    ):
+        return False
+
     if not np.array_equal(sp_a.dense_shape.numpy(), sp_b.dense_shape.numpy()):
         return False
 
@@ -20,15 +24,19 @@ def sparse_tensor_equal(sp_a, sp_b):
 
 
 def ragged_tensor_equal(rt_a, rt_b):
-    print(rt_a, rt_b)
+    if not isinstance(rt_a, tf.RaggedTensor) or not isinstance(
+        rt_b, tf.RaggedTensor
+    ):
+        return False
+
     if rt_a.shape.as_list() != rt_b.shape.as_list():
         return False
 
     for i in range(rt_a.shape[0]):
         sub_rt_a = rt_a[i]
         sub_rt_b = rt_b[i]
-        if ragged_tensor.is_ragged(sub_rt_a) and ragged_tensor.is_ragged(
-            sub_rt_b
+        if isinstance(sub_rt_a, tf.RaggedTensor) and isinstance(
+            sub_rt_b, tf.RaggedTensor
         ):
             if not ragged_tensor_equal(sub_rt_a, sub_rt_b):
                 return False