sql-machine-learning · brightcoder01 · Mar 19, 2020 · Mar 16, 2020 · Mar 16, 2020 · Mar 16, 2020
diff --git a/elasticdl_preprocessing/layers/__init__.py b/elasticdl_preprocessing/layers/__init__.py
diff --git a/elasticdl_preprocessing/layers/to_number.py b/elasticdl_preprocessing/layers/to_number.py
@@ -0,0 +1,105 @@
+import tensorflow as tf
+
+# Every dtype that ToNumber accepts as its `out_type`.
+_NUMBER_DTYPES = [
+    tf.int8,
+    tf.uint8,
+    tf.int16,
+    tf.uint16,
+    tf.int32,
+    tf.uint32,
+    tf.int64,
+    tf.uint64,
+    tf.float16,
+    tf.float32,
+    tf.float64,
+    tf.bfloat16,
+    tf.double,
+]
+
+# Output dtypes requested from `tf.strings.to_number` directly. The op only
+# accepts a few output dtypes (float32, float64, int32, int64; uint32 and
+# uint64 presumably only on newer TensorFlow releases - TODO confirm), so
+# every other number dtype is parsed into a wider dtype and then cast.
+_DIRECT_PARSE_DTYPES = (
+    tf.int32,
+    tf.uint32,
+    tf.int64,
+    tf.uint64,
+    tf.float32,
+    tf.float64,
+)
+
+
+class ToNumber(tf.keras.layers.Layer):
+    """Convert the inputs to a number dtype (int, float, double).
+
+    String inputs are parsed as numbers after every empty string has been
+    replaced by `default_value`; inputs of any other dtype are cast.
+
+    Args:
+        out_type: The target dtype; must be one of `_NUMBER_DTYPES`.
+        default_value: The number substituted for empty strings. It is
+            only used when the input dtype is string.
+        **kwargs: Forwarded to `tf.keras.layers.Layer`, e.g. `name`.
+
+    Raises:
+        ValueError: If `out_type` is not a number dtype.
+
+    Input Shape: Tensor or SparseTensor of any shape
+    Output Shape: Tensor or SparseTensor of the same shape as the input
+    """
+
+    def __init__(self, out_type, default_value, **kwargs):
+        super(ToNumber, self).__init__(**kwargs)
+        if out_type not in _NUMBER_DTYPES:
+            raise ValueError("{} is not a number type.".format(out_type))
+        # Normalize to a tf.DType so a dtype name such as "int32" (the form
+        # stored by get_config) behaves the same as the DType object.
+        self.out_type = tf.as_dtype(out_type)
+        self.default_value = default_value
+
+    def get_config(self):
+        """Return the constructor arguments for Keras serialization."""
+        config = super(ToNumber, self).get_config()
+        config.update(
+            {
+                "out_type": self.out_type.name,
+                "default_value": self.default_value,
+            }
+        )
+        return config
+
+    def call(self, inputs):
+        """Convert `inputs` to `out_type`, keeping a sparse input sparse."""
+        if isinstance(inputs, tf.SparseTensor):
+            # Only the stored values are converted; the indices and the
+            # dense shape are carried over unchanged.
+            return tf.SparseTensor(
+                indices=inputs.indices,
+                values=self._cast_dense_to_number(inputs.values),
+                dense_shape=inputs.dense_shape,
+            )
+        return self._cast_dense_to_number(inputs)
+
+    def _cast_dense_to_number(self, dense_inputs):
+        """Convert a dense tensor to `out_type`."""
+        if dense_inputs.dtype != tf.string:
+            return tf.cast(dense_inputs, self.out_type)
+
+        # An empty string cannot be parsed as a number, so swap in the
+        # textual form of the default value before parsing.
+        filled = tf.where(
+            tf.equal(dense_inputs, ""),
+            x=str(self.default_value),
+            y=dense_inputs,
+        )
+        if self.out_type in _DIRECT_PARSE_DTYPES:
+            return tf.strings.to_number(filled, out_type=self.out_type)
+
+        # Narrower dtypes (e.g. int8, float16): parse into a wider dtype of
+        # the same kind, then cast. Values outside the target range are
+        # subject to tf.cast's conversion rules rather than a parse error.
+        wide_type = tf.int64 if self.out_type.is_integer else tf.float32
+        parsed = tf.strings.to_number(filled, out_type=wide_type)
+        return tf.cast(parsed, self.out_type)
diff --git a/elasticdl_preprocessing/tests/__init__.py b/elasticdl_preprocessing/tests/__init__.py
diff --git a/elasticdl_preprocessing/tests/test_utils.py b/elasticdl_preprocessing/tests/test_utils.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+
+def sparse_tensor_equal(sp_a, sp_b):
+    """Return True if two sparse tensors are exactly equal.
+
+    The tensors are equal when their dense shapes, indices, value dtypes and
+    values all match. The comparison stops at the first mismatch.
+    """
+
+    def _same_array(field):
+        # Compare one component of both tensors as NumPy arrays.
+        lhs = getattr(sp_a, field).numpy()
+        rhs = getattr(sp_b, field).numpy()
+        return np.array_equal(lhs, rhs)
+
+    return bool(
+        _same_array("dense_shape")
+        and _same_array("indices")
+        and not (sp_a.values.dtype != sp_b.values.dtype)
+        and _same_array("values")
+    )
diff --git a/elasticdl_preprocessing/tests/to_number_test.py b/elasticdl_preprocessing/tests/to_number_test.py
@@ -0,0 +1,68 @@
+import unittest
+
+import numpy as np
+import tensorflow as tf
+
+from elasticdl_preprocessing.layers.to_number import ToNumber
+from elasticdl_preprocessing.tests.test_utils import sparse_tensor_equal
+
+
+class ToNumberTest(unittest.TestCase):
+    """Check ToNumber on dense and sparse string inputs."""
+
+    def test_call_dense(self):
+        """Dense strings are parsed; empty strings become the default."""
+        # (out_type, default_value, raw strings, expected numbers)
+        cases = [
+            (
+                tf.int32,
+                -1,
+                [["123", ""], ["456", "-789"]],
+                [[123, -1], [456, -789]],
+            ),
+            (
+                tf.float32,
+                0.0,
+                [["123.1", ""], ["456", "-789.987"]],
+                [[123.1, 0.0], [456.0, -789.987]],
+            ),
+        ]
+        for out_type, default_value, raw, expected in cases:
+            layer = ToNumber(out_type=out_type, default_value=default_value)
+            actual = layer.call(tf.constant(raw, tf.string))
+            wanted = tf.constant(expected, out_type)
+            self.assertEqual(actual.dtype, out_type)
+            self.assertTrue(np.array_equal(actual.numpy(), wanted.numpy()))
+
+    def test_call_sparse(self):
+        """Sparse string values are parsed; indices and shape are kept."""
+        indices = [[0, 2], [2, 1], [2, 3], [5, 4]]
+        dense_shape = [6, 5]
+        # (out_type, default_value, raw strings, expected numbers)
+        cases = [
+            (tf.int32, -1, ["123", "", "456", "-789"], [123, -1, 456, -789]),
+            (
+                tf.float32,
+                0.0,
+                ["123.1", "", "456", "-789.987"],
+                [123.1, 0.0, 456.0, -789.987],
+            ),
+        ]
+        for out_type, default_value, raw, expected in cases:
+            layer = ToNumber(out_type=out_type, default_value=default_value)
+            sparse_input = tf.SparseTensor(
+                indices=indices,
+                values=tf.constant(raw, tf.string),
+                dense_shape=dense_shape,
+            )
+            actual = layer.call(sparse_input)
+            wanted = tf.SparseTensor(
+                indices=indices,
+                values=tf.constant(expected, out_type),
+                dense_shape=dense_shape,
+            )
+            self.assertTrue(sparse_tensor_equal(actual, wanted))
+
+
+if __name__ == "__main__":
+    unittest.main()