Skip to content

Commit f30cfa6

Browse files
Add the ToNumber preprocess layer. (#1845)
* Add the Preprocessing folder in ElasticDL and README.md
* Do some rephrasing
* Add the initial version of to_number and to_sparse
* Build the directory structure
* Add the ToNumber preprocessing layer
* Add test_utils in elasticdl_preprocessing tests
1 parent 6acbb35 commit f30cfa6

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import tensorflow as tf
2+
3+
# Dtypes that ToNumber accepts as its `out_type`; anything else is rejected
# with a ValueError in ToNumber.__init__.
_NUMBER_DTYPES = [
    # Integer dtypes, signed / unsigned, by increasing width.
    tf.int8, tf.uint8,
    tf.int16, tf.uint16,
    tf.int32, tf.uint32,
    tf.int64, tf.uint64,
    # Floating point dtypes.
    tf.float16, tf.float32, tf.float64,
    tf.bfloat16,
    tf.double,
]
18+
19+
20+
class ToNumber(tf.keras.layers.Layer):
    """Convert the inputs to a number dtype (int, float, double).

    String inputs are parsed with `tf.strings.to_number`; empty strings are
    replaced by `str(default_value)` before parsing. Inputs of any other
    dtype are converted with `tf.cast`.

    Input Shape: Tensor or SparseTensor of any shape.
    Output Shape: Tensor or SparseTensor with the same shape as the input.
    """

    def __init__(self, out_type, default_value):
        """Create the layer.

        Args:
            out_type: The dtype of the output; must be one of
                `_NUMBER_DTYPES`.
            default_value: The number substituted for empty input strings.

        Raises:
            ValueError: If `out_type` is not in `_NUMBER_DTYPES`.
        """
        super(ToNumber, self).__init__()
        if out_type not in _NUMBER_DTYPES:
            raise ValueError("{} is not a number type.".format(out_type))
        self.out_type = out_type
        self.default_value = default_value

    def call(self, inputs):
        """Convert `inputs` to `out_type`, preserving sparsity.

        For a SparseTensor only the values are converted; the indices and
        the dense shape are passed through untouched.
        """
        if isinstance(inputs, tf.SparseTensor):
            number_value = self._cast_dense_to_number(inputs.values)
            return tf.SparseTensor(
                indices=inputs.indices,
                values=number_value,
                dense_shape=inputs.dense_shape,
            )
        return self._cast_dense_to_number(inputs)

    def _string_parse_dtype(self):
        """Return the dtype used to parse strings before the final cast.

        `tf.strings.to_number` only supports float32, float64, int32 and
        int64 as `out_type`, while this layer accepts every dtype in
        `_NUMBER_DTYPES`. Other integer dtypes are parsed as int64 and other
        floating dtypes as float32, then cast to `out_type` by the caller.
        NOTE: uint64 values above the int64 maximum cannot be parsed this way.
        """
        parse_dtypes = (tf.float32, tf.float64, tf.int32, tf.int64)
        if self.out_type in parse_dtypes:
            return self.out_type
        if self.out_type.is_integer:
            return tf.int64
        return tf.float32

    def _cast_dense_to_number(self, dense_inputs):
        """Convert a dense tensor to `out_type`."""
        # Compare dtypes by value rather than by identity.
        if dense_inputs.dtype != tf.string:
            return tf.cast(dense_inputs, self.out_type)

        # Empty strings cannot be parsed, so substitute the default first.
        default_value = str(self.default_value)
        outputs = tf.where(
            tf.equal(dense_inputs, ""), x=default_value, y=dense_inputs
        )
        parse_dtype = self._string_parse_dtype()
        outputs = tf.strings.to_number(outputs, out_type=parse_dtype)
        if parse_dtype != self.out_type:
            outputs = tf.cast(outputs, self.out_type)
        return outputs
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import numpy as np
2+
3+
4+
def sparse_tensor_equal(sp_a, sp_b):
    """Return True when two sparse tensors match exactly.

    The dense shapes, the indices, the dtype of the values and the values
    themselves are compared, in that order; the first mismatch yields False.
    """
    same_layout = np.array_equal(
        sp_a.dense_shape.numpy(), sp_b.dense_shape.numpy()
    ) and np.array_equal(sp_a.indices.numpy(), sp_b.indices.numpy())
    if not same_layout:
        return False

    values_a, values_b = sp_a.values, sp_b.values
    if values_a.dtype != values_b.dtype:
        return False

    return bool(np.array_equal(values_a.numpy(), values_b.numpy()))
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import unittest
2+
3+
import numpy as np
4+
import tensorflow as tf
5+
6+
from elasticdl_preprocessing.layers.to_number import ToNumber
7+
from elasticdl_preprocessing.tests.test_utils import sparse_tensor_equal
8+
9+
10+
class ToNumberTest(unittest.TestCase):
    """Tests for the ToNumber layer on dense and sparse string inputs."""

    def test_call_dense(self):
        """Dense strings are parsed; empty strings become the default."""
        layer = ToNumber(out_type=tf.int32, default_value=-1)
        # Named `inputs` rather than `input` to avoid shadowing the builtin.
        inputs = tf.constant([["123", ""], ["456", "-789"]], tf.string)
        output = layer.call(inputs)
        expected_output = tf.constant([[123, -1], [456, -789]], tf.int32)
        self.assertEqual(output.dtype, tf.int32)
        self.assertTrue(
            np.array_equal(output.numpy(), expected_output.numpy())
        )

        layer = ToNumber(out_type=tf.float32, default_value=0.0)
        inputs = tf.constant([["123.1", ""], ["456", "-789.987"]], tf.string)
        output = layer.call(inputs)
        expected_output = tf.constant(
            [[123.1, 0.0], [456.0, -789.987]], tf.float32
        )
        self.assertEqual(output.dtype, tf.float32)
        self.assertTrue(
            np.array_equal(output.numpy(), expected_output.numpy())
        )

    def test_call_sparse(self):
        """Sparse values are converted; indices and shape are unchanged."""
        layer = ToNumber(out_type=tf.int32, default_value=-1)
        inputs = tf.SparseTensor(
            indices=[[0, 2], [2, 1], [2, 3], [5, 4]],
            values=tf.constant(["123", "", "456", "-789"], tf.string),
            dense_shape=[6, 5],
        )
        output = layer.call(inputs)
        expected_output = tf.SparseTensor(
            indices=[[0, 2], [2, 1], [2, 3], [5, 4]],
            values=tf.constant([123, -1, 456, -789], tf.int32),
            dense_shape=[6, 5],
        )
        self.assertTrue(sparse_tensor_equal(output, expected_output))

        layer = ToNumber(out_type=tf.float32, default_value=0.0)
        inputs = tf.SparseTensor(
            indices=[[0, 2], [2, 1], [2, 3], [5, 4]],
            values=tf.constant(["123.1", "", "456", "-789.987"], tf.string),
            dense_shape=[6, 5],
        )
        output = layer.call(inputs)
        expected_output = tf.SparseTensor(
            indices=[[0, 2], [2, 1], [2, 3], [5, 4]],
            values=tf.constant([123.1, 0.0, 456.0, -789.987], tf.float32),
            dense_shape=[6, 5],
        )
        self.assertTrue(sparse_tensor_equal(output, expected_output))
60+
61+
62+
# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)