Skip to content

Commit 5ef5fb4

Browse files
Add the DNN model & Wide-and-Deep model using the census dataset for the SQLFlow syntax discussion. (#1697)
* Add the DNN Model & WideAndDeep Model for census dataset for SQLFlow syntax discussion * Add the customized layer for GROUP * Remove unnecessary NUMERIC keyword * Remove unused imports * Resolve pre-commit issue * Resolve pre-commit error from isort * Auto update the code using pre-commit * Use Embedding(workclass, 16) instead of Embedding(Hash(workclass, 64), 16) * Add the metadata parsed from COLUMN clause in SQLFlow statement * Add the missed imports in wide_deep_functional.py * Fix the pre-commit check issue * Resolve pre-commit check issue * Resolve issues from pre-commit check * Make the imports in order * Resolve pre-commit check issue * Rename APPLY_VOCAB to VOCABULARIZE
1 parent 41ae333 commit 5ef5fb4

11 files changed

+874
-0
lines changed

model_zoo/census_model_sqlflow/__init__.py

Whitespace-only changes.

model_zoo/census_model_sqlflow/dnn/__init__.py

Whitespace-only changes.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Train a DNNClassifier on the census income table.
-- Fix: the original wrote EMBEDDING(martial_status, 16) — "martial" is a
-- typo of "marital" (the companion feature-column file uses marital-status).
SELECT *
FROM census_income
TO TRAIN DNNClassifier
WITH model.hidden_units = [10, 20]
COLUMN (
    age,
    capital_gain,
    capital_loss,
    hours_per_week,
    EMBEDDING(workclass, 16),
    EMBEDDING(education, 16),
    EMBEDDING(marital_status, 16),
    EMBEDDING(occupation, 16),
    EMBEDDING(relationship, 16),
    EMBEDDING(race, 16),
    EMBEDDING(sex, 16),
    EMBEDDING(native_country, 16)
)
LABEL label
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import tensorflow as tf

# Categorical (string-valued) columns of the census income dataset.
# Each one is hashed and embedded in get_feature_columns().
CATEGORICAL_FEATURE_KEYS = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]
# Continuous columns, fed through plain numeric feature columns.
NUMERIC_FEATURE_KEYS = [
    "age",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]
# Name of the binary target column.
LABEL_KEY = "label"
20+
21+
22+
def get_feature_columns():
    """Build the list of tf.feature_column objects for the census model.

    Numeric features pass through unchanged; every categorical feature is
    hashed into 64 buckets and embedded into 16 dimensions. Numeric columns
    come first, matching the key order of the two module-level lists.
    """
    numeric_columns = [
        tf.feature_column.numeric_column(key) for key in NUMERIC_FEATURE_KEYS
    ]
    embedding_columns = [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                key, hash_bucket_size=64
            ),
            dimension=16,
        )
        for key in CATEGORICAL_FEATURE_KEYS
    ]
    return numeric_columns + embedding_columns
39+
40+
41+
def get_feature_input_layers():
    """Create a name -> tf.keras.Input mapping for every model feature.

    Numeric features are declared as float32 inputs and categorical
    features as string inputs; each has per-example shape (1,).
    """
    inputs = {
        key: tf.keras.Input(shape=(1,), name=key, dtype=tf.float32)
        for key in NUMERIC_FEATURE_KEYS
    }
    inputs.update(
        {
            key: tf.keras.Input(shape=(1,), name=key, dtype=tf.string)
            for key in CATEGORICAL_FEATURE_KEYS
        }
    )
    return inputs
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import tensorflow as tf
2+
from tensorflow.python.keras.metrics import accuracy
3+
4+
from model_zoo.census_model_sqlflow.dnn.census_feature_column import (
5+
get_feature_columns,
6+
get_feature_input_layers,
7+
)
8+
9+
10+
# Model definition from the model zoo.
# Args:
#   feature_columns: list of tf.feature_column objects; can be generated
#     from the SQLFlow `COLUMN` clause.
#   feature_input_layers: dict mapping feature name -> tf.keras.Input.
def dnn_classifier(feature_columns, feature_input_layers):
    """Build a two-hidden-layer MLP with a sigmoid binary-classification head."""
    dense_features = tf.keras.layers.DenseFeatures(feature_columns)
    hidden = dense_features(feature_input_layers)
    # Two identical 16-unit ReLU layers, as in the original architecture.
    for _ in range(2):
        hidden = tf.keras.layers.Dense(16, activation="relu")(hidden)
    output = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)

    return tf.keras.Model(inputs=feature_input_layers, outputs=output)
25+
26+
27+
# Entry point of the submitter program.
def custom_model():
    """Assemble the DNN classifier from the census feature definitions."""
    return dnn_classifier(
        feature_columns=get_feature_columns(),
        feature_input_layers=get_feature_input_layers(),
    )
36+
37+
38+
def loss(labels, predictions):
    """Binary cross-entropy; labels get a trailing axis to match predictions."""
    expanded_labels = tf.expand_dims(labels, axis=1)
    return tf.keras.losses.binary_crossentropy(expanded_labels, predictions)
41+
42+
43+
def optimizer():
    """Return a fresh Adam optimizer with default hyperparameters."""
    return tf.keras.optimizers.Adam()
45+
46+
47+
def eval_metrics_fn():
    """Return the metric-name -> callable mapping used for evaluation."""

    def _accuracy(labels, predictions):
        # Round sigmoid outputs to {0, 1} before comparing with int labels.
        rounded = tf.cast(tf.squeeze(tf.round(predictions)), tf.int32)
        return accuracy(rounded, tf.cast(labels, tf.int32))

    return {"accuracy": _accuracy}
54+
55+
56+
# Categorical (string-valued) columns of the census income dataset,
# parsed as tf.string features in dataset_fn below.
CATEGORICAL_FEATURE_KEYS = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]
# Continuous columns, parsed as tf.float32 features in dataset_fn below.
NUMERIC_FEATURE_KEYS = [
    "age",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]
# Name of the binary target column (tf.int64 in the serialized records).
LABEL_KEY = "label"
73+
74+
75+
# TODO: dataset_fn and the column names above are tied to the input data
# source; consider moving them out of the model definition file. ElasticDL
# currently requires dataset_fn to live in the same file as the model.
def dataset_fn(dataset, mode, _):
    """Map serialized tf.Example records to (features, label) pairs."""

    def _decode(serialized):
        # Schema: string for categorical, float32 for numeric, int64 label.
        schema = {
            name: tf.io.FixedLenFeature([], tf.string)
            for name in CATEGORICAL_FEATURE_KEYS
        }
        schema.update(
            {
                name: tf.io.FixedLenFeature([], tf.float32)
                for name in NUMERIC_FEATURE_KEYS
            }
        )
        schema[LABEL_KEY] = tf.io.FixedLenFeature([], tf.int64)

        features = tf.io.parse_single_example(serialized, schema)
        label = features.pop(LABEL_KEY)
        return features, label

    return dataset.map(_decode)

model_zoo/census_model_sqlflow/wide_and_deep/__init__.py

Whitespace-only changes.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-- Train a WideAndDeepClassifier on the census income table.
-- Fixes vs. the original statement:
--   * BUCKTIZE(hours_per_week, ...) -> BUCKETIZE (typo; the other two
--     bucketize calls in the same clause spell it BUCKETIZE).
--   * The embedding lists referenced group1/group2/group3, but the groups
--     are declared AS group_1/group_2/group_3 — names now match.
--   * Missing comma between the wide_embeddings and deep_embeddings clauses.
--   * martial_status -> marital_status (typo).
SELECT *
FROM census_income
TO TRAIN WideAndDeepClassifier
COLUMN (
    SET GROUP(VOCABULARIZE(workclass), BUCKETIZE(capital_gain, bucket_num=5), BUCKETIZE(capital_loss, bucket_num=5), BUCKETIZE(hours_per_week, bucket_num=6)) AS group_1,
    SET GROUP(HASH(education), HASH(occupation), VOCABULARIZE(marital_status), VOCABULARIZE(relationship)) AS group_2,
    SET GROUP(BUCKETIZE(age, bucket_num=5), HASH(native_country), VOCABULARIZE(race), VOCABULARIZE(sex)) AS group_3,

    [EMBEDDING(group_1, 1), EMBEDDING(group_2, 1)] AS wide_embeddings,
    [EMBEDDING(group_1, 8), EMBEDDING(group_2, 8), EMBEDDING(group_3, 8)] AS deep_embeddings
)
LABEL label

0 commit comments

Comments
 (0)