fastmachinelearning · vloncar · Jun 3, 2022 · May 13, 2022 · May 13, 2022 · May 13, 2022
diff --git a/hls4ml/backends/fpga/passes/embedding.py b/hls4ml/backends/fpga/passes/embedding.py
@@ -0,0 +1,36 @@
+from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate
+from hls4ml.model.layers import Embedding
+
+
+embed_config_template = """struct config{index} : nnet::embed_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned n_out = {n_out};
+    static const unsigned vocab_size = {vocab_size};
+    static const unsigned io_type = nnet::{iotype};
+    static const unsigned reuse_factor = {reuse};
+    typedef {embeddings_t.name} embeddings_t;
+}};\n"""
+
+embed_function_template = 'nnet::embedding<{input_t}, {output_t}, {config}>({input}, {output}, {e});'
+
+embed_include_list = ['nnet_utils/nnet_embed.h', 'nnet_utils/nnet_embed_stream.h']
+
+class EmbeddingConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(Embedding)
+        self.template = embed_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)
+        return self.template.format(**params)
+
+class EmbeddingFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(Embedding, include_header=embed_include_list)
+        self.template = embed_function_template
+
+    def format(self, node):
+        params = self._default_function_params(node)
+        params['e'] = node.get_weights('embeddings').name
+
+        return self.template.format(**params)
diff --git a/hls4ml/backends/quartus/quartus_backend.py b/hls4ml/backends/quartus/quartus_backend.py
@@ -10,7 +10,7 @@
 from contextlib import contextmanager
 
 from hls4ml.model.types import NamedType, IntegerPrecisionType, FixedPrecisionType
-from hls4ml.model.layers import Layer, Dense, BatchNormalization, Activation, ParametrizedActivation, PReLU, Softmax
+from hls4ml.model.layers import Embedding, Layer, Dense, BatchNormalization, Activation, ParametrizedActivation, PReLU, Softmax
 from hls4ml.model.optimizer import get_backend_passes, layer_optimizer, model_optimizer
 from hls4ml.model.flow import register_flow
 from hls4ml.backends import FPGABackend
@@ -193,3 +193,8 @@ def init_softmax(self, layer):
             layer.set_attr('implementation', 'latency')
         else:
             layer.set_attr('implementation', layer.model.config.get_strategy(layer).lower())
+
+    @layer_optimizer(Embedding)
+    def init_embed(self, layer):
+        if layer.attributes['n_in'] is None:
+           raise Exception('Input length of Embedding layer must be specified.')
diff --git a/hls4ml/backends/vivado/passes/core_templates.py b/hls4ml/backends/vivado/passes/core_templates.py
@@ -174,39 +174,3 @@ def format(self, node):
         params['config'] = '{}_config{}'.format(node.get_attr('activation'), node.index)
 
         return self.template.format(**params)
-
-
-# Embedding templates
-
-embed_config_template = """struct config{index} : nnet::embed_config {{
-    static const unsigned n_in = {n_in};
-    static const unsigned n_out = {n_out};
-    static const unsigned vocab_size = {vocab_size};
-    static const unsigned io_type = nnet::{iotype};
-    static const unsigned reuse_factor = {reuse};
-    typedef {embeddings_t.name} embeddings_t;
-}};\n"""
-
-embed_function_template = 'nnet::embedding<{input_t}, {output_t}, {config}>({input}, {output}, {e});'
-
-embed_include_list = ['nnet_utils/nnet_embed.h', 'nnet_utils/nnet_embed_stream.h']
-
-class EmbeddingConfigTemplate(LayerConfigTemplate):
-    def __init__(self):
-        super().__init__(Embedding)
-        self.template = embed_config_template
-
-    def format(self, node):
-        params = self._default_config_params(node)
-        return self.template.format(**params)
-
-class EmbeddingFunctionTemplate(FunctionCallTemplate):
-    def __init__(self):
-        super().__init__(Embedding, include_header=embed_include_list)
-        self.template = embed_function_template
-
-    def format(self, node):
-        params = self._default_function_params(node)
-        params['e'] = node.get_weights('embeddings').name
-
-        return self.template.format(**params)
diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed.h
@@ -0,0 +1,47 @@
+#ifndef NNET_EMBED_H_
+#define NNET_EMBED_H_
+
+#include "nnet_common.h"
+#include "nnet_helpers.h"
+
+namespace nnet {
+
+    struct embed_config {
+        // Internal data type definitions
+        typedef float embeddings_t;
+
+        // Default layer sizes, overwritten from the backend
+        static const unsigned n_in = 10;
+        static const unsigned n_out = 16;
+        static const unsigned vocab_size = 50;
+
+        // Resource reuse info
+        static const unsigned io_type = io_parallel;
+        static const unsigned reuse_factor = 1;
+    };
+
+    template<class data_T, class res_T, typename CONFIG_T>
+    void embedding(
+        data_T data[CONFIG_T::n_in],
+        res_T  res[CONFIG_T::n_in * CONFIG_T::n_out],
+        const typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) {
+
+        /*
+        * Can store embeddings[] in a register, but a large multiplexer
+        * is created due to a non-constant access pattern
+        */
+
+        InputSequence:
+        #pragma ii CONFIG_T::reuse_factor 
+        #pragma unroll
+        for (int j = 0; j < CONFIG_T::n_in; j++) {
+            DenseEmbedding: 
+            #pragma unroll
+            for (int i = 0; i < CONFIG_T::n_out; i++) {
+                res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i];
+            }
+        }
+    }
+
+}
+#endif
diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed_stream.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed_stream.h
@@ -0,0 +1,10 @@
+/*
+*   PLACEHOLDER - The common pass embedding.py includes both the parallel and the streaming headers; the streaming implementation is currently not supported in Quartus
+*/
+
+#ifndef NNET_EMBED_STREAM_H_
+#define NNET_EMBED_STREAM_H_
+
+namespace nnet {}
+
+#endif
diff --git a/test/pytest/test_embed.py b/test/pytest/test_embed.py
@@ -20,24 +20,31 @@ def keras_model():
     return model
 
 @pytest.fixture
-@pytest.mark.parametrize('io_type', ['io_parallel',
-                                     'io_stream'])
-def hls_model(keras_model, io_type):
+@pytest.mark.parametrize('backend, io_type', [
+                            ('Vivado', 'io_parallel'),
+                            ('Vivado', 'io_stream'),
+                            ('Quartus', 'io_parallel')
+                        ])
+def hls_model(keras_model, backend, io_type):
     hls_config = hls4ml.utils.config_from_keras_model(keras_model,
                                                       default_precision='ap_fixed<16,6>',
                                                       granularity='name')
     hls_config['LayerName']['embedding_input']['Precision']['result'] = 'ap_uint<4>'
-    out_dir = str(test_root_path / 'hls4mlprj_embed_{}').format(io_type)
+    out_dir = str(test_root_path / 'hls4mlprj_embed_{}_{}').format(backend, io_type)
     hls_model = hls4ml.converters.convert_from_keras_model(keras_model,
+                                                           backend=backend,
                                                            hls_config=hls_config,
                                                            io_type=io_type,
                                                            output_dir=out_dir)
 
     hls_model.compile()
     return hls_model
 
-@pytest.mark.parametrize('io_type', ['io_parallel',
-                                     'io_stream'])
+@pytest.mark.parametrize('backend, io_type', [
+                            ('Vivado', 'io_parallel'),
+                            ('Vivado', 'io_stream'),
+                            ('Quartus', 'io_parallel')
+                        ])
 def test_embedding_accuracy(data, keras_model, hls_model):
     X = data
     model = keras_model