External weights vivado accelerator #646


Draft: wants to merge 5 commits into base: main
6 changes: 6 additions & 0 deletions hls4ml/backends/vivado_accelerator/supported_boards.json
@@ -38,5 +38,11 @@
         "python_drivers": {"axi_stream": "axi_stream_driver.py"},
         "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"},
         "c_drivers": {}
     },
+    "ultra96v2": {
+        "part": "xczu3eg-sbva484-1-e",
+        "tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl", "axi_master": "axi_master_design.tcl"},
+        "python_drivers": {"axi_stream": "axi_master_driver.py"},
+        "c_drivers": {"axi_master": "axi_master_design.c"}
+    }
 }
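As a hedged sanity check, the new board entry can be parsed and its keys inspected. The JSON fragment below is an illustrative copy of the diff above, not read from the repository; the backend later looks these keys up by interface name.

```python
import json

# Illustrative copy of the ultra96v2 entry added in this diff.
boards_json = '''
{
    "ultra96v2": {
        "part": "xczu3eg-sbva484-1-e",
        "tcl_scripts": {"axi_lite": "axi_lite_design.tcl",
                        "axi_stream": "axi_stream_design.tcl",
                        "axi_master": "axi_master_design.tcl"},
        "python_drivers": {"axi_stream": "axi_master_driver.py"},
        "c_drivers": {"axi_master": "axi_master_design.c"}
    }
}
'''

boards = json.loads(boards_json)
board = boards['ultra96v2']

# Every interface named in tcl_scripts should resolve to a .tcl file.
assert all(name.endswith('.tcl') for name in board['tcl_scripts'].values())
assert board['part'] == 'xczu3eg-sbva484-1-e'
```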
20 changes: 14 additions & 6 deletions hls4ml/backends/vivado_accelerator/vivado_accelerator_config.py
@@ -128,14 +128,22 @@ def get_clock_period(self):
     def get_driver_path(self):
         if self.board.startswith('alveo'):
             return '../templates/vivado_accelerator/' + 'alveo/' + self.driver + '_drivers/' + \
-                self.get_driver_file()
+                self.get_driver_files()
         else:
             return '../templates/vivado_accelerator/' + self.board + '/' + self.driver + '_drivers/' + \
-                self.get_driver_file()
-
-    def get_driver_file(self):
-        driver_ext = '.py' if self.driver == 'python' else '.h'
-        return self.interface + '_driver' + driver_ext
+                self.get_driver_files()
+
+    #def get_driver_file(self):
+    #    driver_ext = '.py' if self.driver == 'python' else '.h'
+    #    return self.interface + '_driver' + driver_ext
+
+    def get_driver_files(self):
+        if self.driver == 'c':
+            driver_dir = 'sdk'
+            return driver_dir
+        elif self.driver == 'python':
+            driver_ext = '.py'
+            return self.interface + '_driver' + driver_ext
 
     def get_krnl_rtl_src_dir(self):
         return '../templates/vivado_accelerator/' + 'alveo/' + '/krnl_rtl_src'
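The new dispatch can be sketched as a standalone function (a sketch only: `driver` and `interface` are plain strings here rather than attributes of VivadoAcceleratorConfig). One observable consequence is that any driver other than 'c' or 'python' silently falls through and returns None, which the caller would then concatenate into the path:

```python
def get_driver_files(driver, interface):
    """Sketch of VivadoAcceleratorConfig.get_driver_files."""
    if driver == 'c':
        # C drivers ship as a whole 'sdk' directory, not a single file.
        return 'sdk'
    elif driver == 'python':
        # Python drivers remain one file per interface.
        return interface + '_driver' + '.py'
    # Implicit: any other driver value returns None.

assert get_driver_files('python', 'axi_master') == 'axi_master_driver.py'
assert get_driver_files('c', 'axi_master') == 'sdk'
assert get_driver_files('unknown', 'axi_lite') is None
```

Returning a directory name for the C case while returning a file name for the Python case is presumably why the method was renamed from get_driver_file to get_driver_files.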
28 changes: 23 additions & 5 deletions hls4ml/model/graph.py
@@ -604,14 +604,20 @@ def compile(self):
         self._top_function_lib = ctypes.cdll.LoadLibrary(lib_name)
 
     def _get_top_function(self, x):
+
+        io_type = self.config.get_config_value('IOType')
+        interface = self.config.get_config_value('AcceleratorConfig')['Interface'] if self.config.get_config_value('AcceleratorConfig') else None
+        config_weights = (io_type == 'io_stream') and (interface == 'axi_master')
Comment on lines +608 to +610

Contributor:
Two things:

  • I don't think an IOType of io_stream and an Interface of axi_master should be the trigger for reprogrammable weights; it would be better as a new configuration parameter (of the AcceleratorConfig). There would then need to be an assert somewhere that allows only the combinations of the other parameters that have actually been implemented (i.e. right now it also depends on the board).
  • As written, this can't go here in ModelGraph, as it's backend specific.

Contributor Author:

I agree. This was mostly a placeholder, and I was going to ask what would be better.

As in the main thread: do we already have a matrix or list of the existing possible combinations (what goes with what)? I am not sure how to properly set up the configuration/trigger. Do you have suggestions?

Contributor Author:

I started putting together a document, which is far from final. @thesps and @jmitrevs should be able to edit. I am not sure whether that helps more than keeping the discussion in the comments here; a bit of both, I guess.

If I understand it right, the additional configuration parameters (to enable programmable weights) should be passed via the function create_initial_config for the VivadoAccelerator backend.

If you agree, then I would add:

  • configurable_weights
    • bool, optional
    • weights are configurable at run time and thus are exposed on the wrapper interface
    • defaults to False
  • weight_type
    • dict, optional
    • a dictionary that specifies the data type for a set of layer weights (can be float or an ap_ type); if the type is not specified for a layer, it defaults to float
    • defaults to an empty dictionary, i.e. all of the weights are float
    • note: VivadoAcceleratorBackend will round the number of bits used up to the next power-of-2 value.

Please let me know if that makes sense.
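To make the proposal concrete, here is a minimal sketch of the suggested defaults. The parameter names configurable_weights and weight_type come from the comment above and are not yet part of the backend; the helper functions are illustrative only.

```python
def resolve_weight_type(weight_type, layer_name):
    """Return the data type for a layer's weights; unlisted layers default to 'float'."""
    return weight_type.get(layer_name, 'float')

def round_up_pow2(bits):
    """Round a bit width up to the next power-of-2 value (e.g. 18 -> 32)."""
    width = 1
    while width < bits:
        width *= 2
    return width

# Suggested defaults from the comment above (illustrative dict, not the real config object).
accelerator_config = {
    'configurable_weights': False,  # weights fixed at synthesis time
    'weight_type': {},              # empty: every layer's weights are float
}

assert resolve_weight_type(accelerator_config['weight_type'], 'fc1') == 'float'
assert round_up_pow2(18) == 32
```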


         if self._top_function_lib is None:
             raise Exception('Model not compiled')
         if len(self.get_input_variables()) == 1:
             xlist = [x]
         else:
             xlist = x
         n_outputs = len(self.get_output_variables())
+
+        n_weights = len(self.get_weight_variables())
 
         for xi in xlist:
             if not isinstance(xi, np.ndarray):
                 raise Exception('Expected numpy.ndarray, but got {}'.format(type(x)))
@@ -628,9 +634,9 @@ def _get_top_function(self, x):
             else:
                 raise Exception('Invalid type ({}) of numpy array. Supported types are: single, float32, double, float64, float_.'.format(x0.dtype))
 
-
         top_function.restype = None
-        top_function.argtypes = [npc.ndpointer(ctype, flags="C_CONTIGUOUS") for i in range(len(xlist) + n_outputs)]
+        top_function.argtypes = [npc.ndpointer(ctype, flags="C_CONTIGUOUS") \
+            for i in range(len(xlist) + (n_weights if config_weights else 0) + n_outputs)]
 
         return top_function, ctype
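The net effect of the argtypes change is just an argument count: the top function takes one pointer per input, per output, and, when reprogrammable weights are enabled, per weight variable. A minimal sketch of that count, with made-up numbers:

```python
def n_ctype_args(n_inputs, n_outputs, n_weights, config_weights):
    """Number of C_CONTIGUOUS ndpointer arguments passed to the top function."""
    return n_inputs + (n_weights if config_weights else 0) + n_outputs

# One input tensor, one output tensor, three weight tensors (illustrative).
assert n_ctype_args(1, 1, 3, config_weights=False) == 2
assert n_ctype_args(1, 1, 3, config_weights=True) == 5
```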

@@ -654,10 +660,16 @@ def _compute_n_samples(self, x):
         return int(n_sample)
 
     def predict(self, x):
+
+        io_type = self.config.get_config_value('IOType')
+        interface = self.config.get_config_value('AcceleratorConfig')['Interface'] if self.config.get_config_value('AcceleratorConfig') else None
+        config_weights = (io_type == 'io_stream') and (interface == 'axi_master')
Comment on lines +664 to +666

Contributor:
Same comment as above


         top_function, ctype = self._get_top_function(x)
         n_samples = self._compute_n_samples(x)
         n_inputs = len(self.get_input_variables())
         n_outputs = len(self.get_output_variables())
+        n_weights = len(self.get_weight_variables())
 
         curr_dir = os.getcwd()
         os.chdir(self.config.get_output_dir() + '/firmware')
@@ -675,10 +687,16 @@
                 inp = [np.asarray(xj[i]) for xj in x]
             argtuple = inp
             argtuple += predictions
+            if config_weights:
+                for j in range(n_weights):
+                    weights = [float(w) for w in self.get_weight_variables()[j]]
+                    argtuple += [np.asarray(weights)]
             argtuple = tuple(argtuple)
             top_function(*argtuple)
-            output.append(predictions)
+
+            if config_weights and n_samples == 1 and n_inputs:
+                output.append([predictions])
+            else:
+                output.append(predictions)
 
         # Convert to list of numpy arrays (one for each output)
         output = [np.asarray([output[i_sample][i_output] for i_sample in range(n_samples)]) for i_output in range(n_outputs)]
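The argument assembly in the loop above can be sketched in isolation. Plain lists and arrays stand in for the model's variables, and the function name build_argtuple is invented for illustration:

```python
import numpy as np

def build_argtuple(inputs, predictions, weight_vars, config_weights):
    """Assemble the ctypes call arguments in the order the wrapper expects:
    inputs, output buffers, then (optionally) one flat float array per
    weight variable, mirroring the predict() loop above."""
    args = list(inputs) + list(predictions)
    if config_weights:
        for wv in weight_vars:
            # Each weight variable is flattened to a 1-D float array.
            args.append(np.asarray([float(w) for w in wv]))
    return tuple(args)

x = [np.zeros(4, dtype=np.float32)]     # one input tensor
out = [np.zeros(2, dtype=np.float32)]   # one preallocated output buffer
weights = [[1, 2], [3, 4, 5]]           # two weight variables

argtuple = build_argtuple(x, out, weights, config_weights=True)
assert len(argtuple) == 1 + 1 + 2
assert argtuple[2].tolist() == [1.0, 2.0]
```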
2 changes: 1 addition & 1 deletion hls4ml/model/optimizer/passes/nop.py
@@ -6,7 +6,7 @@ def match(self, node):
         cast = False
         if isinstance(node, Activation):
             cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision
-        return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast
+        return isinstance(node, Activation) and node.get_attr('activation') == 'linear' # and not cast
Contributor:

? I don't think this should be included in this PR

Contributor:

That was a quick-and-dirty hack to get some models to optimize better, but it really was meant only for that branch, where we had checked correctness. I agree that it should not be in the PR.


     def transform(self, model, node):
         model.remove_node(node)
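To see why the reviewers object, the match predicate can be isolated with a stand-in node (FakeLinearActivation and the precision strings are invented; the real pass uses hls4ml's Activation nodes). Dropping `and not cast` makes the pass eliminate linear activations even when they perform an implicit precision cast:

```python
class FakeLinearActivation:
    """Stand-in for an hls4ml Activation node (illustrative only)."""
    def __init__(self, in_precision, out_precision):
        self.in_precision = in_precision
        self.out_precision = out_precision

    def get_attr(self, name):
        return 'linear' if name == 'activation' else None

def match(node, keep_cast_check=True):
    # Mirrors the pass: a linear activation matches for removal,
    # unless it casts between precisions and the cast check is kept.
    cast = node.in_precision != node.out_precision
    is_linear = node.get_attr('activation') == 'linear'
    return is_linear and (not cast if keep_cast_check else True)

casting = FakeLinearActivation('ap_fixed<16,6>', 'ap_fixed<8,4>')
assert match(casting, keep_cast_check=True) is False   # original behaviour: kept
assert match(casting, keep_cast_check=False) is True   # hacked behaviour: removed
```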
9 changes: 7 additions & 2 deletions hls4ml/templates/vivado/myproject_test.cpp
@@ -22,8 +22,9 @@
 #include <vector>
 #include <map>
 #include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
+#include <cstdlib>
+#include <cmath>
+#include <cfloat>
Comment on lines -25 to +27
Contributor:

?

Contributor Author:

I had to include cfloat at a certain point, but I do not need it in the current version of the code.

While I was at it, I changed to cmath and cstdlib, which is how I usually include them when working in C++. We can discard this change.


#include "firmware/myproject.h"
#include "firmware/nnet_utils/nnet_helpers.h"
Expand Down Expand Up @@ -56,6 +57,10 @@ int main(int argc, char **argv)
std::string pline;
int e = 0;

//hls-fpga-machine-learning insert weights

//hls-fpga-machine-learning insert load weights

if (fin.is_open() && fpr.is_open()) {
while ( std::getline(fin,iline) && std::getline (fpr,pline) ) {
if (e % CHECKPOINT == 0) std::cout << "Processing input " << e << std::endl;
9 changes: 6 additions & 3 deletions hls4ml/templates/vivado_accelerator/myproject_axi.cpp
@@ -1,14 +1,17 @@
 //hls-fpga-machine-learning insert include
 
 void myproject(
-    input_axi_t in[N_IN],
-    output_axi_t out[N_OUT]
-){
+    input_axi_t in[N_IN]
+    , output_axi_t out[N_OUT]
+    //hls-fpga-machine-learning insert weights
+){
 
     //hls-fpga-machine-learning insert interface
 
     //hls-fpga-machine-learning insert local vars
 
+    //hls-fpga-machine-learning insert enqueue weights
+
     //hls-fpga-machine-learning insert enqueue
 
     //hls-fpga-machine-learning insert call
7 changes: 4 additions & 3 deletions hls4ml/templates/vivado_accelerator/myproject_axi.h
@@ -7,7 +7,8 @@
 //hls-fpga-machine-learning insert definitions
 
 void myproject(
-    input_axi_t in[N_IN],
-    output_axi_t out[N_OUT]
-);
+    input_axi_t in[N_IN]
+    , output_axi_t out[N_OUT]
+    //hls-fpga-machine-learning insert weights
+);
 #endif
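hls4ml fills these //hls-fpga-machine-learning insert ... markers by textual substitution when writing the project. A hedged sketch of how the weights marker might be expanded into an extra port; the port name w2, macro N_W2, and type weight_axi_t are invented for illustration and are not the backend's actual output:

```python
header = '''void myproject(
    input_axi_t in[N_IN]
    , output_axi_t out[N_OUT]
    //hls-fpga-machine-learning insert weights
);'''

# Hypothetical expansion: one AXI-visible port per reprogrammable weight array.
weight_ports = ['    , weight_axi_t w2[N_W2]']

lines = []
for line in header.splitlines():
    if 'insert weights' in line:
        lines.extend(weight_ports)  # replace the marker line with port declarations
    else:
        lines.append(line)
expanded = '\n'.join(lines)

assert 'w2[N_W2]' in expanded
assert 'insert weights' not in expanded
```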